/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.37 - (hide annotations) (download) (as text)
Tue Sep 8 14:34:31 2009 UTC (2 months, 2 weeks ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-7_5-5_fc12
Changes since 1.36: +6 -8 lines
File MIME type: text/x-patch
fix sort -h for multibyte locales (reported via http://bugs.archlinux.org/task/16022)
1 ovasik 1.31 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2     --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3     +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4     @@ -26,7 +26,7 @@
5     my $prog = 'cut';
6     my $try = "Try \`$prog --help' for more information.\n";
7     my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8     -my $inval = "$prog: invalid byte or field list\n$try";
9     +my $inval = "$prog: invalid byte, character or field list\n$try";
10     my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11    
12     my @Tests =
13     @@ -140,8 +140,8 @@
14     ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15    
16     # None of the following invalid ranges provoked an error up to coreutils-6.9.
17     - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18     - {ERR=>"$prog: invalid decreasing range\n$try"}],
19     + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20     + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21     ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22     ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23     ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 ovasik 1.26 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 twaugh 1.20 @@ -0,0 +1,58 @@
27     +#! /bin/sh
28     +case $# in
29 ovasik 1.26 + 0) xx='../src/sort';;
30 twaugh 1.20 + *) xx="$1";;
31     +esac
32     +test "$VERBOSE" && echo=echo || echo=:
33     +$echo testing program: $xx
34     +errors=0
35     +test "$srcdir" || srcdir=.
36     +test "$VERBOSE" && $xx --version 2> /dev/null
37     +
38     +export LC_ALL=en_US.UTF-8
39     +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40     +errors=0
41     +
42 ovasik 1.26 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 twaugh 1.20 +code=$?
44     +if test $code != 0; then
45     + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46     + errors=`expr $errors + 1`
47     +else
48 ovasik 1.26 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 twaugh 1.20 + case $? in
50     + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 ovasik 1.26 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52     + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 twaugh 1.20 + errors=`expr $errors + 1`;;
54     + 2) $echo "Test mb1 may have failed." 1>&2
55 ovasik 1.26 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 twaugh 1.20 + errors=`expr $errors + 1`;;
57     + esac
58     +fi
59     +
60 ovasik 1.26 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 twaugh 1.20 +code=$?
62     +if test $code != 0; then
63     + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64     + errors=`expr $errors + 1`
65     +else
66 ovasik 1.26 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 twaugh 1.20 + case $? in
68     + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 ovasik 1.26 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70     + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 twaugh 1.20 + errors=`expr $errors + 1`;;
72     + 2) $echo "Test mb2 may have failed." 1>&2
73 ovasik 1.26 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 twaugh 1.20 + errors=`expr $errors + 1`;;
75     + esac
76     +fi
77     +
78     +if test $errors = 0; then
79     + $echo Passed all 113 tests. 1>&2
80     +else
81     + $echo Failed $errors tests. 1>&2
82     +fi
83     +test $errors = 0 || errors=1
84     +exit $errors
85 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 twaugh 1.20 @@ -0,0 +1,4 @@
88     +Apple@AA10@@20
89     +Banana@AA5@@30
90     +Citrus@AA20@@5
91     +Cherry@AA30@@10
92 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 twaugh 1.20 @@ -0,0 +1,4 @@
95     +Citrus@AA20@@5
96     +Cherry@AA30@@10
97     +Apple@AA10@@20
98     +Banana@AA5@@30
99 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 twaugh 1.20 @@ -0,0 +1,4 @@
102     +Apple@10
103     +Banana@5
104     +Citrus@20
105     +Cherry@30
106 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 twaugh 1.20 @@ -0,0 +1,4 @@
109     +Banana@5
110     +Apple@10
111     +Citrus@20
112     +Cherry@30
113 ovasik 1.26 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114     --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115     +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 ovasik 1.28 @@ -192,6 +192,7 @@
117 ovasik 1.26 misc/sort-compress \
118 ovasik 1.33 misc/sort-continue \
119 ovasik 1.28 misc/sort-files0-from \
120 ovasik 1.26 + misc/sort-mb-tests \
121     misc/sort-merge \
122 ovasik 1.33 misc/sort-merge-fdlimit \
123 ovasik 1.26 misc/sort-rand \
124     @@ -391,6 +392,10 @@
125     $(root_tests)
126    
127     pr_data = \
128     + misc/mb1.X \
129     + misc/mb1.I \
130     + misc/mb2.X \
131     + misc/mb2.I \
132     pr/0F \
133     pr/0FF \
134     pr/0FFnt \
135 twaugh 1.21 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136     +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 twaugh 1.20 @@ -22,6 +22,11 @@
138    
139     # include <stdio.h>
140    
141     +/* Get mbstate_t. */
142     +# if HAVE_WCHAR_H
143     +# include <wchar.h>
144     +# endif
145     +
146     /* A `struct linebuffer' holds a line of text. */
147    
148     struct linebuffer
149     @@ -29,6 +34,9 @@
150     size_t size; /* Allocated. */
151     size_t length; /* Used. */
152     char *buffer;
153     +# if HAVE_WCHAR_H
154     + mbstate_t state;
155     +# endif
156     };
157    
158     /* Initialize linebuffer LINEBUFFER for use. */
159 twaugh 1.21 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160     +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 twaugh 1.16 @@ -38,11 +38,28 @@
162     #include <stdio.h>
163 cvsdist 1.1 #include <getopt.h>
164     #include <sys/types.h>
165     +
166 twaugh 1.16 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 cvsdist 1.1 +#if HAVE_WCHAR_H
168     +# include <wchar.h>
169     +#endif
170 twaugh 1.16 +
171 cvsdist 1.1 #include "system.h"
172     #include "error.h"
173     #include "quote.h"
174     #include "xstrndup.h"
175    
176     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 twaugh 1.16 + installation; work around this configuration error. */
178 cvsdist 1.1 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179     +# define MB_LEN_MAX 16
180     +#endif
181     +
182     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183     +#if HAVE_MBRTOWC && defined mbstate_t
184     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185     +#endif
186     +
187     /* The official name of this program (e.g., no `g' prefix). */
188 twaugh 1.16 #define PROGRAM_NAME "expand"
189 cvsdist 1.1
190 twaugh 1.20 @@ -183,6 +200,7 @@
191 twaugh 1.16 stops = num_start + len - 1;
192     }
193     }
194 cvsdist 1.1 +
195     else
196 twaugh 1.16 {
197     error (0, 0, _("tab size contains invalid character(s): %s"),
198 twaugh 1.20 @@ -365,6 +383,142 @@
199 cvsdist 1.1 }
200     }
201    
202     +#if HAVE_MBRTOWC
203     +static void
204 twaugh 1.16 +expand_multibyte (void)
205 cvsdist 1.1 +{
206 twaugh 1.16 + FILE *fp; /* Input stream. */
207     + mbstate_t i_state; /* Current shift state of the input stream. */
208     + mbstate_t i_state_bak; /* Back up the I_STATE. */
209     + mbstate_t o_state; /* Current shift state of the output stream. */
210 cvsdist 1.1 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
211 twaugh 1.16 + char *bufpos; /* Next read position of BUF. */
212     + size_t buflen = 0; /* The length of the byte sequence in buf. */
213     + wchar_t wc; /* The wide character most recently decoded. */
214     + size_t mblength; /* The length in bytes of the multibyte character
215     + that was converted to WC. */
216     + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
217     + int column = 0; /* Column on screen of the next char. */
218     + int next_tab_column; /* Column the next tab stop is on. */
219     + int convert = 1; /* If nonzero, perform translations. */
220     +
221     + fp = next_file ((FILE *) NULL);
222     + if (fp == NULL)
223     + return;
224 cvsdist 1.1 +
225 twaugh 1.16 + memset (&o_state, '\0', sizeof(mbstate_t));
226     + memset (&i_state, '\0', sizeof(mbstate_t));
227 cvsdist 1.1 +
228 twaugh 1.16 + for (;;)
229 cvsdist 1.1 + {
230 twaugh 1.16 + /* Refill the buffer BUF. */
231     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
232 cvsdist 1.1 + {
233 twaugh 1.16 + memmove (buf, bufpos, buflen);
234     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
235     + bufpos = buf;
236 cvsdist 1.1 + }
237 twaugh 1.16 +
238     + /* No character is left in BUF. */
239     + if (buflen < 1)
240 cvsdist 1.1 + {
241 twaugh 1.16 + fp = next_file (fp);
242     +
243     + if (fp == NULL)
244     + break; /* No more files. */
245     + else
246     + {
247     + memset (&i_state, '\0', sizeof(mbstate_t));
248     + continue;
249     + }
250 cvsdist 1.1 + }
251     +
252 twaugh 1.16 + /* Get a wide character. */
253     + i_state_bak = i_state;
254     + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
255 cvsdist 1.1 +
256 twaugh 1.16 + switch (mblength)
257 cvsdist 1.1 + {
258 twaugh 1.16 + case (size_t)-1: /* illegal byte sequence. */
259     + case (size_t)-2:
260     + mblength = 1;
261     + i_state = i_state_bak;
262     + if (convert)
263 cvsdist 1.1 + {
264 twaugh 1.16 + ++column;
265     + if (convert_entire_line == 0)
266     + convert = 0;
267     + }
268     + putchar (*bufpos);
269     + break;
270 cvsdist 1.1 +
271 twaugh 1.16 + case 0: /* null. */
272     + mblength = 1;
273     + if (convert && convert_entire_line == 0)
274     + convert = 0;
275     + putchar ('\0');
276     + break;
277 cvsdist 1.1 +
278 twaugh 1.16 + default:
279     + if (wc == L'\n') /* LF. */
280     + {
281     + tab_index = 0;
282     + column = 0;
283     + convert = 1;
284     + putchar ('\n');
285 cvsdist 1.1 + }
286 twaugh 1.16 + else if (wc == L'\t' && convert) /* Tab. */
287 cvsdist 1.1 + {
288 twaugh 1.16 + if (tab_size == 0)
289 cvsdist 1.1 + {
290 twaugh 1.16 + /* Do not let tab_index == first_free_tab;
291     + stop when it is 1 less. */
292     + while (tab_index < first_free_tab - 1
293     + && column >= tab_list[tab_index])
294     + tab_index++;
295     + next_tab_column = tab_list[tab_index];
296     + if (tab_index < first_free_tab - 1)
297     + tab_index++;
298     + if (column >= next_tab_column)
299     + next_tab_column = column + 1;
300 cvsdist 1.1 + }
301     + else
302 twaugh 1.16 + next_tab_column = column + tab_size - column % tab_size;
303     +
304     + while (column < next_tab_column)
305 cvsdist 1.1 + {
306 twaugh 1.16 + putchar (' ');
307     + ++column;
308 cvsdist 1.1 + }
309     + }
310 twaugh 1.16 + else /* Others. */
311 cvsdist 1.1 + {
312 twaugh 1.16 + if (convert)
313 cvsdist 1.1 + {
314 twaugh 1.16 + if (wc == L'\b')
315     + {
316     + if (column > 0)
317     + --column;
318     + }
319     + else
320     + {
321     + int width; /* The width of WC. */
322 cvsdist 1.1 +
323 twaugh 1.16 + width = wcwidth (wc);
324     + column += (width > 0) ? width : 0;
325     + if (convert_entire_line == 0)
326     + convert = 0;
327     + }
328 cvsdist 1.1 + }
329 twaugh 1.16 + fwrite (bufpos, sizeof(char), mblength, stdout);
330 cvsdist 1.1 + }
331     + }
332 twaugh 1.16 + buflen -= mblength;
333     + bufpos += mblength;
334 cvsdist 1.1 + }
335     +}
336     +#endif
337     +
338 twaugh 1.16 int
339     main (int argc, char **argv)
340 cvsdist 1.1 {
341 twaugh 1.20 @@ -429,7 +583,12 @@
342 twaugh 1.16
343     file_list = (optind < argc ? &argv[optind] : stdin_argv);
344    
345     - expand ();
346 cvsdist 1.1 +#if HAVE_MBRTOWC
347 twaugh 1.16 + if (MB_CUR_MAX > 1)
348     + expand_multibyte ();
349     + else
350     +#endif
351     + expand ();
352    
353     if (have_read_stdin && fclose (stdin) != 0)
354     error (EXIT_FAILURE, errno, "-");
355 twaugh 1.21 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
356     +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
357 ovasik 1.29 @@ -23,16 +23,30 @@
358 twaugh 1.20 #include <sys/types.h>
359     #include <getopt.h>
360 twaugh 1.11
361 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
362 cvsdist 1.1 +#if HAVE_WCHAR_H
363     +# include <wchar.h>
364     +#endif
365     +
366 twaugh 1.20 +/* Get iswblank(), towupper(). */
367 cvsdist 1.1 +#if HAVE_WCTYPE_H
368     +# include <wctype.h>
369     +#endif
370     +
371     #include "system.h"
372     #include "error.h"
373 twaugh 1.20 #include "linebuffer.h"
374     -#include "memcasecmp.h"
375     #include "quote.h"
376     #include "stdio--.h"
377     #include "xmemcoll.h"
378 cvsdist 1.1 #include "xstrtol.h"
379 ovasik 1.25 #include "argmatch.h"
380 cvsdist 1.1
381     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
382     +#if HAVE_MBRTOWC && defined mbstate_t
383     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
384     +#endif
385     +
386     /* The official name of this program (e.g., no `g' prefix). */
387 twaugh 1.20 #define PROGRAM_NAME "join"
388 cvsdist 1.1
389 twaugh 1.20 @@ -104,10 +118,12 @@
390     /* Last element in `outlist', where a new element can be added. */
391     static struct outlist *outlist_end = &outlist_head;
392 cvsdist 1.1
393 twaugh 1.20 -/* Tab character separating fields. If negative, fields are separated
394     - by any nonempty string of blanks, otherwise by exactly one
395     - tab character whose value (when cast to unsigned char) equals TAB. */
396     -static int tab = -1;
397     +/* Tab character separating fields. If NULL, fields are separated
398     + by any nonempty string of blanks. */
399     +static char *tab = NULL;
400 cvsdist 1.1 +
401 twaugh 1.20 +/* The number of bytes used for tab. */
402     +static size_t tablen = 0;
403 cvsdist 1.1
404 ovasik 1.27 /* If nonzero, check that the input is correctly ordered. */
405     static enum
406 twaugh 1.20 @@ -199,10 +217,11 @@
407     if (ptr == lim)
408     return;
409 cvsdist 1.1
410 twaugh 1.20 - if (0 <= tab)
411     + if (tab != NULL)
412     {
413     + unsigned char t = tab[0];
414     char *sep;
415     - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
416     + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
417     extract_field (line, ptr, sep - ptr);
418     }
419     else
420     @@ -229,6 +248,148 @@
421     extract_field (line, ptr, lim - ptr);
422     }
423 cvsdist 1.1
424 twaugh 1.20 +#if HAVE_MBRTOWC
425     +static void
426     +xfields_multibyte (struct line *line)
427     +{
428     + char *ptr = line->buf.buffer;
429     + char const *lim = ptr + line->buf.length - 1;
430     + wchar_t wc = 0;
431     + size_t mblength = 1;
432     + mbstate_t state, state_bak;
433 cvsdist 1.1 +
434 twaugh 1.20 + memset (&state, 0, sizeof (mbstate_t));
435 cvsdist 1.1 +
436 ovasik 1.34 + if (ptr >= lim)
437 twaugh 1.20 + return;
438 cvsdist 1.1 +
439 twaugh 1.20 + if (tab != NULL)
440 cvsdist 1.1 + {
441 twaugh 1.20 + unsigned char t = tab[0];
442     + char *sep = ptr;
443     + for (; ptr < lim; ptr = sep + mblength)
444     + {
445     + sep = ptr;
446     + while (sep < lim)
447     + {
448     + state_bak = state;
449     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
450     +
451     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
452     + {
453     + mblength = 1;
454     + state = state_bak;
455     + }
456     + mblength = (mblength < 1) ? 1 : mblength;
457     +
458     + if (mblength == tablen && !memcmp (sep, tab, mblength))
459     + break;
460     + else
461     + {
462     + sep += mblength;
463     + continue;
464     + }
465     + }
466     +
467 ovasik 1.34 + if (sep >= lim)
468 twaugh 1.20 + break;
469     +
470     + extract_field (line, ptr, sep - ptr);
471     + }
472 cvsdist 1.1 + }
473     + else
474     + {
475 twaugh 1.20 + /* Skip leading blanks before the first field. */
476     + while(ptr < lim)
477     + {
478     + state_bak = state;
479     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
480     +
481     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
482     + {
483     + mblength = 1;
484     + state = state_bak;
485     + break;
486     + }
487     + mblength = (mblength < 1) ? 1 : mblength;
488     +
489     + if (!iswblank(wc))
490     + break;
491     + ptr += mblength;
492     + }
493 cvsdist 1.1 +
494 twaugh 1.20 + do
495     + {
496     + char *sep;
497     + state_bak = state;
498     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
499     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
500 cvsdist 1.1 + {
501 twaugh 1.20 + mblength = 1;
502     + state = state_bak;
503     + break;
504     + }
505     + mblength = (mblength < 1) ? 1 : mblength;
506 cvsdist 1.1 +
507 twaugh 1.20 + sep = ptr + mblength;
508 ovasik 1.34 + while (sep < lim)
509 cvsdist 1.1 + {
510 twaugh 1.20 + state_bak = state;
511     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
512     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
513     + {
514     + mblength = 1;
515     + state = state_bak;
516     + break;
517     + }
518     + mblength = (mblength < 1) ? 1 : mblength;
519     +
520     + if (iswblank (wc))
521     + break;
522 cvsdist 1.1 +
523 twaugh 1.20 + sep += mblength;
524 cvsdist 1.1 + }
525     +
526 twaugh 1.20 + extract_field (line, ptr, sep - ptr);
527 ovasik 1.34 + if (sep >= lim)
528 twaugh 1.20 + return;
529 cvsdist 1.1 +
530 twaugh 1.20 + state_bak = state;
531     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
532 cvsdist 1.1 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
533     + {
534 twaugh 1.20 + mblength = 1;
535     + state = state_bak;
536     + break;
537 cvsdist 1.1 + }
538 twaugh 1.20 + mblength = (mblength < 1) ? 1 : mblength;
539     +
540     + ptr = sep + mblength;
541 ovasik 1.34 + while (ptr < lim)
542 cvsdist 1.1 + {
543 twaugh 1.20 + state_bak = state;
544     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
545     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
546     + {
547     + mblength = 1;
548     + state = state_bak;
549     + break;
550     + }
551     + mblength = (mblength < 1) ? 1 : mblength;
552     +
553     + if (!iswblank (wc))
554     + break;
555     +
556     + ptr += mblength;
557 cvsdist 1.1 + }
558 twaugh 1.20 + }
559 ovasik 1.34 + while (ptr < lim);
560 twaugh 1.20 + }
561 cvsdist 1.1 +
562 twaugh 1.20 + extract_field (line, ptr, lim - ptr);
563     +}
564 cvsdist 1.1 +#endif
565 ovasik 1.27 +
566 ovasik 1.28 static void
567     freeline (struct line *line)
568 ovasik 1.27 {
569 twaugh 1.20 @@ -377,11 +601,18 @@
570 cvsdist 1.1
571 twaugh 1.20 /* Print the join of LINE1 and LINE2. */
572 cvsdist 1.1
573 twaugh 1.20 +#define PUT_TAB_CHAR \
574     + do \
575     + { \
576     + (tab != NULL) ? \
577     + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
578     + } \
579     + while (0)
580     +
581     static void
582     prjoin (struct line const *line1, struct line const *line2)
583 cvsdist 1.1 {
584 twaugh 1.20 const struct outlist *outlist;
585     - char output_separator = tab < 0 ? ' ' : tab;
586    
587     outlist = outlist_head.next;
588     if (outlist)
589     @@ -397,12 +628,12 @@
590     if (o->file == 0)
591     {
592     if (line1 == &uni_blank)
593     - {
594     + {
595     line = line2;
596     field = join_field_2;
597     }
598     else
599     - {
600     + {
601     line = line1;
602     field = join_field_1;
603     }
604     @@ -416,7 +647,7 @@
605     o = o->next;
606     if (o == NULL)
607     break;
608     - putchar (output_separator);
609     + PUT_TAB_CHAR;
610     }
611     putchar ('\n');
612     }
613     @@ -434,23 +665,23 @@
614     prfield (join_field_1, line1);
615     for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
616     {
617     - putchar (output_separator);
618     + PUT_TAB_CHAR;
619     prfield (i, line1);
620     }
621     for (i = join_field_1 + 1; i < line1->nfields; ++i)
622     {
623     - putchar (output_separator);
624     + PUT_TAB_CHAR;
625     prfield (i, line1);
626     }
627 cvsdist 1.1
628 twaugh 1.20 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
629     {
630     - putchar (output_separator);
631     + PUT_TAB_CHAR;
632     prfield (i, line2);
633     }
634     for (i = join_field_2 + 1; i < line2->nfields; ++i)
635     {
636     - putchar (output_separator);
637     + PUT_TAB_CHAR;
638     prfield (i, line2);
639     }
640     putchar ('\n');
641 twaugh 1.21 @@ -859,20 +1090,41 @@
642 cvsdist 1.1
643 twaugh 1.20 case 't':
644     {
645     - unsigned char newtab = optarg[0];
646     - if (! newtab)
647     + char *newtab;
648     + size_t newtablen;
649     + if (! optarg[0])
650     error (EXIT_FAILURE, 0, _("empty tab"));
651     - if (optarg[1])
652     + newtab = xstrdup (optarg);
653     +#if HAVE_MBRTOWC
654     + if (MB_CUR_MAX > 1)
655     + {
656     + mbstate_t state;
657     +
658     + memset (&state, 0, sizeof (mbstate_t));
659     + newtablen = mbrtowc (NULL, newtab,
660     + strnlen (newtab, MB_LEN_MAX),
661     + &state);
662     + if (newtablen == (size_t) 0
663     + || newtablen == (size_t) -1
664     + || newtablen == (size_t) -2)
665     + newtablen = 1;
666     + }
667     + else
668     +#endif
669     + newtablen = 1;
670     +
671     + if (newtablen == 1 && newtab[1])
672     + {
673     + if (STREQ (newtab, "\\0"))
674     + newtab[0] = '\0';
675     + }
676     + if (tab != NULL && strcmp (tab, newtab))
677     {
678     - if (STREQ (optarg, "\\0"))
679     - newtab = '\0';
680     - else
681     - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
682     - quote (optarg));
683     + free (newtab);
684     + error (EXIT_FAILURE, 0, _("incompatible tabs"));
685     }
686     - if (0 <= tab && tab != newtab)
687     - error (EXIT_FAILURE, 0, _("incompatible tabs"));
688     tab = newtab;
689     + tablen = newtablen;
690     }
691     break;
692 ovasik 1.25
693     diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
694     --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
695     +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
696     @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
697     size_t jf_1, size_t jf_2)
698     {
699     /* Start of field to compare in each file. */
700     - char *beg1;
701     - char *beg2;
702     -
703     - size_t len1;
704     - size_t len2; /* Length of fields to compare. */
705     + char *beg[2];
706     + char *copy[2];
707 ovasik 1.27 + size_t len[2]; /* Length of fields to compare. */
708 ovasik 1.25 int diff;
709     + int i, j;
710    
711     if (jf_1 < line1->nfields)
712     {
713     - beg1 = line1->fields[jf_1].beg;
714     - len1 = line1->fields[jf_1].len;
715     + beg[0] = line1->fields[jf_1].beg;
716     + len[0] = line1->fields[jf_1].len;
717     }
718     else
719     {
720     - beg1 = NULL;
721     - len1 = 0;
722     + beg[0] = NULL;
723     + len[0] = 0;
724     }
725    
726     if (jf_2 < line2->nfields)
727     {
728     - beg2 = line2->fields[jf_2].beg;
729     - len2 = line2->fields[jf_2].len;
730     + beg[1] = line2->fields[jf_2].beg;
731     + len[1] = line2->fields[jf_2].len;
732     }
733     else
734     {
735     - beg2 = NULL;
736     - len2 = 0;
737     + beg[1] = NULL;
738     + len[1] = 0;
739     }
740    
741     - if (len1 == 0)
742     - return len2 == 0 ? 0 : -1;
743     - if (len2 == 0)
744     + if (len[0] == 0)
745     + return len[1] == 0 ? 0 : -1;
746     + if (len[1] == 0)
747     return 1;
748    
749     if (ignore_case)
750     {
751     - /* FIXME: ignore_case does not work with NLS (in particular,
752     - with multibyte chars). */
753     - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
754     +#ifdef HAVE_MBRTOWC
755     + if (MB_CUR_MAX > 1)
756     + {
757     + size_t mblength;
758     + wchar_t wc, uwc;
759     + mbstate_t state, state_bak;
760     +
761     + memset (&state, '\0', sizeof (mbstate_t));
762     +
763     + for (i = 0; i < 2; i++)
764     + {
765     + copy[i] = alloca (len[i] + 1);
766     +
767     + for (j = 0; j < MIN (len[0], len[1]);)
768     + {
769     + state_bak = state;
770     + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
771     +
772     + switch (mblength)
773     + {
774     + case (size_t) -1:
775     + case (size_t) -2:
776     + state = state_bak;
777     + /* Fall through */
778     + case 0:
779     + mblength = 1;
780     + break;
781     +
782     + default:
783     + uwc = towupper (wc);
784     +
785     + if (uwc != wc)
786     + {
787     + mbstate_t state_wc;
788     +
789     + memset (&state_wc, '\0', sizeof (mbstate_t));
790     + wcrtomb (copy[i] + j, uwc, &state_wc);
791     + }
792     + else
793     + memcpy (copy[i] + j, beg[i] + j, mblength);
794     + }
795     + j += mblength;
796     + }
797     + copy[i][j] = '\0';
798     + }
799     + }
800     + else
801     +#endif
802     + {
803     + for (i = 0; i < 2; i++)
804     + {
805     + copy[i] = alloca (len[i] + 1);
806     +
807     + for (j = 0; j < MIN (len[0], len[1]); j++)
808     + copy[i][j] = toupper (beg[i][j]);
809     +
810     + copy[i][j] = '\0';
811     + }
812     + }
813     }
814     else
815     {
816     - if (hard_LC_COLLATE)
817     - return xmemcoll (beg1, len1, beg2, len2);
818     - diff = memcmp (beg1, beg2, MIN (len1, len2));
819     + copy[0] = (unsigned char *) beg[0];
820     + copy[1] = (unsigned char *) beg[1];
821     }
822    
823     + if (hard_LC_COLLATE)
824     + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
825     + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
826     +
827     +
828     if (diff)
829     return diff;
830     - return len1 < len2 ? -1 : len1 != len2;
831     + return len[0] - len[1];
832     }
833 cvsdist 1.1
834 ovasik 1.27 /* Check that successive input lines PREV and CURRENT from input file
835 twaugh 1.21 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
836     +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
837 twaugh 1.20 @@ -23,6 +23,16 @@
838     #include <getopt.h>
839     #include <sys/types.h>
840 cvsdist 1.1
841 twaugh 1.20 +/* Get mbstate_t, mbrtowc(). */
842     +#if HAVE_WCHAR_H
843     +# include <wchar.h>
844     +#endif
845     +
846     +/* Get isw* functions. */
847     +#if HAVE_WCTYPE_H
848     +# include <wctype.h>
849     +#endif
850     +
851     #include "system.h"
852     #include "argmatch.h"
853     #include "linebuffer.h"
854     @@ -32,7 +42,19 @@
855     #include "quote.h"
856     #include "xmemcoll.h"
857     #include "xstrtol.h"
858     -#include "memcasecmp.h"
859     +#include "xmemcoll.h"
860     +
861     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
862     + installation; work around this configuration error. */
863     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
864     +# define MB_LEN_MAX 16
865     +#endif
866     +
867     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
868     +#if HAVE_MBRTOWC && defined mbstate_t
869     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
870     +#endif
871     +
872    
873     /* The official name of this program (e.g., no `g' prefix). */
874     #define PROGRAM_NAME "uniq"
875     @@ -109,6 +131,10 @@
876     /* Select whether/how to delimit groups of duplicate lines. */
877     static enum delimit_method delimit_groups;
878    
879     +/* Function pointers. */
880     +static char *
881     +(*find_field) (struct linebuffer *line);
882     +
883     static struct option const longopts[] =
884     {
885     {"count", no_argument, NULL, 'c'},
886 twaugh 1.21 @@ -198,7 +224,7 @@
887 twaugh 1.20 return a pointer to the beginning of the line's field to be compared. */
888    
889     static char *
890 ovasik 1.28 -find_field (struct linebuffer const *line)
891 twaugh 1.20 +find_field_uni (struct linebuffer *line)
892     {
893     size_t count;
894 ovasik 1.28 char const *lp = line->buffer;
895 twaugh 1.21 @@ -219,6 +245,83 @@
896 ovasik 1.28 return line->buffer + i;
897 twaugh 1.20 }
898    
899     +#if HAVE_MBRTOWC
900     +
901     +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
902     + do \
903     + { \
904     + mbstate_t state_bak; \
905     + \
906     + CONVFAIL = 0; \
907     + state_bak = *STATEP; \
908     + \
909     + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
910     + \
911     + switch (MBLENGTH) \
912     + { \
913     + case (size_t)-2: \
914     + case (size_t)-1: \
915     + *STATEP = state_bak; \
916     + CONVFAIL++; \
917     + /* Fall through */ \
918     + case 0: \
919     + MBLENGTH = 1; \
920     + } \
921     + } \
922     + while (0)
923     +
924     +static char *
925     +find_field_multi (struct linebuffer *line)
926 cvsdist 1.1 +{
927 twaugh 1.20 + size_t count;
928     + char *lp = line->buffer;
929     + size_t size = line->length - 1;
930     + size_t pos;
931     + size_t mblength;
932 cvsdist 1.1 + wchar_t wc;
933 twaugh 1.20 + mbstate_t *statep;
934     + int convfail;
935 cvsdist 1.1 +
936 twaugh 1.20 + pos = 0;
937     + statep = &(line->state);
938 cvsdist 1.1 +
939 twaugh 1.20 + /* skip fields. */
940     + for (count = 0; count < skip_fields && pos < size; count++)
941 cvsdist 1.1 + {
942 twaugh 1.20 + while (pos < size)
943 cvsdist 1.1 + {
944 twaugh 1.20 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
945     +
946     + if (convfail || !iswblank (wc))
947 cvsdist 1.1 + {
948 twaugh 1.20 + pos += mblength;
949     + break;
950 cvsdist 1.1 + }
951 twaugh 1.20 + pos += mblength;
952     + }
953 cvsdist 1.1 +
954 twaugh 1.20 + while (pos < size)
955     + {
956     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
957 cvsdist 1.1 +
958 twaugh 1.20 + if (!convfail && iswblank (wc))
959     + break;
960 cvsdist 1.1 +
961 twaugh 1.20 + pos += mblength;
962     + }
963     + }
964 cvsdist 1.1 +
965 twaugh 1.20 + /* skip chars. */
966     + for (count = 0; count < skip_chars && pos < size; count++)
967     + {
968     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
969     + pos += mblength;
970 cvsdist 1.1 + }
971     +
972 twaugh 1.20 + return lp + pos;
973 cvsdist 1.1 +}
974     +#endif
975     +
976 twaugh 1.20 /* Return false if two strings OLD and NEW match, true if not.
977     OLD and NEW point not to the beginnings of the lines
978     but rather to the beginnings of the fields to compare.
979 twaugh 1.21 @@ -227,6 +330,8 @@
980 twaugh 1.20 static bool
981     different (char *old, char *new, size_t oldlen, size_t newlen)
982     {
983     + char *copy_old, *copy_new;
984     +
985     if (check_chars < oldlen)
986     oldlen = check_chars;
987     if (check_chars < newlen)
988 twaugh 1.21 @@ -234,14 +339,92 @@
989 cvsdist 1.1
990 twaugh 1.20 if (ignore_case)
991     {
992     - /* FIXME: This should invoke strcoll somehow. */
993     - return oldlen != newlen || memcasecmp (old, new, oldlen);
994     + size_t i;
995     +
996     + /* Size and fold each copy by its own length; the two fields may differ. */
997     + copy_old = alloca (oldlen + 1);
998     + copy_new = alloca (newlen + 1);
999     + /* Cast: toupper is undefined for negative plain-char values. */
1000     + for (i = 0; i < oldlen; i++)
1001     + copy_old[i] = toupper ((unsigned char) old[i]);
1002     + for (i = 0; i < newlen; i++)
1003     + copy_new[i] = toupper ((unsigned char) new[i]);
1004     }
1005     - else if (hard_LC_COLLATE)
1006     - return xmemcoll (old, oldlen, new, newlen) != 0;
1007     else
1008     - return oldlen != newlen || memcmp (old, new, oldlen);
1009     + {
1010     + copy_old = (char *)old;
1011     + copy_new = (char *)new;
1012     + }
1013     + /* A nonzero collation result means the two fields differ. */
1014     + return xmemcoll (copy_old, oldlen, copy_new, newlen);
1015     +}
1016     +
1017     +#if HAVE_MBRTOWC
1018     +static int
1019     +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1020     +{
1021     + /* Multibyte counterpart of different (): compare at most check_chars
1022     + characters of OLD (OLDLEN bytes) and NEW (NEWLEN bytes), upper-casing
1023     + them first when ignore_case is set. OLDSTATE and NEWSTATE are the
1024     + conversion states at the start of each field. Return nonzero if they differ. */
1025     + size_t i, j, k, chars, mblength, outlen;
1026     + const char *str[2];
1027     + char *copy[2];
1028     + size_t len[2];
1029     + mbstate_t state[2], state_bak, state_wc;
1030     + wchar_t wc, uwc;
1031     +
1032     + str[0] = old;
1033     + str[1] = new;
1034     + len[0] = oldlen;
1035     + len[1] = newlen;
1036     + state[0] = oldstate;
1037     + state[1] = newstate;
1038     +
1039     + for (i = 0; i < 2; i++)
1040     + {
1041     + /* Upper-casing may lengthen a character, so reserve the worst case. */
1042     + copy[i] = alloca (len[i] * (ignore_case ? MB_CUR_MAX : 1) + 1);
1043     +
1044     + for (j = 0, k = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1045     + {
1046     + state_bak = state[i];
1047     + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1048     + outlen = (size_t)-1; /* nothing written to the copy yet */
1049     +
1050     + switch (mblength)
1051     + {
1052     + case (size_t)-1:
1053     + case (size_t)-2:
1054     + state[i] = state_bak;
1055     + /* Fall through */
1056     + case 0:
1057     + mblength = 1;
1058     + break;
1059     +
1060     + default:
1061     + if (ignore_case && (uwc = towupper (wc)) != wc)
1062     + {
1063     + memset (&state_wc, '\0', sizeof(mbstate_t));
1064     + outlen = wcrtomb (copy[i] + k, uwc, &state_wc);
1065     + }
1066     + }
1067     + /* Otherwise (or if re-encoding failed) keep the input bytes verbatim. */
1068     + if (outlen == (size_t)-1)
1069     + {
1070     + memcpy (copy[i] + k, str[i] + j, mblength);
1071     + outlen = mblength;
1072     + }
1073     + j += mblength;
1074     + k += outlen;
1075     + }
1076     + copy[i][k] = '\0';
1077     + len[i] = k;
1078     + }
1079     +
1080     + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1081     }
1082     +#endif
1083    
1084     /* Output the line in linebuffer LINE to standard output
1085     provided that the switches say it should be output.
1086 twaugh 1.21 @@ -295,15 +478,43 @@
1087 twaugh 1.20 {
1088     char *prevfield IF_LINT (= NULL);
1089     size_t prevlen IF_LINT (= 0);
1090     +#if HAVE_MBRTOWC
1091     + mbstate_t prevstate;
1092     +
1093     + memset (&prevstate, '\0', sizeof (mbstate_t));
1094     +#endif
1095    
1096     while (!feof (stdin))
1097     {
1098     char *thisfield;
1099     size_t thislen;
1100     +#if HAVE_MBRTOWC
1101     + mbstate_t thisstate;
1102     +#endif
1103     +
1104 ovasik 1.23 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1105 twaugh 1.20 break;
1106     thisfield = find_field (thisline);
1107     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1108     +#if HAVE_MBRTOWC
1109     + if (MB_CUR_MAX > 1)
1110     + {
1111     + thisstate = thisline->state;
1112     +
1113     + if (prevline->length == 0 || different_multi
1114     + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1115     + {
1116     + fwrite (thisline->buffer, sizeof (char),
1117     + thisline->length, stdout);
1118     +
1119     + SWAP_LINES (prevline, thisline);
1120     + prevfield = thisfield;
1121     + prevlen = thislen;
1122     + prevstate = thisstate;
1123     + }
1124     + }
1125     + else
1126     +#endif
1127     if (prevline->length == 0
1128     || different (thisfield, prevfield, thislen, prevlen))
1129     {
1130 twaugh 1.21 @@ -322,17 +533,26 @@
1131 twaugh 1.20 size_t prevlen;
1132     uintmax_t match_count = 0;
1133     bool first_delimiter = true;
1134     +#if HAVE_MBRTOWC
1135     + mbstate_t prevstate;
1136     +#endif
1137    
1138 ovasik 1.23 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1139 twaugh 1.20 goto closefiles;
1140     prevfield = find_field (prevline);
1141     prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1142     +#if HAVE_MBRTOWC
1143     + prevstate = prevline->state;
1144     +#endif
1145    
1146     while (!feof (stdin))
1147     {
1148     bool match;
1149     char *thisfield;
1150     size_t thislen;
1151     +#if HAVE_MBRTOWC
1152     + mbstate_t thisstate;
1153     +#endif
1154 ovasik 1.23 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1155 twaugh 1.20 {
1156     if (ferror (stdin))
1157 twaugh 1.21 @@ -341,6 +561,15 @@
1158 twaugh 1.20 }
1159     thisfield = find_field (thisline);
1160     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1161     +#if HAVE_MBRTOWC
1162     + if (MB_CUR_MAX > 1)
1163     + {
1164     + thisstate = thisline->state;
1165     + match = !different_multi (thisfield, prevfield,
1166     + thislen, prevlen, thisstate, prevstate);
1167     + }
1168     + else
1169     +#endif
1170     match = !different (thisfield, prevfield, thislen, prevlen);
1171     match_count += match;
1172    
1173 twaugh 1.21 @@ -373,6 +602,9 @@
1174 twaugh 1.20 SWAP_LINES (prevline, thisline);
1175     prevfield = thisfield;
1176     prevlen = thislen;
1177     +#if HAVE_MBRTOWC
1178     + prevstate = thisstate;
1179     +#endif
1180     if (!match)
1181     match_count = 0;
1182     }
1183 twaugh 1.21 @@ -417,6 +649,19 @@
1184 twaugh 1.20
1185     atexit (close_stdout);
1186    
1187     +#if HAVE_MBRTOWC
1188     + if (MB_CUR_MAX > 1)
1189     + {
1190     + find_field = find_field_multi;
1191     + }
1192     + else
1193     +#endif
1194     + {
1195     + find_field = find_field_uni;
1196     + }
1197     +
1198     +
1199     +
1200     skip_chars = 0;
1201     skip_fields = 0;
1202     check_chars = SIZE_MAX;
1203 twaugh 1.21 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1204     +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1205 twaugh 1.20 @@ -23,11 +23,33 @@
1206     #include <getopt.h>
1207 twaugh 1.16 #include <sys/types.h>
1208 cvsdist 1.1
1209 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1210 cvsdist 1.1 +#if HAVE_WCHAR_H
1211     +# include <wchar.h>
1212     +#endif
1213     +
1214 twaugh 1.20 +/* Get iswprint(), iswblank(), wcwidth(). */
1215 cvsdist 1.1 +#if HAVE_WCTYPE_H
1216     +# include <wctype.h>
1217     +#endif
1218     +
1219     #include "system.h"
1220 twaugh 1.16 #include "error.h"
1221 twaugh 1.11 #include "quote.h"
1222 cvsdist 1.1 #include "xstrtol.h"
1223 twaugh 1.16
1224 twaugh 1.20 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1225     + installation; work around this configuration error. */
1226     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1227     +# undef MB_LEN_MAX
1228     +# define MB_LEN_MAX 16
1229     +#endif
1230     +
1231 cvsdist 1.1 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1232     +#if HAVE_MBRTOWC && defined mbstate_t
1233     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1234     +#endif
1235     +
1236 twaugh 1.20 #define TAB_WIDTH 8
1237    
1238 twaugh 1.16 /* The official name of this program (e.g., no `g' prefix). */
1239 ovasik 1.28 @@ -35,20 +57,41 @@
1240 cvsdist 1.1
1241 ovasik 1.27 #define AUTHORS proper_name ("David MacKenzie")
1242 cvsdist 1.1
1243 twaugh 1.20 +#define FATAL_ERROR(Message) /* Report MESSAGE, then call usage (2). */ \
1244     + do \
1245     + { \
1246     + error (0, 0, "%s", (Message)); /* never use MESSAGE as the format */ \
1247     + usage (2); \
1248     + } \
1249     + while (0)
1250 cvsdist 1.1 +
1251 twaugh 1.20 +enum operating_mode
1252     +{
1253     + /* Count screen columns when measuring a line against the width. */
1254     + column_mode,
1255 twaugh 1.16 +
1256 twaugh 1.20 + /* Count bytes rather than columns. */
1257     + byte_mode,
1258     +
1259     + /* Count characters rather than columns (presumably selected by -c). */
1260     + character_mode,
1261     +};
1262     +
1263     +/* The unit currently being counted; zero-initialized, i.e. column_mode. */
1264     +static enum operating_mode operating_mode;
1265     +
1266     /* If nonzero, try to break on whitespace. */
1267     static bool break_spaces;
1268    
1269     -/* If nonzero, count bytes, not column positions. */
1270     -static bool count_bytes;
1271     -
1272     /* If nonzero, at least one of the files we read was standard input. */
1273     static bool have_read_stdin;
1274    
1275     -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1276     +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1277    
1278     static struct option const longopts[] =
1279     {
1280     {"bytes", no_argument, NULL, 'b'},
1281     + {"characters", no_argument, NULL, 'c'},
1282     {"spaces", no_argument, NULL, 's'},
1283     {"width", required_argument, NULL, 'w'},
1284     {GETOPT_HELP_OPTION_DECL},
1285     @@ -81,6 +124,7 @@
1286     "), stdout);
1287     fputs (_("\
1288     -b, --bytes count bytes rather than columns\n\
1289     + -c, --characters count characters rather than columns\n\
1290     -s, --spaces break at spaces\n\
1291     -w, --width=WIDTH use WIDTH columns instead of 80\n\
1292     "), stdout);
1293     @@ -98,7 +142,7 @@
1294     static size_t
1295     adjust_column (size_t column, char c)
1296     {
1297     - if (!count_bytes)
1298     + if (operating_mode != byte_mode)
1299     {
1300     if (c == '\b')
1301     {
1302     @@ -121,30 +165,14 @@
1303     to stdout, with maximum line length WIDTH.
1304     Return true if successful. */
1305    
1306     -static bool
1307     -fold_file (char const *filename, size_t width)
1308     +static void
1309     +fold_text (FILE *istream, size_t width, int *saved_errno)
1310 cvsdist 1.1 {
1311 twaugh 1.20 - FILE *istream;
1312     int c;
1313     size_t column = 0; /* Screen column where next char will go. */
1314     size_t offset_out = 0; /* Index in `line_out' for next char. */
1315     static char *line_out = NULL;
1316     static size_t allocated_out = 0;
1317     - int saved_errno;
1318     -
1319     - if (STREQ (filename, "-"))
1320     - {
1321     - istream = stdin;
1322     - have_read_stdin = true;
1323     - }
1324     - else
1325     - istream = fopen (filename, "r");
1326     -
1327     - if (istream == NULL)
1328     - {
1329     - error (0, errno, "%s", filename);
1330     - return false;
1331     - }
1332 twaugh 1.16
1333 twaugh 1.20 while ((c = getc (istream)) != EOF)
1334 twaugh 1.16 {
1335 twaugh 1.20 @@ -172,6 +200,15 @@
1336     bool found_blank = false;
1337     size_t logical_end = offset_out;
1338    
1339     + /* If LINE_OUT has no wide character,
1340     + put a new wide character in LINE_OUT
1341     + if column is bigger than width. */
1342     + if (offset_out == 0)
1343     + {
1344     + line_out[offset_out++] = c;
1345     + continue;
1346     + }
1347     +
1348     /* Look for the last blank. */
1349     while (logical_end)
1350