/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.39 - (hide annotations) (download) (as text)
Sat Sep 12 09:28:49 2009 UTC (2 months, 1 week ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-7_6-4_fc12, coreutils-7_6-5_fc12, coreutils-7_6-7_fc13, coreutils-7_6-2_fc12, F-12-split, coreutils-7_6-3_fc12, coreutils-7_6-6_fc13, coreutils-7_6-6_fc12, coreutils-7_6-1_fc12
Changes since 1.38: +1196 -1219 lines
File MIME type: text/x-patch
new upstream bugfix release 7.6, removed applied patches,defuzzed the rest
1 ovasik 1.31 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2     --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3     +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4     @@ -26,7 +26,7 @@
5     my $prog = 'cut';
6     my $try = "Try \`$prog --help' for more information.\n";
7     my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8     -my $inval = "$prog: invalid byte or field list\n$try";
9     +my $inval = "$prog: invalid byte, character or field list\n$try";
10     my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11    
12     my @Tests =
13     @@ -140,8 +140,8 @@
14     ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15    
16     # None of the following invalid ranges provoked an error up to coreutils-6.9.
17     - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18     - {ERR=>"$prog: invalid decreasing range\n$try"}],
19     + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20     + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21     ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22     ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23     ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 ovasik 1.26 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 twaugh 1.20 @@ -0,0 +1,58 @@
27     +#! /bin/sh
28     +case $# in
29 ovasik 1.26 + 0) xx='../src/sort';;
30 twaugh 1.20 + *) xx="$1";;
31     +esac
32     +test "$VERBOSE" && echo=echo || echo=:
33     +$echo testing program: $xx
34     +errors=0
35     +test "$srcdir" || srcdir=.
36     +test "$VERBOSE" && $xx --version 2> /dev/null
37     +
38     +export LC_ALL=en_US.UTF-8
39     +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40     +errors=0
41     +
42 ovasik 1.26 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 twaugh 1.20 +code=$?
44     +if test $code != 0; then
45     + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46     + errors=`expr $errors + 1`
47     +else
48 ovasik 1.26 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 twaugh 1.20 + case $? in
50     + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 ovasik 1.26 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52     + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 twaugh 1.20 + errors=`expr $errors + 1`;;
54     + 2) $echo "Test mb1 may have failed." 1>&2
55 ovasik 1.26 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 twaugh 1.20 + errors=`expr $errors + 1`;;
57     + esac
58     +fi
59     +
60 ovasik 1.26 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 twaugh 1.20 +code=$?
62     +if test $code != 0; then
63     + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64     + errors=`expr $errors + 1`
65     +else
66 ovasik 1.26 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 twaugh 1.20 + case $? in
68     + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 ovasik 1.26 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70     + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 twaugh 1.20 + errors=`expr $errors + 1`;;
72     + 2) $echo "Test mb2 may have failed." 1>&2
73 ovasik 1.26 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 twaugh 1.20 + errors=`expr $errors + 1`;;
75     + esac
76     +fi
77     +
78     +if test $errors = 0; then
79     + $echo Passed all 113 tests. 1>&2
80     +else
81     + $echo Failed $errors tests. 1>&2
82     +fi
83     +test $errors = 0 || errors=1
84     +exit $errors
85 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 twaugh 1.20 @@ -0,0 +1,4 @@
88     +Apple@AA10@@20
89     +Banana@AA5@@30
90     +Citrus@AA20@@5
91     +Cherry@AA30@@10
92 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 twaugh 1.20 @@ -0,0 +1,4 @@
95     +Citrus@AA20@@5
96     +Cherry@AA30@@10
97     +Apple@AA10@@20
98     +Banana@AA5@@30
99 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 twaugh 1.20 @@ -0,0 +1,4 @@
102     +Apple@10
103     +Banana@5
104     +Citrus@20
105     +Cherry@30
106 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 twaugh 1.20 @@ -0,0 +1,4 @@
109     +Banana@5
110     +Apple@10
111     +Citrus@20
112     +Cherry@30
113 ovasik 1.26 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114     --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115     +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 ovasik 1.28 @@ -192,6 +192,7 @@
117 ovasik 1.26 misc/sort-compress \
118 ovasik 1.33 misc/sort-continue \
119 ovasik 1.28 misc/sort-files0-from \
120 ovasik 1.26 + misc/sort-mb-tests \
121     misc/sort-merge \
122 ovasik 1.33 misc/sort-merge-fdlimit \
123 ovasik 1.26 misc/sort-rand \
124     @@ -391,6 +392,10 @@
125     $(root_tests)
126    
127     pr_data = \
128     + misc/mb1.X \
129     + misc/mb1.I \
130     + misc/mb2.X \
131     + misc/mb2.I \
132     pr/0F \
133     pr/0FF \
134     pr/0FFnt \
135 twaugh 1.21 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136     +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 twaugh 1.20 @@ -22,6 +22,11 @@
138    
139     # include <stdio.h>
140    
141     +/* Get mbstate_t. */
142     +# if HAVE_WCHAR_H
143     +# include <wchar.h>
144     +# endif
145     +
146     /* A `struct linebuffer' holds a line of text. */
147    
148     struct linebuffer
149     @@ -29,6 +34,9 @@
150     size_t size; /* Allocated. */
151     size_t length; /* Used. */
152     char *buffer;
153     +# if HAVE_WCHAR_H
154     + mbstate_t state;
155     +# endif
156     };
157    
158     /* Initialize linebuffer LINEBUFFER for use. */
159 twaugh 1.21 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160     +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 twaugh 1.16 @@ -38,11 +38,28 @@
162     #include <stdio.h>
163 cvsdist 1.1 #include <getopt.h>
164     #include <sys/types.h>
165     +
166 twaugh 1.16 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 cvsdist 1.1 +#if HAVE_WCHAR_H
168     +# include <wchar.h>
169     +#endif
170 twaugh 1.16 +
171 cvsdist 1.1 #include "system.h"
172     #include "error.h"
173     #include "quote.h"
174     #include "xstrndup.h"
175    
176     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 twaugh 1.16 + installation; work around this configuration error. */
178 cvsdist 1.1 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179     +# define MB_LEN_MAX 16
180     +#endif
181     +
182     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183     +#if HAVE_MBRTOWC && defined mbstate_t
184     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185     +#endif
186     +
187     /* The official name of this program (e.g., no `g' prefix). */
188 twaugh 1.16 #define PROGRAM_NAME "expand"
189 cvsdist 1.1
190 twaugh 1.20 @@ -365,6 +383,142 @@
191 cvsdist 1.1 }
192     }
193    
194     +#if HAVE_MBRTOWC
195     +static void
196 twaugh 1.16 +expand_multibyte (void)
197 cvsdist 1.1 +{
198 twaugh 1.16 + FILE *fp; /* Input stream. */
199     + mbstate_t i_state; /* Current shift state of the input stream. */
200     + mbstate_t i_state_bak; /* Back up the I_STATE. */
201     + mbstate_t o_state; /* Current shift state of the output stream. */
202 cvsdist 1.1 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
203 twaugh 1.16 + char *bufpos; /* Next read position of BUF. */
204     + size_t buflen = 0; /* The length of the byte sequence in buf. */
205     + wchar_t wc; /* A gotten wide character. */
206     + size_t mblength; /* The byte size of a multibyte character
207     + which shows as same character as WC. */
208     + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
209     + int column = 0; /* Column on screen of the next char. */
210     + int next_tab_column; /* Column the next tab stop is on. */
211     + int convert = 1; /* If nonzero, perform translations. */
212     +
213     + fp = next_file ((FILE *) NULL);
214     + if (fp == NULL)
215     + return;
216 cvsdist 1.1 +
217 twaugh 1.16 + memset (&o_state, '\0', sizeof(mbstate_t));
218     + memset (&i_state, '\0', sizeof(mbstate_t));
219 cvsdist 1.1 +
220 twaugh 1.16 + for (;;)
221 cvsdist 1.1 + {
222 twaugh 1.16 + /* Refill the buffer BUF. */
223     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
224 cvsdist 1.1 + {
225 twaugh 1.16 + memmove (buf, bufpos, buflen);
226     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
227     + bufpos = buf;
228 cvsdist 1.1 + }
229 twaugh 1.16 +
230     + /* No character is left in BUF. */
231     + if (buflen < 1)
232 cvsdist 1.1 + {
233 twaugh 1.16 + fp = next_file (fp);
234     +
235     + if (fp == NULL)
236     + break; /* No more files. */
237     + else
238     + {
239     + memset (&i_state, '\0', sizeof(mbstate_t));
240     + continue;
241     + }
242 cvsdist 1.1 + }
243     +
244 twaugh 1.16 + /* Get a wide character. */
245     + i_state_bak = i_state;
246     + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
247 cvsdist 1.1 +
248 twaugh 1.16 + switch (mblength)
249 cvsdist 1.1 + {
250 twaugh 1.16 + case (size_t)-1: /* illegal byte sequence. */
251     + case (size_t)-2:
252     + mblength = 1;
253     + i_state = i_state_bak;
254     + if (convert)
255 cvsdist 1.1 + {
256 twaugh 1.16 + ++column;
257     + if (convert_entire_line == 0)
258     + convert = 0;
259     + }
260     + putchar (*bufpos);
261     + break;
262 cvsdist 1.1 +
263 twaugh 1.16 + case 0: /* null. */
264     + mblength = 1;
265     + if (convert && convert_entire_line == 0)
266     + convert = 0;
267     + putchar ('\0');
268     + break;
269 cvsdist 1.1 +
270 twaugh 1.16 + default:
271     + if (wc == L'\n') /* LF. */
272     + {
273     + tab_index = 0;
274     + column = 0;
275     + convert = 1;
276     + putchar ('\n');
277 cvsdist 1.1 + }
278 twaugh 1.16 + else if (wc == L'\t' && convert) /* Tab. */
279 cvsdist 1.1 + {
280 twaugh 1.16 + if (tab_size == 0)
281 cvsdist 1.1 + {
282 twaugh 1.16 + /* Do not let tab_index == first_free_tab;
283     + stop when it is 1 less. */
284     + while (tab_index < first_free_tab - 1
285     + && column >= tab_list[tab_index])
286     + tab_index++;
287     + next_tab_column = tab_list[tab_index];
288     + if (tab_index < first_free_tab - 1)
289     + tab_index++;
290     + if (column >= next_tab_column)
291     + next_tab_column = column + 1;
292 cvsdist 1.1 + }
293     + else
294 twaugh 1.16 + next_tab_column = column + tab_size - column % tab_size;
295     +
296     + while (column < next_tab_column)
297 cvsdist 1.1 + {
298 twaugh 1.16 + putchar (' ');
299     + ++column;
300 cvsdist 1.1 + }
301     + }
302 twaugh 1.16 + else /* Others. */
303 cvsdist 1.1 + {
304 twaugh 1.16 + if (convert)
305 cvsdist 1.1 + {
306 twaugh 1.16 + if (wc == L'\b')
307     + {
308     + if (column > 0)
309     + --column;
310     + }
311     + else
312     + {
313     + int width; /* The width of WC. */
314 cvsdist 1.1 +
315 twaugh 1.16 + width = wcwidth (wc);
316     + column += (width > 0) ? width : 0;
317     + if (convert_entire_line == 0)
318     + convert = 0;
319     + }
320 cvsdist 1.1 + }
321 twaugh 1.16 + fwrite (bufpos, sizeof(char), mblength, stdout);
322 cvsdist 1.1 + }
323     + }
324 twaugh 1.16 + buflen -= mblength;
325     + bufpos += mblength;
326 cvsdist 1.1 + }
327     +}
328     +#endif
329     +
330 twaugh 1.16 int
331     main (int argc, char **argv)
332 cvsdist 1.1 {
333 twaugh 1.20 @@ -429,7 +583,12 @@
334 twaugh 1.16
335     file_list = (optind < argc ? &argv[optind] : stdin_argv);
336    
337     - expand ();
338 cvsdist 1.1 +#if HAVE_MBRTOWC
339 twaugh 1.16 + if (MB_CUR_MAX > 1)
340     + expand_multibyte ();
341     + else
342     +#endif
343     + expand ();
344    
345     if (have_read_stdin && fclose (stdin) != 0)
346     error (EXIT_FAILURE, errno, "-");
347 twaugh 1.21 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
348     +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
349 ovasik 1.29 @@ -23,16 +23,30 @@
350 twaugh 1.20 #include <sys/types.h>
351     #include <getopt.h>
352 twaugh 1.11
353 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
354 cvsdist 1.1 +#if HAVE_WCHAR_H
355     +# include <wchar.h>
356     +#endif
357     +
358 twaugh 1.20 +/* Get iswblank(), towupper. */
359 cvsdist 1.1 +#if HAVE_WCTYPE_H
360     +# include <wctype.h>
361     +#endif
362     +
363     #include "system.h"
364     #include "error.h"
365 twaugh 1.20 #include "linebuffer.h"
366     -#include "memcasecmp.h"
367     #include "quote.h"
368     #include "stdio--.h"
369     #include "xmemcoll.h"
370 cvsdist 1.1 #include "xstrtol.h"
371 ovasik 1.25 #include "argmatch.h"
372 cvsdist 1.1
373     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
374     +#if HAVE_MBRTOWC && defined mbstate_t
375     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
376     +#endif
377     +
378     /* The official name of this program (e.g., no `g' prefix). */
379 twaugh 1.20 #define PROGRAM_NAME "join"
380 cvsdist 1.1
381 twaugh 1.20 @@ -104,10 +118,12 @@
382     /* Last element in `outlist', where a new element can be added. */
383     static struct outlist *outlist_end = &outlist_head;
384 cvsdist 1.1
385 twaugh 1.20 -/* Tab character separating fields. If negative, fields are separated
386     - by any nonempty string of blanks, otherwise by exactly one
387     - tab character whose value (when cast to unsigned char) equals TAB. */
388     -static int tab = -1;
389     +/* Tab character separating fields. If NULL, fields are separated
390     + by any nonempty string of blanks. */
391     +static char *tab = NULL;
392 cvsdist 1.1 +
393 twaugh 1.20 +/* The number of bytes used for tab. */
394     +static size_t tablen = 0;
395 cvsdist 1.1
396 ovasik 1.27 /* If nonzero, check that the input is correctly ordered. */
397     static enum
398 twaugh 1.20 @@ -199,10 +217,11 @@
399     if (ptr == lim)
400     return;
401 cvsdist 1.1
402 twaugh 1.20 - if (0 <= tab)
403     + if (tab != NULL)
404     {
405     + unsigned char t = tab[0];
406     char *sep;
407     - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
408     + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
409 ovasik 1.39 extract_field (line, ptr, sep - ptr);
410 twaugh 1.20 }
411     else
412     @@ -229,6 +248,148 @@
413     extract_field (line, ptr, lim - ptr);
414     }
415 cvsdist 1.1
416 twaugh 1.20 +#if HAVE_MBRTOWC
417     +static void
418     +xfields_multibyte (struct line *line)
419     +{
420     + char *ptr = line->buf.buffer;
421     + char const *lim = ptr + line->buf.length - 1;
422     + wchar_t wc = 0;
423     + size_t mblength = 1;
424     + mbstate_t state, state_bak;
425 cvsdist 1.1 +
426 twaugh 1.20 + memset (&state, 0, sizeof (mbstate_t));
427 cvsdist 1.1 +
428 ovasik 1.34 + if (ptr >= lim)
429 twaugh 1.20 + return;
430 cvsdist 1.1 +
431 twaugh 1.20 + if (tab != NULL)
432 cvsdist 1.1 + {
433 twaugh 1.20 + unsigned char t = tab[0];
434     + char *sep = ptr;
435     + for (; ptr < lim; ptr = sep + mblength)
436     + {
437     + sep = ptr;
438     + while (sep < lim)
439     + {
440     + state_bak = state;
441     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
442     +
443     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
444     + {
445     + mblength = 1;
446     + state = state_bak;
447     + }
448     + mblength = (mblength < 1) ? 1 : mblength;
449     +
450     + if (mblength == tablen && !memcmp (sep, tab, mblength))
451     + break;
452     + else
453     + {
454     + sep += mblength;
455     + continue;
456     + }
457     + }
458     +
459 ovasik 1.34 + if (sep >= lim)
460 twaugh 1.20 + break;
461     +
462     + extract_field (line, ptr, sep - ptr);
463     + }
464 cvsdist 1.1 + }
465     + else
466     + {
467 twaugh 1.20 + /* Skip leading blanks before the first field. */
468     + while(ptr < lim)
469     + {
470     + state_bak = state;
471     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
472     +
473     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
474     + {
475     + mblength = 1;
476     + state = state_bak;
477     + break;
478     + }
479     + mblength = (mblength < 1) ? 1 : mblength;
480     +
481     + if (!iswblank(wc))
482     + break;
483     + ptr += mblength;
484     + }
485 cvsdist 1.1 +
486 twaugh 1.20 + do
487     + {
488     + char *sep;
489     + state_bak = state;
490     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
491     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
492 cvsdist 1.1 + {
493 twaugh 1.20 + mblength = 1;
494     + state = state_bak;
495     + break;
496     + }
497     + mblength = (mblength < 1) ? 1 : mblength;
498 cvsdist 1.1 +
499 twaugh 1.20 + sep = ptr + mblength;
500 ovasik 1.34 + while (sep < lim)
501 cvsdist 1.1 + {
502 twaugh 1.20 + state_bak = state;
503     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
504     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
505     + {
506     + mblength = 1;
507     + state = state_bak;
508     + break;
509     + }
510     + mblength = (mblength < 1) ? 1 : mblength;
511     +
512     + if (iswblank (wc))
513     + break;
514 cvsdist 1.1 +
515 twaugh 1.20 + sep += mblength;
516 cvsdist 1.1 + }
517     +
518 twaugh 1.20 + extract_field (line, ptr, sep - ptr);
519 ovasik 1.34 + if (sep >= lim)
520 twaugh 1.20 + return;
521 cvsdist 1.1 +
522 twaugh 1.20 + state_bak = state;
523     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
524 cvsdist 1.1 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
525     + {
526 twaugh 1.20 + mblength = 1;
527     + state = state_bak;
528     + break;
529 cvsdist 1.1 + }
530 twaugh 1.20 + mblength = (mblength < 1) ? 1 : mblength;
531     +
532     + ptr = sep + mblength;
533 ovasik 1.34 + while (ptr < lim)
534 cvsdist 1.1 + {
535 twaugh 1.20 + state_bak = state;
536     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
537     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
538     + {
539     + mblength = 1;
540     + state = state_bak;
541     + break;
542     + }
543     + mblength = (mblength < 1) ? 1 : mblength;
544     +
545     + if (!iswblank (wc))
546     + break;
547     +
548     + ptr += mblength;
549 cvsdist 1.1 + }
550 twaugh 1.20 + }
551 ovasik 1.34 + while (ptr < lim);
552 twaugh 1.20 + }
553 cvsdist 1.1 +
554 twaugh 1.20 + extract_field (line, ptr, lim - ptr);
555     +}
556 cvsdist 1.1 +#endif
557 ovasik 1.27 +
558 ovasik 1.28 static void
559     freeline (struct line *line)
560 ovasik 1.27 {
561 twaugh 1.20 @@ -377,11 +601,18 @@
562 cvsdist 1.1
563 twaugh 1.20 /* Print the join of LINE1 and LINE2. */
564 cvsdist 1.1
565 twaugh 1.20 +#define PUT_TAB_CHAR \
566     + do \
567     + { \
568     + (tab != NULL) ? \
569     + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
570     + } \
571     + while (0)
572     +
573     static void
574     prjoin (struct line const *line1, struct line const *line2)
575 cvsdist 1.1 {
576 twaugh 1.20 const struct outlist *outlist;
577     - char output_separator = tab < 0 ? ' ' : tab;
578    
579     outlist = outlist_head.next;
580     if (outlist)
581     @@ -416,7 +647,7 @@
582 ovasik 1.39 o = o->next;
583     if (o == NULL)
584     break;
585     - putchar (output_separator);
586     + PUT_TAB_CHAR;
587     }
588 twaugh 1.20 putchar ('\n');
589     }
590     @@ -434,23 +665,23 @@
591     prfield (join_field_1, line1);
592     for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
593 ovasik 1.39 {
594     - putchar (output_separator);
595     + PUT_TAB_CHAR;
596     prfield (i, line1);
597     }
598 twaugh 1.20 for (i = join_field_1 + 1; i < line1->nfields; ++i)
599 ovasik 1.39 {
600     - putchar (output_separator);
601     + PUT_TAB_CHAR;
602     prfield (i, line1);
603     }
604 cvsdist 1.1
605 twaugh 1.20 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
606 ovasik 1.39 {
607     - putchar (output_separator);
608     + PUT_TAB_CHAR;
609     prfield (i, line2);
610     }
611 twaugh 1.20 for (i = join_field_2 + 1; i < line2->nfields; ++i)
612 ovasik 1.39 {
613     - putchar (output_separator);
614     + PUT_TAB_CHAR;
615     prfield (i, line2);
616     }
617 twaugh 1.20 putchar ('\n');
618 twaugh 1.21 @@ -859,20 +1090,41 @@
619 cvsdist 1.1
620 ovasik 1.39 case 't':
621     {
622     - unsigned char newtab = optarg[0];
623     - if (! newtab)
624     + char *newtab;
625     + size_t newtablen;
626     + if (! optarg[0])
627     error (EXIT_FAILURE, 0, _("empty tab"));
628     - if (optarg[1])
629     + newtab = xstrdup (optarg);
630     +#if HAVE_MBRTOWC
631     + if (MB_CUR_MAX > 1)
632     + {
633     + mbstate_t state;
634     +
635     + memset (&state, 0, sizeof (mbstate_t));
636     + newtablen = mbrtowc (NULL, newtab,
637     + strnlen (newtab, MB_LEN_MAX),
638     + &state);
639     + if (newtablen == (size_t) 0
640     + || newtablen == (size_t) -1
641     + || newtablen == (size_t) -2)
642     + newtablen = 1;
643     + }
644     + else
645     +#endif
646     + newtablen = 1;
647     +
648     + if (newtablen == 1 && newtab[1])
649     + {
650     + if (STREQ (newtab, "\\0"))
651     + newtab[0] = '\0';
652     + }
653     + if (tab != NULL && strcmp (tab, newtab))
654     {
655     - if (STREQ (optarg, "\\0"))
656     - newtab = '\0';
657     - else
658     - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
659     - quote (optarg));
660     + free (newtab);
661     + error (EXIT_FAILURE, 0, _("incompatible tabs"));
662     }
663     - if (0 <= tab && tab != newtab)
664     - error (EXIT_FAILURE, 0, _("incompatible tabs"));
665     tab = newtab;
666     + tablen = newtablen;
667     }
668     break;
669    
670 ovasik 1.25 diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
671     --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
672     +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
673     @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
674 ovasik 1.39 size_t jf_1, size_t jf_2)
675 ovasik 1.25 {
676     /* Start of field to compare in each file. */
677     - char *beg1;
678     - char *beg2;
679     -
680     - size_t len1;
681     - size_t len2; /* Length of fields to compare. */
682     + char *beg[2];
683     + char *copy[2];
684 ovasik 1.27 + size_t len[2]; /* Length of fields to compare. */
685 ovasik 1.25 int diff;
686     + int i, j;
687    
688     if (jf_1 < line1->nfields)
689     {
690     - beg1 = line1->fields[jf_1].beg;
691     - len1 = line1->fields[jf_1].len;
692     + beg[0] = line1->fields[jf_1].beg;
693     + len[0] = line1->fields[jf_1].len;
694     }
695     else
696     {
697     - beg1 = NULL;
698     - len1 = 0;
699     + beg[0] = NULL;
700     + len[0] = 0;
701     }
702    
703     if (jf_2 < line2->nfields)
704     {
705     - beg2 = line2->fields[jf_2].beg;
706     - len2 = line2->fields[jf_2].len;
707     + beg[1] = line2->fields[jf_2].beg;
708     + len[1] = line2->fields[jf_2].len;
709     }
710     else
711     {
712     - beg2 = NULL;
713     - len2 = 0;
714     + beg[1] = NULL;
715     + len[1] = 0;
716     }
717    
718     - if (len1 == 0)
719     - return len2 == 0 ? 0 : -1;
720     - if (len2 == 0)
721     + if (len[0] == 0)
722     + return len[1] == 0 ? 0 : -1;
723     + if (len[1] == 0)
724     return 1;
725    
726     if (ignore_case)
727     {
728     - /* FIXME: ignore_case does not work with NLS (in particular,
729     - with multibyte chars). */
730     - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
731     +#ifdef HAVE_MBRTOWC
732     + if (MB_CUR_MAX > 1)
733     + {
734     + size_t mblength;
735     + wchar_t wc, uwc;
736     + mbstate_t state, state_bak;
737     +
738     + memset (&state, '\0', sizeof (mbstate_t));
739     +
740     + for (i = 0; i < 2; i++)
741     + {
742     + copy[i] = alloca (len[i] + 1);
743     +
744     + for (j = 0; j < MIN (len[0], len[1]);)
745     + {
746     + state_bak = state;
747     + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
748     +
749     + switch (mblength)
750     + {
751     + case (size_t) -1:
752     + case (size_t) -2:
753     + state = state_bak;
754     + /* Fall through */
755     + case 0:
756     + mblength = 1;
757     + break;
758     +
759     + default:
760     + uwc = towupper (wc);
761     +
762     + if (uwc != wc)
763     + {
764     + mbstate_t state_wc;
765     +
766     + memset (&state_wc, '\0', sizeof (mbstate_t));
767     + wcrtomb (copy[i] + j, uwc, &state_wc);
768     + }
769     + else
770     + memcpy (copy[i] + j, beg[i] + j, mblength);
771     + }
772     + j += mblength;
773     + }
774     + copy[i][j] = '\0';
775     + }
776     + }
777     + else
778     +#endif
779     + {
780     + for (i = 0; i < 2; i++)
781     + {
782     + copy[i] = alloca (len[i] + 1);
783     +
784     + for (j = 0; j < MIN (len[0], len[1]); j++)
785     + copy[i][j] = toupper (beg[i][j]);
786     +
787     + copy[i][j] = '\0';
788     + }
789     + }
790     }
791     else
792     {
793     - if (hard_LC_COLLATE)
794 ovasik 1.39 - return xmemcoll (beg1, len1, beg2, len2);
795 ovasik 1.25 - diff = memcmp (beg1, beg2, MIN (len1, len2));
796     + copy[0] = (unsigned char *) beg[0];
797     + copy[1] = (unsigned char *) beg[1];
798     }
799    
800     + if (hard_LC_COLLATE)
801     + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
802     + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
803     +
804     +
805     if (diff)
806     return diff;
807     - return len1 < len2 ? -1 : len1 != len2;
808     + return len[0] - len[1];
809     }
810 cvsdist 1.1
811 ovasik 1.27 /* Check that successive input lines PREV and CURRENT from input file
812 twaugh 1.21 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
813     +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
814 twaugh 1.20 @@ -23,6 +23,16 @@
815     #include <getopt.h>
816     #include <sys/types.h>
817 cvsdist 1.1
818 twaugh 1.20 +/* Get mbstate_t, mbrtowc(). */
819     +#if HAVE_WCHAR_H
820     +# include <wchar.h>
821     +#endif
822     +
823     +/* Get isw* functions. */
824     +#if HAVE_WCTYPE_H
825     +# include <wctype.h>
826     +#endif
827     +
828     #include "system.h"
829     #include "argmatch.h"
830     #include "linebuffer.h"
831     @@ -32,7 +42,19 @@
832     #include "quote.h"
833     #include "xmemcoll.h"
834     #include "xstrtol.h"
835     -#include "memcasecmp.h"
836     +#include "xmemcoll.h"
837     +
838     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
839     + installation; work around this configuration error. */
840     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
841     +# define MB_LEN_MAX 16
842     +#endif
843     +
844     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
845     +#if HAVE_MBRTOWC && defined mbstate_t
846     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
847     +#endif
848     +
849    
850     /* The official name of this program (e.g., no `g' prefix). */
851     #define PROGRAM_NAME "uniq"
852     @@ -109,6 +131,10 @@
853     /* Select whether/how to delimit groups of duplicate lines. */
854     static enum delimit_method delimit_groups;
855    
856     +/* Function pointers. */
857     +static char *
858     +(*find_field) (struct linebuffer *line);
859     +
860     static struct option const longopts[] =
861     {
862     {"count", no_argument, NULL, 'c'},
863 twaugh 1.21 @@ -198,7 +224,7 @@
864 twaugh 1.20 return a pointer to the beginning of the line's field to be compared. */
865    
866     static char *
867 ovasik 1.28 -find_field (struct linebuffer const *line)
868 twaugh 1.20 +find_field_uni (struct linebuffer *line)
869     {
870     size_t count;
871 ovasik 1.28 char const *lp = line->buffer;
872 twaugh 1.21 @@ -219,6 +245,83 @@
873 ovasik 1.28 return line->buffer + i;
874 twaugh 1.20 }
875    
876     +#if HAVE_MBRTOWC
877     +
878     +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
879 ovasik 1.39 + do \
880     + { \
881     + mbstate_t state_bak; \
882     + \
883     + CONVFAIL = 0; \
884     + state_bak = *STATEP; \
885     + \
886     + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
887     + \
888     + switch (MBLENGTH) \
889     + { \
890     + case (size_t)-2: \
891     + case (size_t)-1: \
892     + *STATEP = state_bak; \
893     + CONVFAIL++; \
894     + /* Fall through */ \
895     + case 0: \
896     + MBLENGTH = 1; \
897     + } \
898     + } \
899 twaugh 1.20 + while (0)
900     +
901     +static char *
902     +find_field_multi (struct linebuffer *line)
903 cvsdist 1.1 +{
904 twaugh 1.20 + size_t count;
905     + char *lp = line->buffer;
906     + size_t size = line->length - 1;
907     + size_t pos;
908     + size_t mblength;
909 cvsdist 1.1 + wchar_t wc;
910 twaugh 1.20 + mbstate_t *statep;
911     + int convfail;
912 cvsdist 1.1 +
913 twaugh 1.20 + pos = 0;
914     + statep = &(line->state);
915 cvsdist 1.1 +
916 twaugh 1.20 + /* skip fields. */
917     + for (count = 0; count < skip_fields && pos < size; count++)
918 cvsdist 1.1 + {
919 twaugh 1.20 + while (pos < size)
920 ovasik 1.39 + {
921     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
922 twaugh 1.20 +
923 ovasik 1.39 + if (convfail || !iswblank (wc))
924     + {
925     + pos += mblength;
926     + break;
927     + }
928     + pos += mblength;
929     + }
930 cvsdist 1.1 +
931 twaugh 1.20 + while (pos < size)
932 ovasik 1.39 + {
933     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
934 cvsdist 1.1 +
935 ovasik 1.39 + if (!convfail && iswblank (wc))
936     + break;
937 cvsdist 1.1 +
938 ovasik 1.39 + pos += mblength;
939     + }
940 twaugh 1.20 + }
941 cvsdist 1.1 +
942 twaugh 1.20 + /* skip fields. */
943     + for (count = 0; count < skip_chars && pos < size; count++)
944     + {
945     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
946     + pos += mblength;
947 cvsdist 1.1 + }
948     +
949 twaugh 1.20 + return lp + pos;
950 cvsdist 1.1 +}
951     +#endif
952     +
953 twaugh 1.20 /* Return false if two strings OLD and NEW match, true if not.
954     OLD and NEW point not to the beginnings of the lines
955     but rather to the beginnings of the fields to compare.
956 twaugh 1.21 @@ -227,6 +330,8 @@
957 twaugh 1.20 static bool
958     different (char *old, char *new, size_t oldlen, size_t newlen)
959     {
960     + char *copy_old, *copy_new;
961     +
962     if (check_chars < oldlen)
963     oldlen = check_chars;
964     if (check_chars < newlen)
965 twaugh 1.21 @@ -234,14 +339,92 @@
966 cvsdist 1.1
967 twaugh 1.20 if (ignore_case)
968     {
969     - /* FIXME: This should invoke strcoll somehow. */
970     - return oldlen != newlen || memcasecmp (old, new, oldlen);
971     + size_t i;
972     +
973     + copy_old = alloca (oldlen + 1);
974     + copy_new = alloca (oldlen + 1);
975     +
976     + for (i = 0; i < oldlen; i++)
977 ovasik 1.39 + {
978     + copy_old[i] = toupper (old[i]);
979     + copy_new[i] = toupper (new[i]);
980     + }
981 twaugh 1.20 }
982     - else if (hard_LC_COLLATE)
983     - return xmemcoll (old, oldlen, new, newlen) != 0;
984     else
985     - return oldlen != newlen || memcmp (old, new, oldlen);
986     + {
987     + copy_old = (char *)old;
988     + copy_new = (char *)new;
989     + }
990     +
991     + return xmemcoll (copy_old, oldlen, copy_new, newlen);
992     +}
993     +
994     +#if HAVE_MBRTOWC
995     +static int
996     +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
997     +{
998     + size_t i, j, chars;
999     + const char *str[2];
1000     + char *copy[2];
1001     + size_t len[2];
1002     + mbstate_t state[2];
1003     + size_t mblength;
1004     + wchar_t wc, uwc;
1005     + mbstate_t state_bak;
1006     +
1007     + str[0] = old;
1008     + str[1] = new;
1009     + len[0] = oldlen;
1010     + len[1] = newlen;
1011     + state[0] = oldstate;
1012     + state[1] = newstate;
1013     +
1014     + for (i = 0; i < 2; i++)
1015     + {
1016     + copy[i] = alloca (len[i] + 1);
1017     +
1018     + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1019 ovasik 1.39 + {
1020     + state_bak = state[i];
1021     + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1022 twaugh 1.20 +
1023 ovasik 1.39 + switch (mblength)
1024     + {
1025     + case (size_t)-1:
1026     + case (size_t)-2:
1027     + state[i] = state_bak;
1028     + /* Fall through */
1029     + case 0:
1030     + mblength = 1;
1031     + break;
1032     +
1033     + default:
1034     + if (ignore_case)
1035     + {
1036     + uwc = towupper (wc);
1037     +
1038     + if (uwc != wc)
1039     + {
1040     + mbstate_t state_wc;
1041     +
1042     + memset (&state_wc, '\0', sizeof(mbstate_t));
1043     + wcrtomb (copy[i] + j, uwc, &state_wc);
1044     + }
1045     + else
1046     + memcpy (copy[i] + j, str[i] + j, mblength);
1047     + }
1048     + else
1049     + memcpy (copy[i] + j, str[i] + j, mblength);
1050     + }
1051     + j += mblength;
1052     + }
1053 twaugh 1.20 + copy[i][j] = '\0';
1054     + len[i] = j;
1055     + }
1056     +
1057     + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1058     }
1059     +#endif
1060    
1061     /* Output the line in linebuffer LINE to standard output
1062     provided that the switches say it should be output.
1063 twaugh 1.21 @@ -295,15 +478,43 @@
1064 twaugh 1.20 {
1065     char *prevfield IF_LINT (= NULL);
1066     size_t prevlen IF_LINT (= 0);
1067     +#if HAVE_MBRTOWC
1068     + mbstate_t prevstate;
1069     +
1070     + memset (&prevstate, '\0', sizeof (mbstate_t));
1071     +#endif
1072    
1073     while (!feof (stdin))
1074 ovasik 1.39 {
1075     char *thisfield;
1076     size_t thislen;
1077 twaugh 1.20 +#if HAVE_MBRTOWC
1078 ovasik 1.39 + mbstate_t thisstate;
1079 twaugh 1.20 +#endif
1080     +
1081 ovasik 1.39 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1082     break;
1083     thisfield = find_field (thisline);
1084     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1085 twaugh 1.20 +#if HAVE_MBRTOWC
1086 ovasik 1.39 + if (MB_CUR_MAX > 1)
1087 twaugh 1.20 + {
1088     + thisstate = thisline->state;
1089     +
1090     + if (prevline->length == 0 || different_multi
1091     + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1092     + {
1093     + fwrite (thisline->buffer, sizeof (char),
1094     + thisline->length, stdout);
1095     +
1096     + SWAP_LINES (prevline, thisline);
1097     + prevfield = thisfield;
1098     + prevlen = thislen;
1099     + prevstate = thisstate;
1100     + }
1101     + }
1102 ovasik 1.39 + else
1103 twaugh 1.20 +#endif
1104 ovasik 1.39 if (prevline->length == 0
1105     || different (thisfield, prevfield, thislen, prevlen))
1106     {
1107 twaugh 1.21 @@ -322,17 +533,26 @@
1108 twaugh 1.20 size_t prevlen;
1109     uintmax_t match_count = 0;
1110     bool first_delimiter = true;
1111     +#if HAVE_MBRTOWC
1112     + mbstate_t prevstate;
1113     +#endif
1114    
1115 ovasik 1.23 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1116 ovasik 1.39 goto closefiles;
1117 twaugh 1.20 prevfield = find_field (prevline);
1118     prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1119     +#if HAVE_MBRTOWC
1120     + prevstate = prevline->state;
1121     +#endif
1122    
1123     while (!feof (stdin))
1124 ovasik 1.39 {
1125     bool match;
1126     char *thisfield;
1127     size_t thislen;
1128     +#if HAVE_MBRTOWC
1129     + mbstate_t thisstate;
1130     +#endif
1131     if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1132     {
1133     if (ferror (stdin))
1134 twaugh 1.21 @@ -341,6 +561,15 @@
1135 ovasik 1.39 }
1136     thisfield = find_field (thisline);
1137     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1138 twaugh 1.20 +#if HAVE_MBRTOWC
1139 ovasik 1.39 + if (MB_CUR_MAX > 1)
1140     + {
1141 twaugh 1.20 + thisstate = thisline->state;
1142     + match = !different_multi (thisfield, prevfield,
1143     + thislen, prevlen, thisstate, prevstate);
1144     + }
1145 ovasik 1.39 + else
1146 twaugh 1.20 +#endif
1147 ovasik 1.39 match = !different (thisfield, prevfield, thislen, prevlen);
1148     match_count += match;
1149 twaugh 1.20
1150 twaugh 1.21 @@ -373,6 +602,9 @@
1151 ovasik 1.39 SWAP_LINES (prevline, thisline);
1152     prevfield = thisfield;
1153     prevlen = thislen;
1154     +#if HAVE_MBRTOWC
1155     + prevstate = thisstate;
1156     +#endif
1157     if (!match)
1158     match_count = 0;
1159     }
1160 twaugh 1.21 @@ -417,6 +649,19 @@
1161 twaugh 1.20
1162     atexit (close_stdout);
1163    
1164     +#if HAVE_MBRTOWC
1165     + if (MB_CUR_MAX > 1)
1166     + {
1167     + find_field = find_field_multi;
1168     + }
1169     + else
1170     +#endif
1171     + {
1172     + find_field = find_field_uni;
1173     + }
1174     +
1175     +
1176     +
1177     skip_chars = 0;
1178     skip_fields = 0;
1179     check_chars = SIZE_MAX;
1180 twaugh 1.21 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1181     +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1182 twaugh 1.20 @@ -23,11 +23,33 @@
1183     #include <getopt.h>
1184 twaugh 1.16 #include <sys/types.h>
1185 cvsdist 1.1
1186 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1187 cvsdist 1.1 +#if HAVE_WCHAR_H
1188     +# include <wchar.h>
1189     +#endif
1190     +
1191 twaugh 1.20 +/* Get iswprint(), iswblank(). */
1192 cvsdist 1.1 +#if HAVE_WCTYPE_H
1193     +# include <wctype.h>
1194     +#endif
1195     +
1196     #include "system.h"
1197 twaugh 1.16 #include "error.h"
1198 twaugh 1.11 #include "quote.h"
1199 cvsdist 1.1 #include "xstrtol.h"
1200 twaugh 1.16
1201 twaugh 1.20 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1202     + installation; work around this configuration error. */
1203     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1204     +# undef MB_LEN_MAX
1205     +# define MB_LEN_MAX 16
1206     +#endif
1207     +
1208 cvsdist 1.1 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1209     +#if HAVE_MBRTOWC && defined mbstate_t
1210     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1211     +#endif
1212     +
1213 twaugh 1.20 #define TAB_WIDTH 8
1214    
1215 twaugh 1.16 /* The official name of this program (e.g., no `g' prefix). */
1216 ovasik 1.28 @@ -35,20 +57,41 @@
1217 cvsdist 1.1
1218 ovasik 1.27 #define AUTHORS proper_name ("David MacKenzie")
1219 cvsdist 1.1
1220 twaugh 1.20 +#define FATAL_ERROR(Message) \
1221     + do \
1222     + { \
1223     + error (0, 0, (Message)); \
1224     + usage (2); \
1225     + } \
1226     + while (0)
1227 cvsdist 1.1 +
1228 twaugh 1.20 +enum operating_mode
1229     +{
1230     + /* Fold texts by columns that are at the given positions. */
1231     + column_mode,
1232 twaugh 1.16 +
1233 twaugh 1.20 + /* Fold texts by bytes that are at the given positions. */
1234     + byte_mode,
1235     +
1236     + /* Fold texts by characters that are at the given positions. */
1237     + character_mode,
1238     +};
1239     +
1240     +/* The current operating mode. (Default: column_mode) */
1241     +static enum operating_mode operating_mode;
1242     +
1243     /* If nonzero, try to break on whitespace. */
1244     static bool break_spaces;
1245    
1246     -/* If nonzero, count bytes, not column positions. */
1247     -static bool count_bytes;
1248     -
1249     /* If nonzero, at least one of the files we read was standard input. */
1250     static bool have_read_stdin;
1251    
1252     -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1253     +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1254    
1255     static struct option const longopts[] =
1256     {
1257     {"bytes", no_argument, NULL, 'b'},
1258     + {"characters", no_argument, NULL, 'c'},
1259     {"spaces", no_argument, NULL, 's'},
1260     {"width", required_argument, NULL, 'w'},
1261     {GETOPT_HELP_OPTION_DECL},
1262     @@ -81,6 +124,7 @@
1263     "), stdout);
1264     fputs (_("\
1265     -b, --bytes count bytes rather than columns\n\
1266     + -c, --characters count characters rather than columns\n\
1267     -s, --spaces break at spaces\n\
1268     -w, --width=WIDTH use WIDTH columns instead of 80\n\
1269     "), stdout);
1270     @@ -98,7 +142,7 @@
1271     static size_t
1272     adjust_column (size_t column, char c)
1273     {
1274     - if (!count_bytes)
1275     + if (operating_mode != byte_mode)
1276     {
1277     if (c == '\b')
1278 ovasik 1.39 {
1279 twaugh 1.20 @@ -121,30 +165,14 @@
1280     to stdout, with maximum line length WIDTH.
1281     Return true if successful. */
1282    
1283     -static bool
1284     -fold_file (char const *filename, size_t width)
1285     +static void
1286     +fold_text (FILE *istream, size_t width, int *saved_errno)
1287 cvsdist 1.1 {
1288 twaugh 1.20 - FILE *istream;
1289     int c;
1290     size_t column = 0; /* Screen column where next char will go. */
1291     size_t offset_out = 0; /* Index in `line_out' for next char. */
1292     static char *line_out = NULL;
1293     static size_t allocated_out = 0;
1294     - int saved_errno;
1295     -
1296     - if (STREQ (filename, "-"))
1297     - {
1298     - istream = stdin;
1299     - have_read_stdin = true;
1300     - }
1301     - else
1302     - istream = fopen (filename, "r");
1303     -
1304     - if (istream == NULL)
1305     - {
1306     - error (0, errno, "%s", filename);
1307     - return false;
1308     - }
1309 twaugh 1.16
1310 twaugh 1.20 while ((c = getc (istream)) != EOF)
1311 twaugh 1.16 {
1312 twaugh 1.20 @@ -172,6 +200,15 @@
1313 ovasik 1.39 bool found_blank = false;
1314     size_t logical_end = offset_out;
1315 twaugh 1.20
1316 ovasik 1.39 + /* If LINE_OUT is empty, put the new
1317     + character in LINE_OUT even though
1318     + column is bigger than width. */
1319     + if (offset_out == 0)
1320     + {
1321     + line_out[offset_out++] = c;
1322     + continue;
1323     + }
1324     +
1325