/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.34 - (show annotations) (download) (as text)
Thu Apr 23 17:01:01 2009 UTC (7 months ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-7_4-2_fc12, coreutils-7_2-3_fc12, coreutils-7_4-1_fc12
Changes since 1.33: +6 -6 lines
File MIME type: text/x-patch
fix segfaults in join (i18n patch) when using multibyte locales(#497368)
1 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2 --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3 +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4 @@ -26,7 +26,7 @@
5 my $prog = 'cut';
6 my $try = "Try \`$prog --help' for more information.\n";
7 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8 -my $inval = "$prog: invalid byte or field list\n$try";
9 +my $inval = "$prog: invalid byte, character or field list\n$try";
10 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11
12 my @Tests =
13 @@ -140,8 +140,8 @@
14 ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15
16 # None of the following invalid ranges provoked an error up to coreutils-6.9.
17 - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18 - {ERR=>"$prog: invalid decreasing range\n$try"}],
19 + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 @@ -0,0 +1,58 @@
27 +#! /bin/sh
28 +case $# in
29 + 0) xx='../src/sort';;
30 + *) xx="$1";;
31 +esac
32 +test "$VERBOSE" && echo=echo || echo=:
33 +$echo testing program: $xx
34 +errors=0
35 +test "$srcdir" || srcdir=.
36 +test "$VERBOSE" && $xx --version 2> /dev/null
37 +
38 +export LC_ALL=en_US.UTF-8
39 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40 +errors=0
41 +
42 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 +code=$?
44 +if test $code != 0; then
45 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46 + errors=`expr $errors + 1`
47 +else
48 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 + case $? in
50 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 + errors=`expr $errors + 1`;;
54 + 2) $echo "Test mb1 may have failed." 1>&2
55 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 + errors=`expr $errors + 1`;;
57 + esac
58 +fi
59 +
60 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 +code=$?
62 +if test $code != 0; then
63 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64 + errors=`expr $errors + 1`
65 +else
66 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 + case $? in
68 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 + errors=`expr $errors + 1`;;
72 + 2) $echo "Test mb2 may have failed." 1>&2
73 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 + errors=`expr $errors + 1`;;
75 + esac
76 +fi
77 +
78 +if test $errors = 0; then
79 + $echo Passed all 113 tests. 1>&2
80 +else
81 + $echo Failed $errors tests. 1>&2
82 +fi
83 +test $errors = 0 || errors=1
84 +exit $errors
85 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 @@ -0,0 +1,4 @@
88 +Apple@AA10@@20
89 +Banana@AA5@@30
90 +Citrus@AA20@@5
91 +Cherry@AA30@@10
92 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 @@ -0,0 +1,4 @@
95 +Citrus@AA20@@5
96 +Cherry@AA30@@10
97 +Apple@AA10@@20
98 +Banana@AA5@@30
99 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 @@ -0,0 +1,4 @@
102 +Apple@10
103 +Banana@5
104 +Citrus@20
105 +Cherry@30
106 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 @@ -0,0 +1,4 @@
109 +Banana@5
110 +Apple@10
111 +Citrus@20
112 +Cherry@30
113 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114 --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115 +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 @@ -192,6 +192,7 @@
117 misc/sort-compress \
118 misc/sort-continue \
119 misc/sort-files0-from \
120 + misc/sort-mb-tests \
121 misc/sort-merge \
122 misc/sort-merge-fdlimit \
123 misc/sort-rand \
124 @@ -391,6 +392,10 @@
125 $(root_tests)
126
127 pr_data = \
128 + misc/mb1.X \
129 + misc/mb1.I \
130 + misc/mb2.X \
131 + misc/mb2.I \
132 pr/0F \
133 pr/0FF \
134 pr/0FFnt \
135 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136 +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 @@ -22,6 +22,11 @@
138
139 # include <stdio.h>
140
141 +/* Get mbstate_t. */
142 +# if HAVE_WCHAR_H
143 +# include <wchar.h>
144 +# endif
145 +
146 /* A `struct linebuffer' holds a line of text. */
147
148 struct linebuffer
149 @@ -29,6 +34,9 @@
150 size_t size; /* Allocated. */
151 size_t length; /* Used. */
152 char *buffer;
153 +# if HAVE_WCHAR_H
154 + mbstate_t state;
155 +# endif
156 };
157
158 /* Initialize linebuffer LINEBUFFER for use. */
159 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160 +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 @@ -38,11 +38,28 @@
162 #include <stdio.h>
163 #include <getopt.h>
164 #include <sys/types.h>
165 +
166 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 +#if HAVE_WCHAR_H
168 +# include <wchar.h>
169 +#endif
170 +
171 #include "system.h"
172 #include "error.h"
173 #include "quote.h"
174 #include "xstrndup.h"
175
176 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 + installation; work around this configuration error. */
178 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179 +# define MB_LEN_MAX 16
180 +#endif
181 +
182 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183 +#if HAVE_MBRTOWC && defined mbstate_t
184 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185 +#endif
186 +
187 /* The official name of this program (e.g., no `g' prefix). */
188 #define PROGRAM_NAME "expand"
189
190 @@ -183,6 +200,7 @@
191 stops = num_start + len - 1;
192 }
193 }
194 +
195 else
196 {
197 error (0, 0, _("tab size contains invalid character(s): %s"),
198 @@ -365,6 +383,142 @@
199 }
200 }
201
202 +#if HAVE_MBRTOWC
203 +static void
204 +expand_multibyte (void)
205 +{
206 + FILE *fp; /* Input stream. */
207 + mbstate_t i_state; /* Current shift state of the input stream. */
208 + mbstate_t i_state_bak; /* Back up the I_STATE. */
209 + mbstate_t o_state; /* Current shift state of the output stream. */
210 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
211 + char *bufpos; /* Next read position of BUF. */
212 + size_t buflen = 0; /* The length of the byte sequence in buf. */
213 + wchar_t wc; /* The wide character just converted. */
214 + size_t mblength; /* The byte length of the multibyte
215 + sequence that was converted to WC. */
216 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
217 + int column = 0; /* Column on screen of the next char. */
218 + int next_tab_column; /* Column the next tab stop is on. */
219 + int convert = 1; /* If nonzero, perform translations. */
220 +
221 + fp = next_file ((FILE *) NULL);
222 + if (fp == NULL)
223 + return;
224 +
225 + memset (&o_state, '\0', sizeof(mbstate_t));
226 + memset (&i_state, '\0', sizeof(mbstate_t));
227 +
228 + for (;;)
229 + {
230 + /* Refill the buffer BUF. */
231 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
232 + {
233 + memmove (buf, bufpos, buflen);
234 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
235 + bufpos = buf;
236 + }
237 +
238 + /* No character is left in BUF. */
239 + if (buflen < 1)
240 + {
241 + fp = next_file (fp);
242 +
243 + if (fp == NULL)
244 + break; /* No more files. */
245 + else
246 + {
247 + memset (&i_state, '\0', sizeof(mbstate_t));
248 + continue;
249 + }
250 + }
251 +
252 + /* Get a wide character. */
253 + i_state_bak = i_state;
254 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
255 +
256 + switch (mblength)
257 + {
258 + case (size_t)-1: /* illegal byte sequence. */
259 + case (size_t)-2:
260 + mblength = 1;
261 + i_state = i_state_bak;
262 + if (convert)
263 + {
264 + ++column;
265 + if (convert_entire_line == 0)
266 + convert = 0;
267 + }
268 + putchar (*bufpos);
269 + break;
270 +
271 + case 0: /* null. */
272 + mblength = 1;
273 + if (convert && convert_entire_line == 0)
274 + convert = 0;
275 + putchar ('\0');
276 + break;
277 +
278 + default:
279 + if (wc == L'\n') /* LF. */
280 + {
281 + tab_index = 0;
282 + column = 0;
283 + convert = 1;
284 + putchar ('\n');
285 + }
286 + else if (wc == L'\t' && convert) /* Tab. */
287 + {
288 + if (tab_size == 0)
289 + {
290 + /* Do not let tab_index == first_free_tab;
291 + stop when it is 1 less. */
292 + while (tab_index < first_free_tab - 1
293 + && column >= tab_list[tab_index])
294 + tab_index++;
295 + next_tab_column = tab_list[tab_index];
296 + if (tab_index < first_free_tab - 1)
297 + tab_index++;
298 + if (column >= next_tab_column)
299 + next_tab_column = column + 1;
300 + }
301 + else
302 + next_tab_column = column + tab_size - column % tab_size;
303 +
304 + while (column < next_tab_column)
305 + {
306 + putchar (' ');
307 + ++column;
308 + }
309 + }
310 + else /* Others. */
311 + {
312 + if (convert)
313 + {
314 + if (wc == L'\b')
315 + {
316 + if (column > 0)
317 + --column;
318 + }
319 + else
320 + {
321 + int width; /* The width of WC. */
322 +
323 + width = wcwidth (wc);
324 + column += (width > 0) ? width : 0;
325 + if (convert_entire_line == 0)
326 + convert = 0;
327 + }
328 + }
329 + fwrite (bufpos, sizeof(char), mblength, stdout);
330 + }
331 + }
332 + buflen -= mblength;
333 + bufpos += mblength;
334 + }
335 +}
336 +#endif
337 +
338 int
339 main (int argc, char **argv)
340 {
341 @@ -429,7 +583,12 @@
342
343 file_list = (optind < argc ? &argv[optind] : stdin_argv);
344
345 - expand ();
346 +#if HAVE_MBRTOWC
347 + if (MB_CUR_MAX > 1)
348 + expand_multibyte ();
349 + else
350 +#endif
351 + expand ();
352
353 if (have_read_stdin && fclose (stdin) != 0)
354 error (EXIT_FAILURE, errno, "-");
355 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
356 +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
357 @@ -23,16 +23,30 @@
358 #include <sys/types.h>
359 #include <getopt.h>
360
361 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
362 +#if HAVE_WCHAR_H
363 +# include <wchar.h>
364 +#endif
365 +
366 +/* Get iswblank(), towupper. */
367 +#if HAVE_WCTYPE_H
368 +# include <wctype.h>
369 +#endif
370 +
371 #include "system.h"
372 #include "error.h"
373 #include "linebuffer.h"
374 -#include "memcasecmp.h"
375 #include "quote.h"
376 #include "stdio--.h"
377 #include "xmemcoll.h"
378 #include "xstrtol.h"
379 #include "argmatch.h"
380
381 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
382 +#if HAVE_MBRTOWC && defined mbstate_t
383 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
384 +#endif
385 +
386 /* The official name of this program (e.g., no `g' prefix). */
387 #define PROGRAM_NAME "join"
388
389 @@ -104,10 +118,12 @@
390 /* Last element in `outlist', where a new element can be added. */
391 static struct outlist *outlist_end = &outlist_head;
392
393 -/* Tab character separating fields. If negative, fields are separated
394 - by any nonempty string of blanks, otherwise by exactly one
395 - tab character whose value (when cast to unsigned char) equals TAB. */
396 -static int tab = -1;
397 +/* Tab character separating fields. If NULL, fields are separated
398 + by any nonempty string of blanks. */
399 +static char *tab = NULL;
400 +
401 +/* The number of bytes used for tab. */
402 +static size_t tablen = 0;
403
404 /* If nonzero, check that the input is correctly ordered. */
405 static enum
406 @@ -199,10 +217,11 @@
407 if (ptr == lim)
408 return;
409
410 - if (0 <= tab)
411 + if (tab != NULL)
412 {
413 + unsigned char t = tab[0];
414 char *sep;
415 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
416 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
417 extract_field (line, ptr, sep - ptr);
418 }
419 else
420 @@ -229,6 +248,148 @@
421 extract_field (line, ptr, lim - ptr);
422 }
423
424 +#if HAVE_MBRTOWC
425 +static void
426 +xfields_multibyte (struct line *line)
427 +{
428 + char *ptr = line->buf.buffer;
429 + char const *lim = ptr + line->buf.length - 1;
430 + wchar_t wc = 0;
431 + size_t mblength = 1;
432 + mbstate_t state, state_bak;
433 +
434 + memset (&state, 0, sizeof (mbstate_t));
435 +
436 + if (ptr >= lim)
437 + return;
438 +
439 + if (tab != NULL)
440 + {
441 + unsigned char t = tab[0];
442 + char *sep = ptr;
443 + for (; ptr < lim; ptr = sep + mblength)
444 + {
445 + sep = ptr;
446 + while (sep < lim)
447 + {
448 + state_bak = state;
449 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
450 +
451 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
452 + {
453 + mblength = 1;
454 + state = state_bak;
455 + }
456 + mblength = (mblength < 1) ? 1 : mblength;
457 +
458 + if (mblength == tablen && !memcmp (sep, tab, mblength))
459 + break;
460 + else
461 + {
462 + sep += mblength;
463 + continue;
464 + }
465 + }
466 +
467 + if (sep >= lim)
468 + break;
469 +
470 + extract_field (line, ptr, sep - ptr);
471 + }
472 + }
473 + else
474 + {
475 + /* Skip leading blanks before the first field. */
476 + while(ptr < lim)
477 + {
478 + state_bak = state;
479 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
480 +
481 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
482 + {
483 + mblength = 1;
484 + state = state_bak;
485 + break;
486 + }
487 + mblength = (mblength < 1) ? 1 : mblength;
488 +
489 + if (!iswblank(wc))
490 + break;
491 + ptr += mblength;
492 + }
493 +
494 + do
495 + {
496 + char *sep;
497 + state_bak = state;
498 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
499 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
500 + {
501 + mblength = 1;
502 + state = state_bak;
503 + break;
504 + }
505 + mblength = (mblength < 1) ? 1 : mblength;
506 +
507 + sep = ptr + mblength;
508 + while (sep < lim)
509 + {
510 + state_bak = state;
511 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
512 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
513 + {
514 + mblength = 1;
515 + state = state_bak;
516 + break;
517 + }
518 + mblength = (mblength < 1) ? 1 : mblength;
519 +
520 + if (iswblank (wc))
521 + break;
522 +
523 + sep += mblength;
524 + }
525 +
526 + extract_field (line, ptr, sep - ptr);
527 + if (sep >= lim)
528 + return;
529 +
530 + state_bak = state;
531 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
532 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
533 + {
534 + mblength = 1;
535 + state = state_bak;
536 + break;
537 + }
538 + mblength = (mblength < 1) ? 1 : mblength;
539 +
540 + ptr = sep + mblength;
541 + while (ptr < lim)
542 + {
543 + state_bak = state;
544 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
545 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
546 + {
547 + mblength = 1;
548 + state = state_bak;
549 + break;
550 + }
551 + mblength = (mblength < 1) ? 1 : mblength;
552 +
553 + if (!iswblank (wc))
554 + break;
555 +
556 + ptr += mblength;
557 + }
558 + }
559 + while (ptr < lim);
560 + }
561 +
562 + extract_field (line, ptr, lim - ptr);
563 +}
564 +#endif
565 +
566 static void
567 freeline (struct line *line)
568 {
569 @@ -377,11 +601,18 @@
570
571 /* Print the join of LINE1 and LINE2. */
572
573 +#define PUT_TAB_CHAR \
574 + do \
575 + { \
576 + (tab != NULL) ? \
577 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
578 + } \
579 + while (0)
580 +
581 static void
582 prjoin (struct line const *line1, struct line const *line2)
583 {
584 const struct outlist *outlist;
585 - char output_separator = tab < 0 ? ' ' : tab;
586
587 outlist = outlist_head.next;
588 if (outlist)
589 @@ -397,12 +628,12 @@
590 if (o->file == 0)
591 {
592 if (line1 == &uni_blank)
593 - {
594 + {
595 line = line2;
596 field = join_field_2;
597 }
598 else
599 - {
600 + {
601 line = line1;
602 field = join_field_1;
603 }
604 @@ -416,7 +647,7 @@
605 o = o->next;
606 if (o == NULL)
607 break;
608 - putchar (output_separator);
609 + PUT_TAB_CHAR;
610 }
611 putchar ('\n');
612 }
613 @@ -434,23 +665,23 @@
614 prfield (join_field_1, line1);
615 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
616 {
617 - putchar (output_separator);
618 + PUT_TAB_CHAR;
619 prfield (i, line1);
620 }
621 for (i = join_field_1 + 1; i < line1->nfields; ++i)
622 {
623 - putchar (output_separator);
624 + PUT_TAB_CHAR;
625 prfield (i, line1);
626 }
627
628 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
629 {
630 - putchar (output_separator);
631 + PUT_TAB_CHAR;
632 prfield (i, line2);
633 }
634 for (i = join_field_2 + 1; i < line2->nfields; ++i)
635 {
636 - putchar (output_separator);
637 + PUT_TAB_CHAR;
638 prfield (i, line2);
639 }
640 putchar ('\n');
641 @@ -859,20 +1090,41 @@
642
643 case 't':
644 {
645 - unsigned char newtab = optarg[0];
646 - if (! newtab)
647 + char *newtab;
648 + size_t newtablen;
649 + if (! optarg[0])
650 error (EXIT_FAILURE, 0, _("empty tab"));
651 - if (optarg[1])
652 + newtab = xstrdup (optarg);
653 +#if HAVE_MBRTOWC
654 + if (MB_CUR_MAX > 1)
655 + {
656 + mbstate_t state;
657 +
658 + memset (&state, 0, sizeof (mbstate_t));
659 + newtablen = mbrtowc (NULL, newtab,
660 + strnlen (newtab, MB_LEN_MAX),
661 + &state);
662 + if (newtablen == (size_t) 0
663 + || newtablen == (size_t) -1
664 + || newtablen == (size_t) -2)
665 + newtablen = 1;
666 + }
667 + else
668 +#endif
669 + newtablen = 1;
670 +
671 + if (newtablen == 1 && newtab[1])
672 + {
673 + if (STREQ (newtab, "\\0"))
674 + newtab[0] = '\0';
675 + }
676 + if (tab != NULL && strcmp (tab, newtab))
677 {
678 - if (STREQ (optarg, "\\0"))
679 - newtab = '\0';
680 - else
681 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
682 - quote (optarg));
683 + free (newtab);
684 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
685 }
686 - if (0 <= tab && tab != newtab)
687 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
688 tab = newtab;
689 + tablen = newtablen;
690 }
691 break;
692
693 diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
694 --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
695 +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
696 @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
697 size_t jf_1, size_t jf_2)
698 {
699 /* Start of field to compare in each file. */
700 - char *beg1;
701 - char *beg2;
702 -
703 - size_t len1;
704 - size_t len2; /* Length of fields to compare. */
705 + char *beg[2];
706 + char *copy[2];
707 + size_t len[2]; /* Length of fields to compare. */
708 int diff;
709 + int i, j;
710
711 if (jf_1 < line1->nfields)
712 {
713 - beg1 = line1->fields[jf_1].beg;
714 - len1 = line1->fields[jf_1].len;
715 + beg[0] = line1->fields[jf_1].beg;
716 + len[0] = line1->fields[jf_1].len;
717 }
718 else
719 {
720 - beg1 = NULL;
721 - len1 = 0;
722 + beg[0] = NULL;
723 + len[0] = 0;
724 }
725
726 if (jf_2 < line2->nfields)
727 {
728 - beg2 = line2->fields[jf_2].beg;
729 - len2 = line2->fields[jf_2].len;
730 + beg[1] = line2->fields[jf_2].beg;
731 + len[1] = line2->fields[jf_2].len;
732 }
733 else
734 {
735 - beg2 = NULL;
736 - len2 = 0;
737 + beg[1] = NULL;
738 + len[1] = 0;
739 }
740
741 - if (len1 == 0)
742 - return len2 == 0 ? 0 : -1;
743 - if (len2 == 0)
744 + if (len[0] == 0)
745 + return len[1] == 0 ? 0 : -1;
746 + if (len[1] == 0)
747 return 1;
748
749 if (ignore_case)
750 {
751 - /* FIXME: ignore_case does not work with NLS (in particular,
752 - with multibyte chars). */
753 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
754 +#ifdef HAVE_MBRTOWC
755 + if (MB_CUR_MAX > 1)
756 + {
757 + size_t mblength;
758 + wchar_t wc, uwc;
759 + mbstate_t state, state_bak;
760 +
761 + memset (&state, '\0', sizeof (mbstate_t));
762 +
763 + for (i = 0; i < 2; i++)
764 + {
765 + copy[i] = alloca (len[i] + 1);
766 +
767 + for (j = 0; j < MIN (len[0], len[1]);)
768 + {
769 + state_bak = state;
770 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
771 +
772 + switch (mblength)
773 + {
774 + case (size_t) -1:
775 + case (size_t) -2:
776 + state = state_bak;
777 + /* Fall through */
778 + case 0:
779 + mblength = 1;
780 + break;
781 +
782 + default:
783 + uwc = towupper (wc);
784 +
785 + if (uwc != wc)
786 + {
787 + mbstate_t state_wc;
788 +
789 + memset (&state_wc, '\0', sizeof (mbstate_t));
790 + wcrtomb (copy[i] + j, uwc, &state_wc);
791 + }
792 + else
793 + memcpy (copy[i] + j, beg[i] + j, mblength);
794 + }
795 + j += mblength;
796 + }
797 + copy[i][j] = '\0';
798 + }
799 + }
800 + else
801 +#endif
802 + {
803 + for (i = 0; i < 2; i++)
804 + {
805 + copy[i] = alloca (len[i] + 1);
806 +
807 + for (j = 0; j < MIN (len[0], len[1]); j++)
808 + copy[i][j] = toupper (beg[i][j]);
809 +
810 + copy[i][j] = '\0';
811 + }
812 + }
813 }
814 else
815 {
816 - if (hard_LC_COLLATE)
817 - return xmemcoll (beg1, len1, beg2, len2);
818 - diff = memcmp (beg1, beg2, MIN (len1, len2));
819 + copy[0] = (unsigned char *) beg[0];
820 + copy[1] = (unsigned char *) beg[1];
821 }
822
823 + if (hard_LC_COLLATE)
824 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
825 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
826 +
827 +
828 if (diff)
829 return diff;
830 - return len1 < len2 ? -1 : len1 != len2;
831 + return len[0] - len[1];
832 }
833
834 /* Check that successive input lines PREV and CURRENT from input file
835 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
836 +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
837 @@ -23,6 +23,16 @@
838 #include <getopt.h>
839 #include <sys/types.h>
840
841 +/* Get mbstate_t, mbrtowc(). */
842 +#if HAVE_WCHAR_H
843 +# include <wchar.h>
844 +#endif
845 +
846 +/* Get isw* functions. */
847 +#if HAVE_WCTYPE_H
848 +# include <wctype.h>
849 +#endif
850 +
851 #include "system.h"
852 #include "argmatch.h"
853 #include "linebuffer.h"
854 @@ -32,7 +42,19 @@
855 #include "quote.h"
856 #include "xmemcoll.h"
857 #include "xstrtol.h"
858 -#include "memcasecmp.h"
859 +#include "xmemcoll.h"
860 +
861 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
862 + installation; work around this configuration error. */
863 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
864 +# define MB_LEN_MAX 16
865 +#endif
866 +
867 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
868 +#if HAVE_MBRTOWC && defined mbstate_t
869 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
870 +#endif
871 +
872
873 /* The official name of this program (e.g., no `g' prefix). */
874 #define PROGRAM_NAME "uniq"
875 @@ -109,6 +131,10 @@
876 /* Select whether/how to delimit groups of duplicate lines. */
877 static enum delimit_method delimit_groups;
878
879 +/* Function pointers. */
880 +static char *
881 +(*find_field) (struct linebuffer *line);
882 +
883 static struct option const longopts[] =
884 {
885 {"count", no_argument, NULL, 'c'},
886 @@ -198,7 +224,7 @@
887 return a pointer to the beginning of the line's field to be compared. */
888
889 static char *
890 -find_field (struct linebuffer const *line)
891 +find_field_uni (struct linebuffer *line)
892 {
893 size_t count;
894 char const *lp = line->buffer;
895 @@ -219,6 +245,83 @@
896 return line->buffer + i;
897 }
898
899 +#if HAVE_MBRTOWC
900 +
901 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
902 + do \
903 + { \
904 + mbstate_t state_bak; \
905 + \
906 + CONVFAIL = 0; \
907 + state_bak = *STATEP; \
908 + \
909 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
910 + \
911 + switch (MBLENGTH) \
912 + { \
913 + case (size_t)-2: \
914 + case (size_t)-1: \
915 + *STATEP = state_bak; \
916 + CONVFAIL++; \
917 + /* Fall through */ \
918 + case 0: \
919 + MBLENGTH = 1; \
920 + } \
921 + } \
922 + while (0)
923 +
924 +static char *
925 +find_field_multi (struct linebuffer *line)
926 +{
927 + size_t count;
928 + char *lp = line->buffer;
929 + size_t size = line->length - 1;
930 + size_t pos;
931 + size_t mblength;
932 + wchar_t wc;
933 + mbstate_t *statep;
934 + int convfail;
935 +
936 + pos = 0;
937 + statep = &(line->state);
938 +
939 + /* skip fields. */
940 + for (count = 0; count < skip_fields && pos < size; count++)
941 + {
942 + while (pos < size)
943 + {
944 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
945 +
946 + if (convfail || !iswblank (wc))
947 + {
948 + pos += mblength;
949 + break;
950 + }
951 + pos += mblength;
952 + }
953 +
954 + while (pos < size)
955 + {
956 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
957 +
958 + if (!convfail && iswblank (wc))
959 + break;
960 +
961 + pos += mblength;
962 + }
963 + }
964 +
965 + /* skip chars. */
966 + for (count = 0; count < skip_chars && pos < size; count++)
967 + {
968 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
969 + pos += mblength;
970 + }
971 +
972 + return lp + pos;
973 +}
974 +#endif
975 +
976 /* Return false if two strings OLD and NEW match, true if not.
977 OLD and NEW point not to the beginnings of the lines
978 but rather to the beginnings of the fields to compare.
979 @@ -227,6 +330,8 @@
980 static bool
981 different (char *old, char *new, size_t oldlen, size_t newlen)
982 {
983 + char *copy_old, *copy_new;
984 +
985 if (check_chars < oldlen)
986 oldlen = check_chars;
987 if (check_chars < newlen)
988 @@ -234,14 +339,92 @@
989
990 if (ignore_case)
991 {
992 - /* FIXME: This should invoke strcoll somehow. */
993 - return oldlen != newlen || memcasecmp (old, new, oldlen);
994 + size_t i;
995 +
996 + copy_old = alloca (oldlen + 1);
997 + copy_new = alloca (oldlen + 1);
998 +
999 + for (i = 0; i < oldlen; i++)
1000 + {
1001 + copy_old[i] = toupper (old[i]);
1002 + copy_new[i] = toupper (new[i]);
1003 + }
1004 }
1005 - else if (hard_LC_COLLATE)
1006 - return xmemcoll (old, oldlen, new, newlen) != 0;
1007 else
1008 - return oldlen != newlen || memcmp (old, new, oldlen);
1009 + {
1010 + copy_old = (char *)old;
1011 + copy_new = (char *)new;
1012 + }
1013 +
1014 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
1015 +}
1016 +
1017 +#if HAVE_MBRTOWC
1018 +static int
1019 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1020 +{
1021 + size_t i, j, chars;
1022 + const char *str[2];
1023 + char *copy[2];
1024 + size_t len[2];
1025 + mbstate_t state[2];
1026 + size_t mblength;
1027 + wchar_t wc, uwc;
1028 + mbstate_t state_bak;
1029 +
1030 + str[0] = old;
1031 + str[1] = new;
1032 + len[0] = oldlen;
1033 + len[1] = newlen;
1034 + state[0] = oldstate;
1035 + state[1] = newstate;
1036 +
1037 + for (i = 0; i < 2; i++)
1038 + {
1039 + copy[i] = alloca (len[i] + 1);
1040 +
1041 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1042 + {
1043 + state_bak = state[i];
1044 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1045 +
1046 + switch (mblength)
1047 + {
1048 + case (size_t)-1:
1049 + case (size_t)-2:
1050 + state[i] = state_bak;
1051 + /* Fall through */
1052 + case 0:
1053 + mblength = 1;
1054 + break;
1055 +
1056 + default:
1057 + if (ignore_case)
1058 + {
1059 + uwc = towupper (wc);
1060 +
1061 + if (uwc != wc)
1062 + {
1063 + mbstate_t state_wc;
1064 +
1065 + memset (&state_wc, '\0', sizeof(mbstate_t));
1066 + wcrtomb (copy[i] + j, uwc, &state_wc);
1067 + }
1068 + else
1069 + memcpy (copy[i] + j, str[i] + j, mblength);
1070 + }
1071 + else
1072 + memcpy (copy[i] + j, str[i] + j, mblength);
1073 + }
1074 + j += mblength;
1075 + }
1076 + copy[i][j] = '\0';
1077 + len[i] = j;
1078 + }
1079 +
1080 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1081 }
1082 +#endif
1083
1084 /* Output the line in linebuffer LINE to standard output
1085 provided that the switches say it should be output.
1086 @@ -295,15 +478,43 @@
1087 {
1088 char *prevfield IF_LINT (= NULL);
1089 size_t prevlen IF_LINT (= 0);
1090 +#if HAVE_MBRTOWC
1091 + mbstate_t prevstate;
1092 +
1093 + memset (&prevstate, '\0', sizeof (mbstate_t));
1094 +#endif
1095
1096 while (!feof (stdin))
1097 {
1098 char *thisfield;
1099 size_t thislen;
1100 +#if HAVE_MBRTOWC
1101 + mbstate_t thisstate;
1102 +#endif
1103 +
1104 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1105 break;
1106 thisfield = find_field (thisline);
1107 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1108 +#if HAVE_MBRTOWC
1109 + if (MB_CUR_MAX > 1)
1110 + {
1111 + thisstate = thisline->state;
1112 +
1113 + if (prevline->length == 0 || different_multi
1114 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1115 + {
1116 + fwrite (thisline->buffer, sizeof (char),
1117 + thisline->length, stdout);
1118 +
1119 + SWAP_LINES (prevline, thisline);
1120 + prevfield = thisfield;
1121 + prevlen = thislen;
1122 + prevstate = thisstate;
1123 + }
1124 + }
1125 + else
1126 +#endif
1127 if (prevline->length == 0
1128 || different (thisfield, prevfield, thislen, prevlen))
1129 {
1130 @@ -322,17 +533,26 @@
1131 size_t prevlen;
1132 uintmax_t match_count = 0;
1133 bool first_delimiter = true;
1134 +#if HAVE_MBRTOWC
1135 + mbstate_t prevstate;
1136 +#endif
1137
1138 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1139 goto closefiles;
1140 prevfield = find_field (prevline);
1141 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1142 +#if HAVE_MBRTOWC
1143 + prevstate = prevline->state;
1144 +#endif
1145
1146 while (!feof (stdin))
1147 {
1148 bool match;
1149 char *thisfield;
1150 size_t thislen;
1151 +#if HAVE_MBRTOWC
1152 + mbstate_t thisstate;
1153 +#endif
1154 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1155 {
1156 if (ferror (stdin))
1157 @@ -341,6 +561,15 @@
1158 }
1159 thisfield = find_field (thisline);
1160 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1161 +#if HAVE_MBRTOWC
1162 + if (MB_CUR_MAX > 1)
1163 + {
1164 + thisstate = thisline->state;
1165 + match = !different_multi (thisfield, prevfield,
1166 + thislen, prevlen, thisstate, prevstate);
1167 + }
1168 + else
1169 +#endif
1170 match = !different (thisfield, prevfield, thislen, prevlen);
1171 match_count += match;
1172
1173 @@ -373,6 +602,9 @@
1174 SWAP_LINES (prevline, thisline);
1175 prevfield = thisfield;
1176 prevlen = thislen;
1177 +#if HAVE_MBRTOWC
1178 + prevstate = thisstate;
1179 +#endif
1180 if (!match)
1181 match_count = 0;
1182 }
1183 @@ -417,6 +649,19 @@
1184
1185 atexit (close_stdout);
1186
1187 +#if HAVE_MBRTOWC
1188 + if (MB_CUR_MAX > 1)
1189 + {
1190 + find_field = find_field_multi;
1191 + }
1192 + else
1193 +#endif
1194 + {
1195 + find_field = find_field_uni;
1196 + }
1197 +
1198 +
1199 +
1200 skip_chars = 0;
1201 skip_fields = 0;
1202 check_chars = SIZE_MAX;
1203 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1204 +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1205 @@ -23,11 +23,33 @@
1206 #include <getopt.h>
1207 #include <sys/types.h>
1208
1209 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1210 +#if HAVE_WCHAR_H
1211 +# include <wchar.h>
1212 +#endif
1213 +
1214 +/* Get iswprint(), iswblank(), wcwidth(). */
1215 +#if HAVE_WCTYPE_H
1216 +# include <wctype.h>
1217 +#endif
1218 +
1219 #include "system.h"
1220 #include "error.h"
1221 #include "quote.h"
1222 #include "xstrtol.h"
1223
1224 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1225 + installation; work around this configuration error. */
1226 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1227 +# undef MB_LEN_MAX
1228 +# define MB_LEN_MAX 16
1229 +#endif
1230 +
1231 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1232 +#if HAVE_MBRTOWC && defined mbstate_t
1233 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1234 +#endif
1235 +
1236 #define TAB_WIDTH 8
1237
1238 /* The official name of this program (e.g., no `g' prefix). */
1239 @@ -35,20 +57,41 @@
1240
1241 #define AUTHORS proper_name ("David MacKenzie")
1242
1243 +#define FATAL_ERROR(Message)                                            \
1244 +  do                                                                    \
1245 +    {                                                                   \
1246 +      error (0, 0, "%s", (Message));  /* never use Message as a format */ \
1247 +      usage (2);                      /* report usage with status 2 */  \
1248 +    }                                                                   \
1249 +  while (0)
1250 +
1251 +enum operating_mode
1252 +{
1253 + /* Measure line width in screen columns. */
1254 + column_mode,
1255 +
1256 + /* Measure line width in bytes (selected by -b). */
1257 + byte_mode,
1258 +
1259 + /* Measure line width in characters (selected by -c). */
1260 + character_mode,
1261 +};
1262 +
1263 +/* How line width is currently measured; set to column_mode before options are parsed. */
1264 +static enum operating_mode operating_mode;
1265 +
1266 /* If nonzero, try to break on whitespace. */
1267 static bool break_spaces;
1268
1269 -/* If nonzero, count bytes, not column positions. */
1270 -static bool count_bytes;
1271 -
1272 /* If nonzero, at least one of the files we read was standard input. */
1273 static bool have_read_stdin;
1274
1275 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1276 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1277
1278 static struct option const longopts[] =
1279 {
1280 {"bytes", no_argument, NULL, 'b'},
1281 + {"characters", no_argument, NULL, 'c'},
1282 {"spaces", no_argument, NULL, 's'},
1283 {"width", required_argument, NULL, 'w'},
1284 {GETOPT_HELP_OPTION_DECL},
1285 @@ -81,6 +124,7 @@
1286 "), stdout);
1287 fputs (_("\
1288 -b, --bytes count bytes rather than columns\n\
1289 + -c, --characters count characters rather than columns\n\
1290 -s, --spaces break at spaces\n\
1291 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1292 "), stdout);
1293 @@ -98,7 +142,7 @@
1294 static size_t
1295 adjust_column (size_t column, char c)
1296 {
1297 - if (!count_bytes)
1298 + if (operating_mode != byte_mode)
1299 {
1300 if (c == '\b')
1301 {
1302 @@ -121,30 +165,14 @@
1303 to stdout, with maximum line length WIDTH.
1304 Return true if successful. */
1305
1306 -static bool
1307 -fold_file (char const *filename, size_t width)
1308 +static void
1309 +fold_text (FILE *istream, size_t width, int *saved_errno)
1310 {
1311 - FILE *istream;
1312 int c;
1313 size_t column = 0; /* Screen column where next char will go. */
1314 size_t offset_out = 0; /* Index in `line_out' for next char. */
1315 static char *line_out = NULL;
1316 static size_t allocated_out = 0;
1317 - int saved_errno;
1318 -
1319 - if (STREQ (filename, "-"))
1320 - {
1321 - istream = stdin;
1322 - have_read_stdin = true;
1323 - }
1324 - else
1325 - istream = fopen (filename, "r");
1326 -
1327 - if (istream == NULL)
1328 - {
1329 - error (0, errno, "%s", filename);
1330 - return false;
1331 - }
1332
1333 while ((c = getc (istream)) != EOF)
1334 {
1335 @@ -172,6 +200,15 @@
1336 bool found_blank = false;
1337 size_t logical_end = offset_out;
1338
1339 + /* If LINE_OUT has no wide character,
1340 + put a new wide character in LINE_OUT
1341 + if column is bigger than width. */
1342 + if (offset_out == 0)
1343 + {
1344 + line_out[offset_out++] = c;
1345 + continue;
1346 + }
1347 +
1348 /* Look for the last blank. */
1349 while (logical_end)
1350 {
1351 @@ -218,11 +255,225 @@
1352 line_out[offset_out++] = c;
1353 }
1354
1355 - saved_errno = errno;
1356 +  *saved_errno = errno;  /* hand errno from the read loop back to the caller */
1357 +
1358 +  if (offset_out)        /* flush whatever is still buffered */
1359 +    fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1360 +
1361 +  /* LINE_OUT is static and reused by the next call, so it must not be freed.  */
1362 +}
1363 +
1364 +#if HAVE_MBRTOWC
1365 +static void    /* Multibyte-aware counterpart of fold_text.  */
1366 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1367 +{
1368 +  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence.  */
1369 +  size_t buflen = 0;      /* The length of the byte sequence in buf.  */
1370 +  char *bufpos = buf;     /* Next read position of BUF.  */
1371 +  wchar_t wc;             /* The wide character just decoded.  */
1372 +  size_t mblength;        /* The byte size of a multibyte character which shows
1373 +                             as same character as WC.  */
1374 +  mbstate_t state, state_bak;     /* State of the stream.  */
1375 +  int convfail;           /* 1, when conversion is failed.  Otherwise 0.  */
1376 +
1377 +  char *line_out = NULL;          /* Pending output line, grown on demand.  */
1378 +  size_t offset_out = 0;          /* Index in `line_out' for next char.  */
1379 +  size_t allocated_out = 0;       /* Bytes allocated for LINE_OUT.  */
1380 +
1381 +  int increment;          /* Width added by the current character (may be < 0).  */
1382 +  size_t column = 0;      /* Width of the pending line so far.  */
1383 +
1384 +  size_t last_blank_pos;          /* OFFSET_OUT just after the last blank.  */
1385 +  size_t last_blank_column;       /* COLUMN just after the last blank.  */
1386 +  int is_blank_seen;
1387 +  int last_blank_increment = 0;   /* Only read once a blank has been recorded.  */
1388 +  int is_bs_following_last_blank;
1389 +  size_t bs_following_last_blank_num;
1390 +  int is_cr_after_last_blank;
1391 +
1392 +#define CLEAR_FLAGS                             \
1393 +   do                                           \
1394 +     {                                          \
1395 +       last_blank_pos = 0;                      \
1396 +       last_blank_column = 0;                   \
1397 +       is_blank_seen = 0;                       \
1398 +       is_bs_following_last_blank = 0;          \
1399 +       bs_following_last_blank_num = 0;         \
1400 +       is_cr_after_last_blank = 0;              \
1401 +     }                                          \
1402 +   while (0)
1403 +
1404 +#define START_NEW_LINE                  \
1405 +   do                                   \
1406 +     {                                  \
1407 +      putchar ('\n');                   \
1408 +      column = 0;                       \
1409 +      offset_out = 0;                   \
1410 +      CLEAR_FLAGS;                      \
1411 +    }                                   \
1412 +   while (0)
1413 +
1414 +  CLEAR_FLAGS;
1415 +  memset (&state, '\0', sizeof(mbstate_t));
1416 +
1417 +  for (;; bufpos += mblength, buflen -= mblength)
1418 +    {
1419 +      if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1420 +        {
1421 +          memmove (buf, bufpos, buflen);  /* keep the unread tail, then refill */
1422 +          buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1423 +          bufpos = buf;
1424 +        }
1425 +
1426 +      if (buflen < 1)
1427 +        break;
1428 +
1429 +      /* Get a wide character. */
1430 +      convfail = 0;
1431 +      state_bak = state;
1432 +      mblength = mbrtowc (&wc, bufpos, buflen, &state);
1433 +
1434 +      switch (mblength)
1435 +        {
1436 +        case (size_t)-1:
1437 +        case (size_t)-2:
1438 +          convfail++;
1439 +          state = state_bak;      /* undo any partial state change */
1440 +          /* Fall through. */
1441 +
1442 +        case 0:
1443 +          mblength = 1;           /* consume exactly one byte */
1444 +          break;
1445 +        }
1446 +
1447 +rescan:
1448 +      if (convfail)                     /* undecodable byte: count it as one unit */
1449 +        increment = 1;
1450 +      else if (wc == L'\n')             /* a newline ends the line in every mode */
1451 +        {
1452 +          fwrite (line_out, sizeof(char), offset_out, stdout);
1453 +          START_NEW_LINE;
1454 +          continue;
1455 +        }
1456 +      else if (operating_mode == byte_mode)             /* byte mode */
1457 +        increment = mblength;
1458 +      else if (operating_mode == character_mode)        /* character mode */
1459 +        increment = 1;
1460 +      else                                              /* column mode */
1461 +        {
1462 +          switch (wc)
1463 +            {
1464 +            case L'\b':
1465 +              increment = (column > 0) ? -1 : 0;
1466 +              break;
1467 +
1468 +            case L'\r':
1469 +              increment = -1 * column;
1470 +              break;
1471 +
1472 +            case L'\t':
1473 +              increment = TAB_WIDTH - column % TAB_WIDTH;
1474 +              break;
1475 +
1476 +            default:
1477 +              /* wcwidth yields a negative value for non-printable
1478 +                 characters; treat them as occupying no columns.  */
1479 +              increment = wcwidth (wc);
1480 +              increment = (increment < 0) ? 0 : increment;
1481 +            }
1482 +        }
1483 +
1484 +      if (column + increment > width && break_spaces && last_blank_pos)
1485 +        {
1486 +          fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1487 +          putchar ('\n');
1488 +
1489 +          offset_out = offset_out - last_blank_pos;
1490 +          column = column - last_blank_column + ((is_cr_after_last_blank)
1491 +              ? last_blank_increment : bs_following_last_blank_num);
1492 +          memmove (line_out, line_out + last_blank_pos, offset_out);
1493 +          CLEAR_FLAGS;
1494 +          goto rescan;            /* re-measure WC against the new column */
1495 +        }
1496 +
1497 +      if (column + increment > width && column != 0)
1498 +        {
1499 +          fwrite (line_out, sizeof(char), offset_out, stdout);
1500 +          START_NEW_LINE;
1501 +          goto rescan;            /* re-measure WC at column 0 */
1502 +        }
1503 +
1504 +      if (allocated_out < offset_out + mblength)
1505 +        {
1506 +          allocated_out += 1024;
1507 +          line_out = xrealloc (line_out, allocated_out);
1508 +        }
1509 +
1510 +      memcpy (line_out + offset_out, bufpos, mblength);
1511 +      offset_out += mblength;
1512 +      column += increment;
1513 +
1514 +      if (is_blank_seen && !convfail && wc == L'\r')
1515 +        is_cr_after_last_blank = 1;
1516 +
1517 +      if (is_bs_following_last_blank && !convfail && wc == L'\b')
1518 +        ++bs_following_last_blank_num;
1519 +      else
1520 +        is_bs_following_last_blank = 0;
1521 +
1522 +      if (break_spaces && !convfail && iswblank (wc))
1523 +        {
1524 +          last_blank_pos = offset_out;
1525 +          last_blank_column = column;
1526 +          is_blank_seen = 1;
1527 +          last_blank_increment = increment;
1528 +          is_bs_following_last_blank = 1;
1529 +          bs_following_last_blank_num = 0;
1530 +          is_cr_after_last_blank = 0;
1531 +        }
1532 +    }
1533 +
1534 +  *saved_errno = errno;   /* hand errno from the read loop back to the caller */
1535
1536 if (offset_out)
1537 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1538
1539 +  free(line_out);         /* LINE_OUT is a local here, so release it */
1540 +}
1541 +#endif
1542 +
1543 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1544 +   to stdout, with maximum line length WIDTH.
1545 +   Return true if successful, false if an error occurs.  */
1546 +
1547 +static bool
1548 +fold_file (char *filename, size_t width)
1549 +{
1550 +  FILE *istream;
1551 +  int saved_errno;        /* errno captured by the folding routine */
1552 +
1553 +  if (STREQ (filename, "-"))
1554 +    {
1555 +      istream = stdin;
1556 +      have_read_stdin = true;
1557 +    }
1558 +  else
1559 +    istream = fopen (filename, "r");
1560 +
1561 +  if (istream == NULL)
1562 +    {
1563 +      error (0, errno, "%s", filename);
1564 +      return false;       /* failure: `return 1' would read as success */
1565 +    }
1566 +
1567 +  /* Pick the multibyte-aware folder when the locale has multibyte characters.  */
1568 +#if HAVE_MBRTOWC
1569 +  if (MB_CUR_MAX > 1)
1570 +    fold_multibyte_text (istream, width, &saved_errno);
1571 +  else
1572 +#endif
1573 +    fold_text (istream, width, &saved_errno);
1574 +
1575 if (ferror (istream))
1576 {
1577 error (0, saved_errno, "%s", filename);
1578 @@ -255,7 +506,8 @@
1579
1580 atexit (close_stdout);
1581
1582 - break_spaces = count_bytes = have_read_stdin = false;
1583 + operating_mode = column_mode;
1584 + break_spaces = have_read_stdin = false;
1585
1586 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1587 {
1588 @@ -264,7 +516,15 @@
1589 switch (optc)
1590 {
1591 case 'b': /* Count bytes rather than columns. */
1592 - count_bytes = true;
1593 + if (operating_mode != column_mode)
1594 + FATAL_ERROR (_("only one way of folding may be specified"));
1595 + operating_mode = byte_mode;
1596 + break;
1597 +
1598 + case 'c':
1599 + if (operating_mode != column_mode)
1600 + FATAL_ERROR (_("only one way of folding may be specified"));
1601 + operating_mode = character_mode;
1602 break;
1603
1604 case 's': /* Break at word boundaries. */
1605 --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1606 +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1607 @@ -23,10 +23,19 @@
1608
1609 #include <config.h>
1610
1611 +#include <assert.h>
1612 #include <getopt.h>
1613 #include <sys/types.h>
1614 #include <sys/wait.h>
1615 #include <signal.h>
1616 +#if HAVE_WCHAR_H
1617 +# include <wchar.h>
1618 +#endif
1619 +/* Get isw* functions. */
1620 +#if HAVE_WCTYPE_H
1621 +# include <wctype.h>
1622 +#endif
1623 +
1624 #include "system.h"
1625 #include "argmatch.h"
1626 #include "error.h"
1627 @@ -116,14 +125,38 @@
1628 /* Thousands separator; if -1, then there isn't one. */
1629 static int thousands_sep;
1630
1631 +static int force_general_numcompare = 0;
1632 +
1633 /* Nonzero if the corresponding locales are hard. */
1634 static bool hard_LC_COLLATE;
1635 -#if HAVE_NL_LANGINFO
1636 +#if HAVE_LANGINFO_CODESET
1637 static bool hard_LC_TIME;
1638 #endif
1639
1640 #define NONZERO(x) ((x) != 0)
1641
1642 +/* Set MBLENGTH to the byte length of the character at PTR (end at LIM), updating STATE; invalid, incomplete or NUL input counts as 1. */
1643 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE)                  \
1644 +  do                                                                    \
1645 +    {                                                                   \
1646 +      wchar_t wc;                                                       \
1647 +      mbstate_t state_bak;                                              \
1648 +                                                                        \
1649 +      state_bak = (STATE);                                              \
1650 +      (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE));       \
1651 +                                                                        \
1652 +      switch (MBLENGTH)                                                 \
1653 +        {                                                               \
1654 +        case (size_t)-1:                                                \
1655 +        case (size_t)-2:                                                \
1656 +          (STATE) = state_bak;  /* undo any partial state change */     \
1657 +          /* Fall through. */                                           \
1658 +        case 0:                                                         \
1659 +          (MBLENGTH) = 1;                                               \
1660 +        }                                                               \
1661 +    }                                                                   \
1662 +  while (0)
1663 +
1664 /* The kind of blanks for '-b' to skip in various options. */
1665 enum blanktype { bl_start, bl_end, bl_both };
1666
1667 @@ -261,13 +294,11 @@
1668 they were read if all keys compare equal. */
1669 static bool stable;
1670
1671 -/* If TAB has this value, blanks separate fields. */
1672 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
1673 -
1674 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
1675 +/* Tab character separating fields. If tab_length is 0, then fields are
1676 separated by the empty string between a non-blank character and a blank
1677 character. */
1678 -static int tab = TAB_DEFAULT;
1679 +static char tab[MB_LEN_MAX + 1];
1680 +static size_t tab_length = 0;
1681
1682 /* Flag to remove consecutive duplicate lines from the output.
1683 Only the last of a sequence of equal lines will be output. */
1684 @@ -639,6 +670,44 @@
1685 update_proc (pid);
1686 }
1687
1688 +/* Function pointers matching the *_uni / *_mb implementations; presumably bound at startup according to the locale -- TODO confirm in main. */
1689 +static void
1690 +(*inittables) (void);
1691 +static char *
1692 +(*begfield) (const struct line*, const struct keyfield *);
1693 +static char *
1694 +(*limfield) (const struct line*, const struct keyfield *);
1695 +static int
1696 +(*getmonth) (char const *, size_t);
1697 +static int
1698 +(*keycompare) (const struct line *, const struct line *);
1699 +static int
1700 +(*numcompare) (const char *, const char *);
1701 +
1702 +/* Return nonzero if the multibyte character at STR (at most LEN bytes) is blank per iswblank.
1703 + Store its byte length in *LENGTH; invalid, incomplete or NUL input is reported as 1 byte. */
1704 +#if HAVE_MBRTOWC
1705 +static int
1706 +ismbblank (const char *str, size_t len, size_t *length)
1707 +{
1708 + size_t mblength;
1709 + wchar_t wc;
1710 + mbstate_t state;
1711 +
1712 + memset (&state, '\0', sizeof(mbstate_t)); /* decode from the initial state on every call */
1713 + mblength = mbrtowc (&wc, str, len, &state);
1714 +
1715 + if (mblength == (size_t)-1 || mblength == (size_t)-2) /* invalid or incomplete sequence */
1716 + {
1717 + *length = 1;
1718 + return 0;
1719 + }
1720 +
1721 + *length = (mblength < 1) ? 1 : mblength; /* a NUL character decodes with length 0 */
1722 + return iswblank (wc);
1723 +}
1724 +#endif
1725 +
1726 /* Clean up any remaining temporary files. */
1727
1728 static void
1729 @@ -978,7 +1047,7 @@
1730 free (node);
1731 }
1732
1733 -#if HAVE_NL_LANGINFO
1734 +#if HAVE_LANGINFO_CODESET
1735
1736 static int
1737 struct_month_cmp (const void *m1, const void *m2)
1738 @@ -993,7 +1062,7 @@
1739 /* Initialize the character class tables. */
1740
1741 static void
1742 -inittables (void)
1743 +inittables_uni (void)
1744 {
1745 size_t i;
1746
1747 @@ -1005,7 +1074,7 @@
1748 fold_toupper[i] = toupper (i);
1749 }
1750
1751 -#if HAVE_NL_LANGINFO
1752 +#if HAVE_LANGINFO_CODESET
1753 /* If we're not in the "C" locale, read different names for months. */
1754 if (hard_LC_TIME)
1755 {
1756 @@ -1031,6 +1100,64 @@
1757 xstrtol_fatal (e, oi, c, long_options, s);
1758 }
1759
1760 +#if HAVE_MBRTOWC
1761 +static void    /* Fill MONTHTAB with upper-cased locale month abbreviations.  */
1762 +inittables_mb (void)
1763 +{
1764 +  size_t i, j, k, l;
1765 +  char *name, *s;
1766 +  size_t s_len, mblength;
1767 +  char mbc[MB_LEN_MAX];   /* one converted character */
1768 +  wchar_t wc, pwc;
1769 +  mbstate_t state_mb, state_wc;
1770 +
1771 +  for (i = 0; i < MONTHS_PER_YEAR; i++)
1772 +    {
1773 +      s = (char *) nl_langinfo (ABMON_1 + i);
1774 +      s_len = strlen (s);
1775 +      monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1);  /* upper-casing may lengthen the encoding */
1776 +      monthtab[i].val = i + 1;
1777 +
1778 +      memset (&state_mb, '\0', sizeof (mbstate_t));
1779 +      memset (&state_wc, '\0', sizeof (mbstate_t));
1780 +
1781 +      for (j = 0; j < s_len;)     /* skip leading blanks */
1782 +        {
1783 +          if (!ismbblank (s + j, s_len - j, &mblength))
1784 +            break;
1785 +          j += mblength;
1786 +        }
1787 +
1788 +      for (k = 0; j < s_len;)     /* copy the rest, upper-casing each character */
1789 +        {
1790 +          mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1791 +          assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1792 +          if (mblength == 0)
1793 +            break;
1794 +
1795 +          pwc = towupper (wc);
1796 +          if (pwc == wc)          /* no case change: copy the bytes as they are */
1797 +            {
1798 +              memcpy (mbc, s + j, mblength);
1799 +              j += mblength;
1800 +            }
1801 +          else                    /* re-encode the upper-case form */
1802 +            {
1803 +              j += mblength;
1804 +              mblength = wcrtomb (mbc, pwc, &state_wc);
1805 +              assert (mblength != (size_t)0 && mblength != (size_t)-1);
1806 +            }
1807 +
1808 +          for (l = 0; l < mblength; l++)
1809 +            name[k++] = mbc[l];
1810 +        }
1811 +      name[k] = '\0';
1812 +    }
1813 +  qsort ((void *) monthtab, MONTHS_PER_YEAR,
1814 +         sizeof (struct month), struct_month_cmp);
1815 +}
1816 +#endif
1817 +
1818 /* Specify the amount of main memory to use when sorting. */
1819 static void
1820 specify_sort_size (int oi, char c, char const *s)
1821 @@ -1241,7 +1368,7 @@
1822 by KEY in LINE. */
1823
1824 static char *
1825 -begfield (const struct line *line, const struct keyfield *key)
1826 +begfield_uni (const struct line *line, const struct keyfield *key)
1827 {
1828 char *ptr = line->text, *lim = ptr + line->length - 1;
1829 size_t sword = key->sword;
1830 @@ -1251,10 +1378,10 @@
1831 /* The leading field separator itself is included in a field when -t
1832 is absent. */
1833
1834 - if (tab != TAB_DEFAULT)
1835 + if (tab_length)
1836 while (ptr < lim && sword--)
1837 {
1838 - while (ptr < lim && *ptr != tab)
1839 + while (ptr < lim && *ptr != tab[0])
1840 ++ptr;
1841 if (ptr < lim)
1842 ++ptr;
1843 @@ -1282,11 +1409,70 @@
1844 return ptr;
1845 }
1846
1847 +#if HAVE_MBRTOWC /* multibyte-aware counterpart of begfield_uni */
1848 +static char *
1849 +begfield_mb (const struct line *line, const struct keyfield *key)
1850 +{
1851 + int i;
1852 + char *ptr = line->text, *lim = ptr + line->length - 1; /* LIM: last byte of the line */
1853 + size_t sword = key->sword; /* number of fields to skip */
1854 + size_t schar = key->schar; /* number of characters to skip within the field */
1855 + size_t mblength;
1856 + mbstate_t state;
1857 +
1858 + memset (&state, '\0', sizeof(mbstate_t));
1859 +
1860 + if (tab_length) /* an explicit separator is stored in TAB */
1861 + while (ptr < lim && sword--)
1862 + {
1863 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) /* advance to the next separator */
1864 + {
1865 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1866 + ptr += mblength;
1867 + }
1868 + if (ptr < lim) /* step over the separator itself */
1869 + {
1870 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1871 + ptr += mblength;
1872 + }
1873 + }
1874 + else /* fields are runs of non-blanks preceded by blanks */
1875 + while (ptr < lim && sword--)
1876 + {
1877 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1878 + ptr += mblength;
1879 + if (ptr < lim) /* the first character after the blanks is consumed unconditionally */
1880 + {
1881 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1882 + ptr += mblength;
1883 + }
1884 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1885 + ptr += mblength;
1886 + }
1887 +
1888 + if (key->skipsblanks) /* optionally skip blanks at the start of the field */
1889 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1890 + ptr += mblength;
1891 +
1892 + for (i = 0; i < schar; i++) /* advance SCHAR characters, never past LIM */
1893 + {
1894 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1895 +
1896 + if (ptr + mblength > lim)
1897 + break;
1898 + else
1899 + ptr += mblength;
1900 + }
1901 +
1902 + return ptr;
1903 +}
1904 +#endif
1905 +
1906 /* Return the limit of (a pointer to the first character after) the field
1907 in LINE specified by KEY. */
1908
1909 static char *
1910 -limfield (const struct line *line, const struct keyfield *key)
1911 +limfield_uni (const struct line *line, const struct keyfield *key)
1912 {
1913 char *ptr = line->text, *lim = ptr + line->length - 1;
1914 size_t eword = key->eword, echar = key->echar;
1915 @@ -1299,10 +1485,10 @@
1916 `beginning' is the first character following the delimiting TAB.
1917 Otherwise, leave PTR pointing at the first `blank' character after
1918 the preceding field. */
1919 - if (tab != TAB_DEFAULT)
1920 + if (tab_length)
1921 while (ptr < lim && eword--)
1922 {
1923 - while (ptr < lim && *ptr != tab)
1924 + while (ptr < lim && *ptr != tab[0])
1925 ++ptr;
1926 if (ptr < lim && (eword | echar))
1927 ++ptr;
1928 @@ -1348,10 +1534,10 @@
1929 */
1930
1931 /* Make LIM point to the end of (one byte past) the current field. */
1932 - if (tab != TAB_DEFAULT)
1933 + if (tab_length)
1934 {
1935 char *newlim;
1936 - newlim = memchr (ptr, tab, lim - ptr);
1937 + newlim = memchr (ptr, tab[0], lim - ptr);
1938 if (newlim)
1939 lim = newlim;
1940 }
1941 @@ -1384,6 +1570,113 @@
1942 return ptr;
1943 }
1944
1945 +#if HAVE_MBRTOWC
1946 +static char *  /* Multibyte-aware counterpart of limfield_uni.  */
1947 +limfield_mb (const struct line *line, const struct keyfield *key)
1948 +{
1949 +  char *ptr = line->text, *lim = ptr + line->length - 1;
1950 +  size_t eword = key->eword, echar = key->echar;
1951 +  size_t i;
1952 +  size_t mblength;
1953 +  mbstate_t state;
1954 +
1955 +  if (echar == 0)
1956 +    eword++; /* skip all of end field. */
1957 +
1958 +  memset (&state, '\0', sizeof(mbstate_t));
1959 +
1960 +  if (tab_length)         /* an explicit separator is stored in TAB */
1961 +    while (ptr < lim && eword--)
1962 +      {
1963 +        while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1964 +          {
1965 +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1966 +            ptr += mblength;
1967 +          }
1968 +        if (ptr < lim && (eword | echar))   /* step over the separator unless this is the end */
1969 +          {
1970 +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1971 +            ptr += mblength;
1972 +          }
1973 +      }
1974 +  else                    /* fields are runs of non-blanks preceded by blanks */
1975 +    while (ptr < lim && eword--)
1976 +      {
1977 +        while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1978 +          ptr += mblength;
1979 +        if (ptr < lim)
1980 +          {
1981 +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1982 +            ptr += mblength;
1983 +          }
1984 +        while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1985 +          ptr += mblength;
1986 +      }
1987 +
1988 +
1989 +# ifdef POSIX_UNSPECIFIED
1990 +  /* Make LIM point to the end of (one byte past) the current field.  */
1991 +  if (tab_length)
1992 +    {
1993 +      char *p;
1994 +
1995 +      /* Cut LIM back to the first separator at or after PTR, if any.  */
1996 +      for (p = ptr; p < lim;)
1997 +        {
1998 +          if (memcmp (p, tab, tab_length) == 0)
1999 +            {
2000 +              lim = p;
2001 +              break;
2002 +            }
2003 +
2004 +          GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
2005 +          p += mblength;
2006 +        }
2007 +    }
2008 +  else
2009 +    {
2010 +      char *newlim;
2011 +      newlim = ptr;
2012 +
2013 +      while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2014 +        newlim += mblength;
2015 +      if (newlim < lim)   /* scan with NEWLIM only; PTR must stay put */
2016 +        {
2017 +          GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
2018 +          newlim += mblength;
2019 +        }
2020 +      while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2021 +        newlim += mblength;
2022 +      lim = newlim;
2023 +    }
2024 +# endif
2025 +
2026 +  if (echar != 0)
2027 +    {
2028 +      /* If we're skipping leading blanks, don't start counting characters
2029 +       * until after skipping past any leading blanks.  */
2030 +      if (key->skipsblanks)
2031 +        while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2032 +          ptr += mblength;
2033 +
2034 +      memset (&state, '\0', sizeof(mbstate_t));
2035 +
2036 +      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
2037 +      for (i = 0; i < echar; i++)
2038 +        {
2039 +          GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2040 +
2041 +          if (ptr + mblength > lim)
2042 +            break;
2043 +          else
2044 +            ptr += mblength;
2045 +        }
2046 +    }
2047 +
2048 +  return ptr;
2049 +}
2050 +#endif
2051 +
2052 /* Fill BUF reading from FP, moving buf->left bytes from the end
2053 of buf->buf to the beginning first. If EOF is reached and the
2054 file wasn't terminated by a newline, supply one. Set up BUF's line
2055 @@ -1466,8 +1753,24 @@
2056 else
2057 {
2058 if (key->skipsblanks)
2059 - while (blanks[to_uchar (*line_start)])
2060 - line_start++;
2061 + {
2062 +#if HAVE_MBRTOWC
2063 + if (MB_CUR_MAX > 1)
2064 + {
2065 + size_t mblength;
2066 + mbstate_t state;
2067 + memset (&state, '\0', sizeof(mbstate_t));
2068 + while (line_start < line->keylim &&
2069 + ismbblank (line_start,
2070 + line->keylim - line_start,
2071 + &mblength))
2072 + line_start += mblength;
2073 + }
2074 + else
2075 +#endif
2076 + while (blanks[to_uchar (*line_start)])
2077 + line_start++;
2078 + }
2079 line->keybeg = line_start;
2080 }
2081 }
2082 @@ -1500,7 +1803,7 @@