/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory | Revision Log


Revision 1.39 - (show annotations) (download) (as text)
Sat Sep 12 09:28:49 2009 UTC (2 months, 1 week ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-7_6-4_fc12, coreutils-7_6-5_fc12, coreutils-7_6-7_fc13, coreutils-7_6-2_fc12, F-12-split, coreutils-7_6-3_fc12, coreutils-7_6-6_fc13, coreutils-7_6-6_fc12, coreutils-7_6-1_fc12
Changes since 1.38: +1196 -1219 lines
File MIME type: text/x-patch
new upstream bugfix release 7.6, removed applied patches,defuzzed the rest
1 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2 --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3 +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4 @@ -26,7 +26,7 @@
5 my $prog = 'cut';
6 my $try = "Try \`$prog --help' for more information.\n";
7 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8 -my $inval = "$prog: invalid byte or field list\n$try";
9 +my $inval = "$prog: invalid byte, character or field list\n$try";
10 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11
12 my @Tests =
13 @@ -140,8 +140,8 @@
14 ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15
16 # None of the following invalid ranges provoked an error up to coreutils-6.9.
17 - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18 - {ERR=>"$prog: invalid decreasing range\n$try"}],
19 + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 @@ -0,0 +1,58 @@
27 +#! /bin/sh
28 +case $# in
29 + 0) xx='../src/sort';;
30 + *) xx="$1";;
31 +esac
32 +test "$VERBOSE" && echo=echo || echo=:
33 +$echo testing program: $xx
34 +errors=0
35 +test "$srcdir" || srcdir=.
36 +test "$VERBOSE" && $xx --version 2> /dev/null
37 +
38 +export LC_ALL=en_US.UTF-8
39 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40 +errors=0
41 +
42 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 +code=$?
44 +if test $code != 0; then
45 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46 + errors=`expr $errors + 1`
47 +else
48 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 + case $? in
50 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 + errors=`expr $errors + 1`;;
54 + 2) $echo "Test mb1 may have failed." 1>&2
55 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 + errors=`expr $errors + 1`;;
57 + esac
58 +fi
59 +
60 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 +code=$?
62 +if test $code != 0; then
63 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64 + errors=`expr $errors + 1`
65 +else
66 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 + case $? in
68 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 + errors=`expr $errors + 1`;;
72 + 2) $echo "Test mb2 may have failed." 1>&2
73 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 + errors=`expr $errors + 1`;;
75 + esac
76 +fi
77 +
78 +if test $errors = 0; then
79 + $echo Passed all 113 tests. 1>&2
80 +else
81 + $echo Failed $errors tests. 1>&2
82 +fi
83 +test $errors = 0 || errors=1
84 +exit $errors
85 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 @@ -0,0 +1,4 @@
88 +Apple@AA10@@20
89 +Banana@AA5@@30
90 +Citrus@AA20@@5
91 +Cherry@AA30@@10
92 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 @@ -0,0 +1,4 @@
95 +Citrus@AA20@@5
96 +Cherry@AA30@@10
97 +Apple@AA10@@20
98 +Banana@AA5@@30
99 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 @@ -0,0 +1,4 @@
102 +Apple@10
103 +Banana@5
104 +Citrus@20
105 +Cherry@30
106 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 @@ -0,0 +1,4 @@
109 +Banana@5
110 +Apple@10
111 +Citrus@20
112 +Cherry@30
113 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114 --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115 +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 @@ -192,6 +192,7 @@
117 misc/sort-compress \
118 misc/sort-continue \
119 misc/sort-files0-from \
120 + misc/sort-mb-tests \
121 misc/sort-merge \
122 misc/sort-merge-fdlimit \
123 misc/sort-rand \
124 @@ -391,6 +392,10 @@
125 $(root_tests)
126
127 pr_data = \
128 + misc/mb1.X \
129 + misc/mb1.I \
130 + misc/mb2.X \
131 + misc/mb2.I \
132 pr/0F \
133 pr/0FF \
134 pr/0FFnt \
135 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136 +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 @@ -22,6 +22,11 @@
138
139 # include <stdio.h>
140
141 +/* Get mbstate_t. */
142 +# if HAVE_WCHAR_H
143 +# include <wchar.h>
144 +# endif
145 +
146 /* A `struct linebuffer' holds a line of text. */
147
148 struct linebuffer
149 @@ -29,6 +34,9 @@
150 size_t size; /* Allocated. */
151 size_t length; /* Used. */
152 char *buffer;
153 +# if HAVE_WCHAR_H
154 + mbstate_t state;
155 +# endif
156 };
157
158 /* Initialize linebuffer LINEBUFFER for use. */
159 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160 +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 @@ -38,11 +38,28 @@
162 #include <stdio.h>
163 #include <getopt.h>
164 #include <sys/types.h>
165 +
166 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 +#if HAVE_WCHAR_H
168 +# include <wchar.h>
169 +#endif
170 +
171 #include "system.h"
172 #include "error.h"
173 #include "quote.h"
174 #include "xstrndup.h"
175
176 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 + installation; work around this configuration error. */
178 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179 +# define MB_LEN_MAX 16
180 +#endif
181 +
182 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183 +#if HAVE_MBRTOWC && defined mbstate_t
184 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185 +#endif
186 +
187 /* The official name of this program (e.g., no `g' prefix). */
188 #define PROGRAM_NAME "expand"
189
190 @@ -365,6 +383,142 @@
191 }
192 }
193
194 +#if HAVE_MBRTOWC
195 +static void
196 +expand_multibyte (void)
197 +{
198 + FILE *fp; /* Input stream. */
199 + mbstate_t i_state; /* Current shift state of the input stream. */
200 + mbstate_t i_state_bak; /* Back up the I_STATE. */
201 + mbstate_t o_state; /* Current shift state of the output stream. */
202 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
203 + char *bufpos; /* Next read position of BUF. */
204 + size_t buflen = 0; /* The length of the byte sequence in buf. */
205 + wchar_t wc; /* The wide character most recently decoded. */
206 + size_t mblength; /* The byte length of the multibyte sequence
207 + that was decoded into WC. */
208 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
209 + int column = 0; /* Column on screen of the next char. */
210 + int next_tab_column; /* Column the next tab stop is on. */
211 + int convert = 1; /* If nonzero, perform translations. */
212 +
213 + fp = next_file ((FILE *) NULL);
214 + if (fp == NULL)
215 + return;
216 +
217 + memset (&o_state, '\0', sizeof(mbstate_t));
218 + memset (&i_state, '\0', sizeof(mbstate_t));
219 +
220 + for (;;)
221 + {
222 + /* Refill the buffer BUF. */
223 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
224 + {
225 + memmove (buf, bufpos, buflen);
226 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
227 + bufpos = buf;
228 + }
229 +
230 + /* No character is left in BUF. */
231 + if (buflen < 1)
232 + {
233 + fp = next_file (fp);
234 +
235 + if (fp == NULL)
236 + break; /* No more files. */
237 + else
238 + {
239 + memset (&i_state, '\0', sizeof(mbstate_t));
240 + continue;
241 + }
242 + }
243 +
244 + /* Get a wide character. */
245 + i_state_bak = i_state;
246 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
247 +
248 + switch (mblength)
249 + {
250 + case (size_t)-1: /* illegal byte sequence. */
251 + case (size_t)-2:
252 + mblength = 1;
253 + i_state = i_state_bak;
254 + if (convert)
255 + {
256 + ++column;
257 + if (convert_entire_line == 0)
258 + convert = 0;
259 + }
260 + putchar (*bufpos);
261 + break;
262 +
263 + case 0: /* null. */
264 + mblength = 1;
265 + if (convert && convert_entire_line == 0)
266 + convert = 0;
267 + putchar ('\0');
268 + break;
269 +
270 + default:
271 + if (wc == L'\n') /* LF. */
272 + {
273 + tab_index = 0;
274 + column = 0;
275 + convert = 1;
276 + putchar ('\n');
277 + }
278 + else if (wc == L'\t' && convert) /* Tab. */
279 + {
280 + if (tab_size == 0)
281 + {
282 + /* Do not let tab_index == first_free_tab;
283 + stop when it is 1 less. */
284 + while (tab_index < first_free_tab - 1
285 + && column >= tab_list[tab_index])
286 + tab_index++;
287 + next_tab_column = tab_list[tab_index];
288 + if (tab_index < first_free_tab - 1)
289 + tab_index++;
290 + if (column >= next_tab_column)
291 + next_tab_column = column + 1;
292 + }
293 + else
294 + next_tab_column = column + tab_size - column % tab_size;
295 +
296 + while (column < next_tab_column)
297 + {
298 + putchar (' ');
299 + ++column;
300 + }
301 + }
302 + else /* Others. */
303 + {
304 + if (convert)
305 + {
306 + if (wc == L'\b')
307 + {
308 + if (column > 0)
309 + --column;
310 + }
311 + else
312 + {
313 + int width; /* The width of WC. */
314 +
315 + width = wcwidth (wc);
316 + column += (width > 0) ? width : 0;
317 + if (convert_entire_line == 0)
318 + convert = 0;
319 + }
320 + }
321 + fwrite (bufpos, sizeof(char), mblength, stdout);
322 + }
323 + }
324 + buflen -= mblength;
325 + bufpos += mblength;
326 + }
327 +}
328 +#endif
329 +
330 int
331 main (int argc, char **argv)
332 {
333 @@ -429,7 +583,12 @@
334
335 file_list = (optind < argc ? &argv[optind] : stdin_argv);
336
337 - expand ();
338 +#if HAVE_MBRTOWC
339 + if (MB_CUR_MAX > 1)
340 + expand_multibyte ();
341 + else
342 +#endif
343 + expand ();
344
345 if (have_read_stdin && fclose (stdin) != 0)
346 error (EXIT_FAILURE, errno, "-");
347 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
348 +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
349 @@ -23,16 +23,30 @@
350 #include <sys/types.h>
351 #include <getopt.h>
352
353 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
354 +#if HAVE_WCHAR_H
355 +# include <wchar.h>
356 +#endif
357 +
358 +/* Get iswblank(), towupper(). */
359 +#if HAVE_WCTYPE_H
360 +# include <wctype.h>
361 +#endif
362 +
363 #include "system.h"
364 #include "error.h"
365 #include "linebuffer.h"
366 -#include "memcasecmp.h"
367 #include "quote.h"
368 #include "stdio--.h"
369 #include "xmemcoll.h"
370 #include "xstrtol.h"
371 #include "argmatch.h"
372
373 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
374 +#if HAVE_MBRTOWC && defined mbstate_t
375 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
376 +#endif
377 +
378 /* The official name of this program (e.g., no `g' prefix). */
379 #define PROGRAM_NAME "join"
380
381 @@ -104,10 +118,12 @@
382 /* Last element in `outlist', where a new element can be added. */
383 static struct outlist *outlist_end = &outlist_head;
384
385 -/* Tab character separating fields. If negative, fields are separated
386 - by any nonempty string of blanks, otherwise by exactly one
387 - tab character whose value (when cast to unsigned char) equals TAB. */
388 -static int tab = -1;
389 +/* Tab character separating fields. If NULL, fields are separated
390 + by any nonempty string of blanks. */
391 +static char *tab = NULL;
392 +
393 +/* The number of bytes used for tab. */
394 +static size_t tablen = 0;
395
396 /* If nonzero, check that the input is correctly ordered. */
397 static enum
398 @@ -199,10 +217,11 @@
399 if (ptr == lim)
400 return;
401
402 - if (0 <= tab)
403 + if (tab != NULL)
404 {
405 + unsigned char t = tab[0];
406 char *sep;
407 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
408 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
409 extract_field (line, ptr, sep - ptr);
410 }
411 else
412 @@ -229,6 +248,148 @@
413 extract_field (line, ptr, lim - ptr);
414 }
415
416 +#if HAVE_MBRTOWC
417 +static void
418 +xfields_multibyte (struct line *line)
419 +{
420 + char *ptr = line->buf.buffer;
421 + char const *lim = ptr + line->buf.length - 1;
422 + wchar_t wc = 0;
423 + size_t mblength = 1;
424 + mbstate_t state, state_bak;
425 +
426 + memset (&state, 0, sizeof (mbstate_t));
427 +
428 + if (ptr >= lim)
429 + return;
430 +
431 + if (tab != NULL)
432 + {
433 + unsigned char t = tab[0];
434 + char *sep = ptr;
435 + for (; ptr < lim; ptr = sep + mblength)
436 + {
437 + sep = ptr;
438 + while (sep < lim)
439 + {
440 + state_bak = state;
441 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
442 +
443 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
444 + {
445 + mblength = 1;
446 + state = state_bak;
447 + }
448 + mblength = (mblength < 1) ? 1 : mblength;
449 +
450 + if (mblength == tablen && !memcmp (sep, tab, mblength))
451 + break;
452 + else
453 + {
454 + sep += mblength;
455 + continue;
456 + }
457 + }
458 +
459 + if (sep >= lim)
460 + break;
461 +
462 + extract_field (line, ptr, sep - ptr);
463 + }
464 + }
465 + else
466 + {
467 + /* Skip leading blanks before the first field. */
468 + while(ptr < lim)
469 + {
470 + state_bak = state;
471 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
472 +
473 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
474 + {
475 + mblength = 1;
476 + state = state_bak;
477 + break;
478 + }
479 + mblength = (mblength < 1) ? 1 : mblength;
480 +
481 + if (!iswblank(wc))
482 + break;
483 + ptr += mblength;
484 + }
485 +
486 + do
487 + {
488 + char *sep;
489 + state_bak = state;
490 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
491 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
492 + {
493 + mblength = 1;
494 + state = state_bak;
495 + break;
496 + }
497 + mblength = (mblength < 1) ? 1 : mblength;
498 +
499 + sep = ptr + mblength;
500 + while (sep < lim)
501 + {
502 + state_bak = state;
503 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
504 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
505 + {
506 + mblength = 1;
507 + state = state_bak;
508 + break;
509 + }
510 + mblength = (mblength < 1) ? 1 : mblength;
511 +
512 + if (iswblank (wc))
513 + break;
514 +
515 + sep += mblength;
516 + }
517 +
518 + extract_field (line, ptr, sep - ptr);
519 + if (sep >= lim)
520 + return;
521 +
522 + state_bak = state;
523 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
524 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
525 + {
526 + mblength = 1;
527 + state = state_bak;
528 + break;
529 + }
530 + mblength = (mblength < 1) ? 1 : mblength;
531 +
532 + ptr = sep + mblength;
533 + while (ptr < lim)
534 + {
535 + state_bak = state;
536 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
537 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
538 + {
539 + mblength = 1;
540 + state = state_bak;
541 + break;
542 + }
543 + mblength = (mblength < 1) ? 1 : mblength;
544 +
545 + if (!iswblank (wc))
546 + break;
547 +
548 + ptr += mblength;
549 + }
550 + }
551 + while (ptr < lim);
552 + }
553 +
554 + extract_field (line, ptr, lim - ptr);
555 +}
556 +#endif
557 +
558 static void
559 freeline (struct line *line)
560 {
561 @@ -377,11 +601,18 @@
562
563 /* Print the join of LINE1 and LINE2. */
564
565 +#define PUT_TAB_CHAR \
566 + do \
567 + { \
568 + (tab != NULL) ? \
569 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
570 + } \
571 + while (0)
572 +
573 static void
574 prjoin (struct line const *line1, struct line const *line2)
575 {
576 const struct outlist *outlist;
577 - char output_separator = tab < 0 ? ' ' : tab;
578
579 outlist = outlist_head.next;
580 if (outlist)
581 @@ -416,7 +647,7 @@
582 o = o->next;
583 if (o == NULL)
584 break;
585 - putchar (output_separator);
586 + PUT_TAB_CHAR;
587 }
588 putchar ('\n');
589 }
590 @@ -434,23 +665,23 @@
591 prfield (join_field_1, line1);
592 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
593 {
594 - putchar (output_separator);
595 + PUT_TAB_CHAR;
596 prfield (i, line1);
597 }
598 for (i = join_field_1 + 1; i < line1->nfields; ++i)
599 {
600 - putchar (output_separator);
601 + PUT_TAB_CHAR;
602 prfield (i, line1);
603 }
604
605 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
606 {
607 - putchar (output_separator);
608 + PUT_TAB_CHAR;
609 prfield (i, line2);
610 }
611 for (i = join_field_2 + 1; i < line2->nfields; ++i)
612 {
613 - putchar (output_separator);
614 + PUT_TAB_CHAR;
615 prfield (i, line2);
616 }
617 putchar ('\n');
618 @@ -859,20 +1090,41 @@
619
620 case 't':
621 {
622 - unsigned char newtab = optarg[0];
623 - if (! newtab)
624 + char *newtab;
625 + size_t newtablen;
626 + if (! optarg[0])
627 error (EXIT_FAILURE, 0, _("empty tab"));
628 - if (optarg[1])
629 + newtab = xstrdup (optarg);
630 +#if HAVE_MBRTOWC
631 + if (MB_CUR_MAX > 1)
632 + {
633 + mbstate_t state;
634 +
635 + memset (&state, 0, sizeof (mbstate_t));
636 + newtablen = mbrtowc (NULL, newtab,
637 + strnlen (newtab, MB_LEN_MAX),
638 + &state);
639 + if (newtablen == (size_t) 0
640 + || newtablen == (size_t) -1
641 + || newtablen == (size_t) -2)
642 + newtablen = 1;
643 + }
644 + else
645 +#endif
646 + newtablen = 1;
647 +
648 + if (newtablen == 1 && newtab[1])
649 + {
650 + if (STREQ (newtab, "\\0"))
651 + newtab[0] = '\0';
652 + }
653 + if (tab != NULL && strcmp (tab, newtab))
654 {
655 - if (STREQ (optarg, "\\0"))
656 - newtab = '\0';
657 - else
658 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
659 - quote (optarg));
660 + free (newtab);
661 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
662 }
663 - if (0 <= tab && tab != newtab)
664 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
665 tab = newtab;
666 + tablen = newtablen;
667 }
668 break;
669
670 diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
671 --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
672 +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
673 @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
674 size_t jf_1, size_t jf_2)
675 {
676 /* Start of field to compare in each file. */
677 - char *beg1;
678 - char *beg2;
679 -
680 - size_t len1;
681 - size_t len2; /* Length of fields to compare. */
682 + char *beg[2];
683 + char *copy[2];
684 + size_t len[2]; /* Length of fields to compare. */
685 int diff;
686 + int i, j;
687
688 if (jf_1 < line1->nfields)
689 {
690 - beg1 = line1->fields[jf_1].beg;
691 - len1 = line1->fields[jf_1].len;
692 + beg[0] = line1->fields[jf_1].beg;
693 + len[0] = line1->fields[jf_1].len;
694 }
695 else
696 {
697 - beg1 = NULL;
698 - len1 = 0;
699 + beg[0] = NULL;
700 + len[0] = 0;
701 }
702
703 if (jf_2 < line2->nfields)
704 {
705 - beg2 = line2->fields[jf_2].beg;
706 - len2 = line2->fields[jf_2].len;
707 + beg[1] = line2->fields[jf_2].beg;
708 + len[1] = line2->fields[jf_2].len;
709 }
710 else
711 {
712 - beg2 = NULL;
713 - len2 = 0;
714 + beg[1] = NULL;
715 + len[1] = 0;
716 }
717
718 - if (len1 == 0)
719 - return len2 == 0 ? 0 : -1;
720 - if (len2 == 0)
721 + if (len[0] == 0)
722 + return len[1] == 0 ? 0 : -1;
723 + if (len[1] == 0)
724 return 1;
725
726 if (ignore_case)
727 {
728 - /* FIXME: ignore_case does not work with NLS (in particular,
729 - with multibyte chars). */
730 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
731 +#ifdef HAVE_MBRTOWC
732 + if (MB_CUR_MAX > 1)
733 + {
734 + size_t mblength;
735 + wchar_t wc, uwc;
736 + mbstate_t state, state_bak;
737 +
738 + memset (&state, '\0', sizeof (mbstate_t));
739 +
740 + for (i = 0; i < 2; i++)
741 + {
742 + copy[i] = alloca (len[i] + 1);
743 +
744 + for (j = 0; j < MIN (len[0], len[1]);)
745 + {
746 + state_bak = state;
747 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
748 +
749 + switch (mblength)
750 + {
751 + case (size_t) -1:
752 + case (size_t) -2:
753 + state = state_bak;
754 + /* Fall through */
755 + case 0:
756 + mblength = 1;
757 + break;
758 +
759 + default:
760 + uwc = towupper (wc);
761 +
762 + if (uwc != wc)
763 + {
764 + mbstate_t state_wc;
765 +
766 + memset (&state_wc, '\0', sizeof (mbstate_t));
767 + wcrtomb (copy[i] + j, uwc, &state_wc);
768 + }
769 + else
770 + memcpy (copy[i] + j, beg[i] + j, mblength);
771 + }
772 + j += mblength;
773 + }
774 + copy[i][j] = '\0';
775 + }
776 + }
777 + else
778 +#endif
779 + {
780 + for (i = 0; i < 2; i++)
781 + {
782 + copy[i] = alloca (len[i] + 1);
783 +
784 + for (j = 0; j < MIN (len[0], len[1]); j++)
785 + copy[i][j] = toupper (beg[i][j]);
786 +
787 + copy[i][j] = '\0';
788 + }
789 + }
790 }
791 else
792 {
793 - if (hard_LC_COLLATE)
794 - return xmemcoll (beg1, len1, beg2, len2);
795 - diff = memcmp (beg1, beg2, MIN (len1, len2));
796 + copy[0] = (unsigned char *) beg[0];
797 + copy[1] = (unsigned char *) beg[1];
798 }
799
800 + if (hard_LC_COLLATE)
801 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
802 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
803 +
804 +
805 if (diff)
806 return diff;
807 - return len1 < len2 ? -1 : len1 != len2;
808 + return len[0] - len[1];
809 }
810
811 /* Check that successive input lines PREV and CURRENT from input file
812 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
813 +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
814 @@ -23,6 +23,16 @@
815 #include <getopt.h>
816 #include <sys/types.h>
817
818 +/* Get mbstate_t, mbrtowc(). */
819 +#if HAVE_WCHAR_H
820 +# include <wchar.h>
821 +#endif
822 +
823 +/* Get isw* functions. */
824 +#if HAVE_WCTYPE_H
825 +# include <wctype.h>
826 +#endif
827 +
828 #include "system.h"
829 #include "argmatch.h"
830 #include "linebuffer.h"
831 @@ -32,7 +42,19 @@
832 #include "quote.h"
833 #include "xmemcoll.h"
834 #include "xstrtol.h"
835 -#include "memcasecmp.h"
836 +#include "xmemcoll.h"
837 +
838 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
839 + installation; work around this configuration error. */
840 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
841 +# define MB_LEN_MAX 16
842 +#endif
843 +
844 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
845 +#if HAVE_MBRTOWC && defined mbstate_t
846 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
847 +#endif
848 +
849
850 /* The official name of this program (e.g., no `g' prefix). */
851 #define PROGRAM_NAME "uniq"
852 @@ -109,6 +131,10 @@
853 /* Select whether/how to delimit groups of duplicate lines. */
854 static enum delimit_method delimit_groups;
855
856 +/* Function pointers. */
857 +static char *
858 +(*find_field) (struct linebuffer *line);
859 +
860 static struct option const longopts[] =
861 {
862 {"count", no_argument, NULL, 'c'},
863 @@ -198,7 +224,7 @@
864 return a pointer to the beginning of the line's field to be compared. */
865
866 static char *
867 -find_field (struct linebuffer const *line)
868 +find_field_uni (struct linebuffer *line)
869 {
870 size_t count;
871 char const *lp = line->buffer;
872 @@ -219,6 +245,83 @@
873 return line->buffer + i;
874 }
875
876 +#if HAVE_MBRTOWC
877 +
878 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
879 + do \
880 + { \
881 + mbstate_t state_bak; \
882 + \
883 + CONVFAIL = 0; \
884 + state_bak = *STATEP; \
885 + \
886 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
887 + \
888 + switch (MBLENGTH) \
889 + { \
890 + case (size_t)-2: \
891 + case (size_t)-1: \
892 + *STATEP = state_bak; \
893 + CONVFAIL++; \
894 + /* Fall through */ \
895 + case 0: \
896 + MBLENGTH = 1; \
897 + } \
898 + } \
899 + while (0)
900 +
901 +static char *
902 +find_field_multi (struct linebuffer *line)
903 +{
904 + size_t count;
905 + char *lp = line->buffer;
906 + size_t size = line->length - 1;
907 + size_t pos;
908 + size_t mblength;
909 + wchar_t wc;
910 + mbstate_t *statep;
911 + int convfail;
912 +
913 + pos = 0;
914 + statep = &(line->state);
915 +
916 + /* skip fields. */
917 + for (count = 0; count < skip_fields && pos < size; count++)
918 + {
919 + while (pos < size)
920 + {
921 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
922 +
923 + if (convfail || !iswblank (wc))
924 + {
925 + pos += mblength;
926 + break;
927 + }
928 + pos += mblength;
929 + }
930 +
931 + while (pos < size)
932 + {
933 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
934 +
935 + if (!convfail && iswblank (wc))
936 + break;
937 +
938 + pos += mblength;
939 + }
940 + }
941 +
942 + /* skip chars. */
943 + for (count = 0; count < skip_chars && pos < size; count++)
944 + {
945 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
946 + pos += mblength;
947 + }
948 +
949 + return lp + pos;
950 +}
951 +#endif
952 +
953 /* Return false if two strings OLD and NEW match, true if not.
954 OLD and NEW point not to the beginnings of the lines
955 but rather to the beginnings of the fields to compare.
956 @@ -227,6 +330,8 @@
957 static bool
958 different (char *old, char *new, size_t oldlen, size_t newlen)
959 {
960 + char *copy_old, *copy_new;
961 +
962 if (check_chars < oldlen)
963 oldlen = check_chars;
964 if (check_chars < newlen)
965 @@ -234,14 +339,92 @@
966
967 if (ignore_case)
968 {
969 - /* FIXME: This should invoke strcoll somehow. */
970 - return oldlen != newlen || memcasecmp (old, new, oldlen);
971 + size_t i;
972 +
973 + copy_old = alloca (oldlen + 1);
974 + copy_new = alloca (oldlen + 1);
975 +
976 + for (i = 0; i < oldlen; i++)
977 + {
978 + copy_old[i] = toupper (old[i]);
979 + copy_new[i] = toupper (new[i]);
980 + }
981 }
982 - else if (hard_LC_COLLATE)
983 - return xmemcoll (old, oldlen, new, newlen) != 0;
984 else
985 - return oldlen != newlen || memcmp (old, new, oldlen);
986 + {
987 + copy_old = (char *)old;
988 + copy_new = (char *)new;
989 + }
990 +
991 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
992 +}
993 +
994 +#if HAVE_MBRTOWC
995 +static int
996 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
997 +{
998 + size_t i, j, chars;
999 + const char *str[2];
1000 + char *copy[2];
1001 + size_t len[2];
1002 + mbstate_t state[2];
1003 + size_t mblength;
1004 + wchar_t wc, uwc;
1005 + mbstate_t state_bak;
1006 +
1007 + str[0] = old;
1008 + str[1] = new;
1009 + len[0] = oldlen;
1010 + len[1] = newlen;
1011 + state[0] = oldstate;
1012 + state[1] = newstate;
1013 +
1014 + for (i = 0; i < 2; i++)
1015 + {
1016 + copy[i] = alloca (len[i] + 1);
1017 +
1018 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1019 + {
1020 + state_bak = state[i];
1021 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1022 +
1023 + switch (mblength)
1024 + {
1025 + case (size_t)-1:
1026 + case (size_t)-2:
1027 + state[i] = state_bak;
1028 + /* Fall through */
1029 + case 0:
1030 + mblength = 1;
1031 + break;
1032 +
1033 + default:
1034 + if (ignore_case)
1035 + {
1036 + uwc = towupper (wc);
1037 +
1038 + if (uwc != wc)
1039 + {
1040 + mbstate_t state_wc;
1041 +
1042 + memset (&state_wc, '\0', sizeof(mbstate_t));
1043 + wcrtomb (copy[i] + j, uwc, &state_wc);
1044 + }
1045 + else
1046 + memcpy (copy[i] + j, str[i] + j, mblength);
1047 + }
1048 + else
1049 + memcpy (copy[i] + j, str[i] + j, mblength);
1050 + }
1051 + j += mblength;
1052 + }
1053 + copy[i][j] = '\0';
1054 + len[i] = j;
1055 + }
1056 +
1057 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1058 }
1059 +#endif
1060
1061 /* Output the line in linebuffer LINE to standard output
1062 provided that the switches say it should be output.
1063 @@ -295,15 +478,43 @@
1064 {
1065 char *prevfield IF_LINT (= NULL);
1066 size_t prevlen IF_LINT (= 0);
1067 +#if HAVE_MBRTOWC
1068 + mbstate_t prevstate;
1069 +
1070 + memset (&prevstate, '\0', sizeof (mbstate_t));
1071 +#endif
1072
1073 while (!feof (stdin))
1074 {
1075 char *thisfield;
1076 size_t thislen;
1077 +#if HAVE_MBRTOWC
1078 + mbstate_t thisstate;
1079 +#endif
1080 +
1081 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1082 break;
1083 thisfield = find_field (thisline);
1084 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1085 +#if HAVE_MBRTOWC
1086 + if (MB_CUR_MAX > 1)
1087 + {
1088 + thisstate = thisline->state;
1089 +
1090 + if (prevline->length == 0 || different_multi
1091 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1092 + {
1093 + fwrite (thisline->buffer, sizeof (char),
1094 + thisline->length, stdout);
1095 +
1096 + SWAP_LINES (prevline, thisline);
1097 + prevfield = thisfield;
1098 + prevlen = thislen;
1099 + prevstate = thisstate;
1100 + }
1101 + }
1102 + else
1103 +#endif
1104 if (prevline->length == 0
1105 || different (thisfield, prevfield, thislen, prevlen))
1106 {
1107 @@ -322,17 +533,26 @@
1108 size_t prevlen;
1109 uintmax_t match_count = 0;
1110 bool first_delimiter = true;
1111 +#if HAVE_MBRTOWC
1112 + mbstate_t prevstate;
1113 +#endif
1114
1115 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1116 goto closefiles;
1117 prevfield = find_field (prevline);
1118 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1119 +#if HAVE_MBRTOWC
1120 + prevstate = prevline->state;
1121 +#endif
1122
1123 while (!feof (stdin))
1124 {
1125 bool match;
1126 char *thisfield;
1127 size_t thislen;
1128 +#if HAVE_MBRTOWC
1129 + mbstate_t thisstate;
1130 +#endif
1131 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1132 {
1133 if (ferror (stdin))
1134 @@ -341,6 +561,15 @@
1135 }
1136 thisfield = find_field (thisline);
1137 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1138 +#if HAVE_MBRTOWC
1139 + if (MB_CUR_MAX > 1)
1140 + {
1141 + thisstate = thisline->state;
1142 + match = !different_multi (thisfield, prevfield,
1143 + thislen, prevlen, thisstate, prevstate);
1144 + }
1145 + else
1146 +#endif
1147 match = !different (thisfield, prevfield, thislen, prevlen);
1148 match_count += match;
1149
1150 @@ -373,6 +602,9 @@
1151 SWAP_LINES (prevline, thisline);
1152 prevfield = thisfield;
1153 prevlen = thislen;
1154 +#if HAVE_MBRTOWC
1155 + prevstate = thisstate;
1156 +#endif
1157 if (!match)
1158 match_count = 0;
1159 }
1160 @@ -417,6 +649,19 @@
1161
1162 atexit (close_stdout);
1163
1164 +#if HAVE_MBRTOWC
1165 + if (MB_CUR_MAX > 1)
1166 + {
1167 + find_field = find_field_multi;
1168 + }
1169 + else
1170 +#endif
1171 + {
1172 + find_field = find_field_uni;
1173 + }
1174 +
1175 +
1176 +
1177 skip_chars = 0;
1178 skip_fields = 0;
1179 check_chars = SIZE_MAX;
1180 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1181 +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1182 @@ -23,11 +23,33 @@
1183 #include <getopt.h>
1184 #include <sys/types.h>
1185
1186 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1187 +#if HAVE_WCHAR_H
1188 +# include <wchar.h>
1189 +#endif
1190 +
1191 +/* Get iswprint(), iswblank(), wcwidth(). */
1192 +#if HAVE_WCTYPE_H
1193 +# include <wctype.h>
1194 +#endif
1195 +
1196 #include "system.h"
1197 #include "error.h"
1198 #include "quote.h"
1199 #include "xstrtol.h"
1200
1201 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1202 + installation; work around this configuration error. */
1203 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1204 +# undef MB_LEN_MAX
1205 +# define MB_LEN_MAX 16
1206 +#endif
1207 +
1208 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1209 +#if HAVE_MBRTOWC && defined mbstate_t
1210 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1211 +#endif
1212 +
1213 #define TAB_WIDTH 8
1214
1215 /* The official name of this program (e.g., no `g' prefix). */
1216 @@ -35,20 +57,41 @@
1217
1218 #define AUTHORS proper_name ("David MacKenzie")
1219
1220 +#define FATAL_ERROR(Message) /* Report Message, then call usage (2). */ \
1221 + do \
1222 + { \
1223 + error (0, 0, "%s", (Message)); /* never use Message as the format */ \
1224 + usage (2); \
1225 + } \
1226 + while (0)
1227 +
1228 +enum operating_mode
1229 +{
1230 + /* Measure line width in screen columns (set as the default in main). */
1231 + column_mode,
1232 +
1233 + /* Measure line width in bytes (-b, --bytes). */
1234 + byte_mode,
1235 +
1236 + /* Measure line width in characters (-c, --characters). */
1237 + character_mode,
1238 +};
1239 +
1240 +/* The unit currently used to measure line width. (Default: column_mode) */
1241 +static enum operating_mode operating_mode;
1242 +
1243 /* If nonzero, try to break on whitespace. */
1244 static bool break_spaces;
1245
1246 -/* If nonzero, count bytes, not column positions. */
1247 -static bool count_bytes;
1248 -
1249 /* If nonzero, at least one of the files we read was standard input. */
1250 static bool have_read_stdin;
1251
1252 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1253 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1254
1255 static struct option const longopts[] =
1256 {
1257 {"bytes", no_argument, NULL, 'b'},
1258 + {"characters", no_argument, NULL, 'c'},
1259 {"spaces", no_argument, NULL, 's'},
1260 {"width", required_argument, NULL, 'w'},
1261 {GETOPT_HELP_OPTION_DECL},
1262 @@ -81,6 +124,7 @@
1263 "), stdout);
1264 fputs (_("\
1265 -b, --bytes count bytes rather than columns\n\
1266 + -c, --characters count characters rather than columns\n\
1267 -s, --spaces break at spaces\n\
1268 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1269 "), stdout);
1270 @@ -98,7 +142,7 @@
1271 static size_t
1272 adjust_column (size_t column, char c)
1273 {
1274 - if (!count_bytes)
1275 + if (operating_mode != byte_mode)
1276 {
1277 if (c == '\b')
1278 {
1279 @@ -121,30 +165,14 @@
1280 to stdout, with maximum line length WIDTH.
1281 Return true if successful. */
1282
1283 -static bool
1284 -fold_file (char const *filename, size_t width)
1285 +static void
1286 +fold_text (FILE *istream, size_t width, int *saved_errno)
1287 {
1288 - FILE *istream;
1289 int c;
1290 size_t column = 0; /* Screen column where next char will go. */
1291 size_t offset_out = 0; /* Index in `line_out' for next char. */
1292 static char *line_out = NULL;
1293 static size_t allocated_out = 0;
1294 - int saved_errno;
1295 -
1296 - if (STREQ (filename, "-"))
1297 - {
1298 - istream = stdin;
1299 - have_read_stdin = true;
1300 - }
1301 - else
1302 - istream = fopen (filename, "r");
1303 -
1304 - if (istream == NULL)
1305 - {
1306 - error (0, errno, "%s", filename);
1307 - return false;
1308 - }
1309
1310 while ((c = getc (istream)) != EOF)
1311 {
1312 @@ -172,6 +200,15 @@
1313 bool found_blank = false;
1314 size_t logical_end = offset_out;
1315
1316 + /* If LINE_OUT has no wide character,
1317 + put a new wide character in LINE_OUT
1318 + if column is bigger than width. */
1319 + if (offset_out == 0)
1320 + {
1321 + line_out[offset_out++] = c;
1322 + continue;
1323 + }
1324 +
1325 /* Look for the last blank. */
1326 while (logical_end)
1327 {
1328 @@ -218,11 +255,222 @@
1329 line_out[offset_out++] = c;
1330 }
1331
1332 - saved_errno = errno;
1333 + *saved_errno = errno;
1334 +
1335 + if (offset_out)
1336 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1337 +
1338 +}
1339 +/* Multibyte counterpart of fold_text: fold ISTREAM to stdout at WIDTH; errno is saved in *SAVED_ERRNO. */
1340 +#if HAVE_MBRTOWC
1341 +static void
1342 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1343 +{
1344 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1345 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1346 + char *bufpos = buf; /* Next read position of BUF (never NULL for memmove). */
1347 + wint_t wc; /* The wide character just decoded. */
1348 + size_t mblength; /* The byte size of a multibyte character which shows
1349 + as same character as WC. */
1350 + mbstate_t state, state_bak; /* State of the stream. */
1351 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
1352 +
1353 + static char *line_out = NULL;
1354 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1355 + static size_t allocated_out = 0;
1356 +
1357 + int increment;
1358 + size_t column = 0;
1359 +
1360 + size_t last_blank_pos;
1361 + size_t last_blank_column;
1362 + int is_blank_seen;
1363 + int last_blank_increment = 0;
1364 + int is_bs_following_last_blank;
1365 + size_t bs_following_last_blank_num;
1366 + int is_cr_after_last_blank;
1367 +
1368 +#define CLEAR_FLAGS \
1369 + do \
1370 + { \
1371 + last_blank_pos = 0; \
1372 + last_blank_column = 0; \
1373 + is_blank_seen = 0; \
1374 + is_bs_following_last_blank = 0; \
1375 + bs_following_last_blank_num = 0; \
1376 + is_cr_after_last_blank = 0; \
1377 + } \
1378 + while (0)
1379 +
1380 +#define START_NEW_LINE \
1381 + do \
1382 + { \
1383 + putchar ('\n'); \
1384 + column = 0; \
1385 + offset_out = 0; \
1386 + CLEAR_FLAGS; \
1387 + } \
1388 + while (0)
1389 +
1390 + CLEAR_FLAGS;
1391 + memset (&state, '\0', sizeof(mbstate_t));
1392 +
1393 + for (;; bufpos += mblength, buflen -= mblength)
1394 + {
1395 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1396 + {
1397 + memmove (buf, bufpos, buflen);
1398 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1399 + bufpos = buf;
1400 + }
1401 +
1402 + if (buflen < 1)
1403 + break;
1404 +
1405 + /* Get a wide character. */
1406 + convfail = 0;
1407 + state_bak = state;
1408 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1409 +
1410 + switch (mblength)
1411 + {
1412 + case (size_t)-1:
1413 + case (size_t)-2:
1414 + convfail++;
1415 + state = state_bak;
1416 + /* Fall through. */
1417 +
1418 + case 0:
1419 + mblength = 1;
1420 + break;
1421 + }
1422 +
1423 +rescan:
1424 + /* A newline ends the output line in every mode (-b and -c included);
1425 + an undecodable byte always counts as one position. */
1426 + if (convfail)
1427 + increment = 1;
1428 + else if (wc == L'\n')
1429 + {
1430 + fwrite (line_out, sizeof(char), offset_out, stdout);
1431 + START_NEW_LINE;
1432 + continue;
1433 + }
1434 + else if (operating_mode == byte_mode) /* byte mode */
1435 + increment = mblength;
1436 + else if (operating_mode == character_mode) /* character mode */
1437 + increment = 1;
1438 + else /* column mode */
1439 + {
1440 + switch (wc)
1441 + {
1442 + case L'\b':
1443 + increment = (column > 0) ? -1 : 0;
1444 + break;
1445 +
1446 + case L'\r':
1447 + increment = -1 * column;
1448 + break;
1449 +
1450 + case L'\t':
1451 + increment = TAB_WIDTH - column % TAB_WIDTH;
1452 + break;
1453 +
1454 + default:
1455 + increment = wcwidth (wc);
1456 + increment = (increment < 0) ? 0 : increment;
1457 + }
1458 + }
1459 +
1460 + if (column + increment > width && break_spaces && last_blank_pos)
1461 + {
1462 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1463 + putchar ('\n');
1464 +
1465 + offset_out = offset_out - last_blank_pos;
1466 + column = column - last_blank_column + ((is_cr_after_last_blank)
1467 + ? last_blank_increment : bs_following_last_blank_num);
1468 + memmove (line_out, line_out + last_blank_pos, offset_out);
1469 + CLEAR_FLAGS;
1470 + goto rescan;
1471 + }
1472 +
1473 + if (column + increment > width && column != 0)
1474 + {
1475 + fwrite (line_out, sizeof(char), offset_out, stdout);
1476 + START_NEW_LINE;
1477 + goto rescan;
1478 + }
1479 +
1480 + if (allocated_out < offset_out + mblength)
1481 + {
1482 + line_out = X2REALLOC (line_out, &allocated_out);
1483 + }
1484 +
1485 + memcpy (line_out + offset_out, bufpos, mblength);
1486 + offset_out += mblength;
1487 + column += increment;
1488 +
1489 + if (is_blank_seen && !convfail && wc == L'\r')
1490 + is_cr_after_last_blank = 1;
1491 +
1492 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1493 + ++bs_following_last_blank_num;
1494 + else
1495 + is_bs_following_last_blank = 0;
1496 +
1497 + if (break_spaces && !convfail && iswblank (wc))
1498 + {
1499 + last_blank_pos = offset_out;
1500 + last_blank_column = column;
1501 + is_blank_seen = 1;
1502 + last_blank_increment = increment;
1503 + is_bs_following_last_blank = 1;
1504 + bs_following_last_blank_num = 0;
1505 + is_cr_after_last_blank = 0;
1506 + }
1507 + }
1508 +
1509 + *saved_errno = errno;
1510
1511 if (offset_out)
1512 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1513
1514 +}
1515 +#endif
1516 +
1517 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1518 + to stdout, with maximum line length WIDTH.
1519 + Return true if successful. */
1520 +
1521 +static bool
1522 +fold_file (char *filename, size_t width)
1523 +{
1524 + FILE *istream;
1525 + int saved_errno;
1526 +
1527 + if (STREQ (filename, "-"))
1528 + {
1529 + istream = stdin;
1530 + have_read_stdin = true;
1531 + }
1532 + else
1533 + istream = fopen (filename, "r");
1534 +
1535 + if (istream == NULL)
1536 + {
1537 + error (0, errno, "%s", filename);
1538 + return false; /* bool result: 1 here would read as success */
1539 + }
1540 +
1541 + /* Pick the folding routine: multibyte-aware when the locale needs it. */
1542 +#if HAVE_MBRTOWC
1543 + if (MB_CUR_MAX > 1)
1544 + fold_multibyte_text (istream, width, &saved_errno);
1545 + else
1546 +#endif
1547 + fold_text (istream, width, &saved_errno);
1548 +
1549 if (ferror (istream))
1550 {
1551 error (0, saved_errno, "%s", filename);
1552 @@ -255,7 +506,8 @@
1553
1554 atexit (close_stdout);
1555
1556 - break_spaces = count_bytes = have_read_stdin = false;
1557 + operating_mode = column_mode;
1558 + break_spaces = have_read_stdin = false;
1559
1560 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1561 {
1562 @@ -264,7 +516,15 @@
1563 switch (optc)
1564 {
1565 case 'b': /* Count bytes rather than columns. */
1566 - count_bytes = true;
1567 + if (operating_mode != column_mode)
1568 + FATAL_ERROR (_("only one way of folding may be specified"));
1569 + operating_mode = byte_mode;
1570 + break;
1571 +
1572 + case 'c':
1573 + if (operating_mode != column_mode)
1574 + FATAL_ERROR (_("only one way of folding may be specified"));
1575 + operating_mode = character_mode;
1576 break;
1577
1578 case 's': /* Break at word boundaries. */
1579 --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1580 +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1581 @@ -23,10 +23,19 @@
1582
1583 #include <config.h>
1584
1585 +#include <assert.h>
1586 #include <getopt.h>
1587 #include <sys/types.h>
1588 #include <sys/wait.h>
1589 #include <signal.h>
1590 +#if HAVE_WCHAR_H
1591 +# include <wchar.h>
1592 +#endif
1593 +/* Get isw* functions. */
1594 +#if HAVE_WCTYPE_H
1595 +# include <wctype.h>
1596 +#endif
1597 +
1598 #include "system.h"
1599 #include "argmatch.h"
1600 #include "error.h"
1601 @@ -116,14 +125,38 @@
1602 /* Thousands separator; if -1, then there isn't one. */
1603 static int thousands_sep;
1604
1605 +static int force_general_numcompare = 0;
1606 +
1607 /* Nonzero if the corresponding locales are hard. */
1608 static bool hard_LC_COLLATE;
1609 -#if HAVE_NL_LANGINFO
1610 +#if HAVE_LANGINFO_CODESET
1611 static bool hard_LC_TIME;
1612 #endif
1613
1614 #define NONZERO(x) ((x) != 0)
1615
1616 +/* Set MBLENGTH to the byte length of the character at PTR (bounded by LIM); 1 if NUL or invalid. */
1617 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
1618 + do \
1619 + { \
1620 + wchar_t wc; \
1621 + mbstate_t state_bak; \
1622 + \
1623 + state_bak = (STATE); \
1624 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
1625 + \
1626 + switch (MBLENGTH) \
1627 + { \
1628 + case (size_t)-1: \
1629 + case (size_t)-2: \
1630 + (STATE) = state_bak; /* undo the failed conversion */ \
1631 + /* Fall through. */ \
1632 + case 0: \
1633 + (MBLENGTH) = 1; \
1634 + } \
1635 + } \
1636 + while (0)
1637 +
1638 /* The kind of blanks for '-b' to skip in various options. */
1639 enum blanktype { bl_start, bl_end, bl_both };
1640
1641 @@ -261,13 +294,11 @@
1642 they were read if all keys compare equal. */
1643 static bool stable;
1644
1645 -/* If TAB has this value, blanks separate fields. */
1646 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
1647 -
1648 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
1649 +/* Tab character separating fields. If tab_length is 0, then fields are
1650 separated by the empty string between a non-blank character and a blank
1651 character. */
1652 -static int tab = TAB_DEFAULT;
1653 +static char tab[MB_LEN_MAX + 1];
1654 +static size_t tab_length = 0;
1655
1656 /* Flag to remove consecutive duplicate lines from the output.
1657 Only the last of a sequence of equal lines will be output. */
1658 @@ -639,6 +670,44 @@
1659 update_proc (pid);
1660 }
1661
1662 +/* Function pointers; presumably bound at startup to the *_uni or *_mb variants (TODO confirm: assignment not shown here). */
1663 +static void
1664 +(*inittables) (void);
1665 +static char *
1666 +(*begfield) (const struct line*, const struct keyfield *);
1667 +static char *
1668 +(*limfield) (const struct line*, const struct keyfield *);
1669 +static int
1670 +(*getmonth) (char const *, size_t);
1671 +static int
1672 +(*keycompare) (const struct line *, const struct line *);
1673 +static int
1674 +(*numcompare) (const char *, const char *);
1675 +
1676 +/* Return nonzero if the multibyte character at STR (at most LEN bytes) is a
1677 + blank. Store its byte length in *LENGTH; 1 if it is NUL or undecodable. */
1678 +#if HAVE_MBRTOWC
1679 +static int
1680 +ismbblank (const char *str, size_t len, size_t *length)
1681 +{
1682 + mbstate_t st;
1683 + wchar_t wch;
1684 + size_t n;
1685 +
1686 + memset (&st, '\0', sizeof(mbstate_t));
1687 + n = mbrtowc (&wch, str, len, &st);
1688 +
1689 + if (n != (size_t)-1 && n != (size_t)-2)
1690 + {
1691 + *length = (n == 0) ? 1 : n;
1692 + return iswblank (wch);
1693 + }
1694 +
1695 + *length = 1;
1696 + return 0;
1697 +}
1698 +#endif
1699 +
1700 /* Clean up any remaining temporary files. */
1701
1702 static void
1703 @@ -978,7 +1047,7 @@
1704 free (node);
1705 }
1706
1707 -#if HAVE_NL_LANGINFO
1708 +#if HAVE_LANGINFO_CODESET
1709
1710 static int
1711 struct_month_cmp (const void *m1, const void *m2)
1712 @@ -993,7 +1062,7 @@
1713 /* Initialize the character class tables. */
1714
1715 static void
1716 -inittables (void)
1717 +inittables_uni (void)
1718 {
1719 size_t i;
1720
1721 @@ -1005,7 +1074,7 @@
1722 fold_toupper[i] = toupper (i);
1723 }
1724
1725 -#if HAVE_NL_LANGINFO
1726 +#if HAVE_LANGINFO_CODESET
1727 /* If we're not in the "C" locale, read different names for months. */
1728 if (hard_LC_TIME)
1729 {
1730 @@ -1031,6 +1100,64 @@
1731 xstrtol_fatal (e, oi, c, long_options, s);
1732 }
1733
1734 +#if HAVE_MBRTOWC
1735 +static void
1736 +inittables_mb (void)
1737 +{
1738 + int i, j, k, l;
1739 + char *name, *s;
1740 + size_t s_len, mblength;
1741 + char mbc[MB_LEN_MAX];
1742 + wchar_t wc, pwc;
1743 + mbstate_t state_mb, state_wc;
1744 + /* Fill monthtab with the locale's abbreviated month names, upper-cased, then sort it. */
1745 + for (i = 0; i < MONTHS_PER_YEAR; i++)
1746 + {
1747 + s = (char *) nl_langinfo (ABMON_1 + i);
1748 + s_len = strlen (s);
1749 + monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1); /* upper-casing may lengthen a character */
1750 + monthtab[i].val = i + 1;
1751 +
1752 + memset (&state_mb, '\0', sizeof (mbstate_t));
1753 + memset (&state_wc, '\0', sizeof (mbstate_t));
1754 +
1755 + for (j = 0; j < s_len;) /* skip leading blanks */
1756 + {
1757 + if (!ismbblank (s + j, s_len - j, &mblength))
1758 + break;
1759 + j += mblength;
1760 + }
1761 + /* Copy the rest one character at a time, converting to upper case. */
1762 + for (k = 0; j < s_len;)
1763 + {
1764 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1765 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1766 + if (mblength == 0)
1767 + break;
1768 +
1769 + pwc = towupper (wc);
1770 + if (pwc == wc)
1771 + {
1772 + memcpy (mbc, s + j, mblength);
1773 + j += mblength;
1774 + }
1775 + else
1776 + {
1777 + j += mblength;
1778 + mblength = wcrtomb (mbc, pwc, &state_wc);
1779 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
1780 + }
1781 +
1782 + for (l = 0; l < mblength; l++)
1783 + name[k++] = mbc[l];
1784 + }
1785 + name[k] = '\0';
1786 + }
1787 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
1788 + sizeof (struct month), struct_month_cmp);
1789 +}
1790 +#endif
1791 +
1792 /* Specify the amount of main memory to use when sorting. */
1793 static void
1794 specify_sort_size (int oi, char c, char const *s)
1795 @@ -1241,7 +1368,7 @@
1796 by KEY in LINE. */
1797
1798 static char *
1799 -begfield (const struct line *line, const struct keyfield *key)
1800 +begfield_uni (const struct line *line, const struct keyfield *key)
1801 {
1802 char *ptr = line->text, *lim = ptr + line->length - 1;
1803 size_t sword = key->sword;
1804 @@ -1251,10 +1378,10 @@
1805 /* The leading field separator itself is included in a field when -t
1806 is absent. */
1807
1808 - if (tab != TAB_DEFAULT)
1809 + if (tab_length)
1810 while (ptr < lim && sword--)
1811 {
1812 - while (ptr < lim && *ptr != tab)
1813 + while (ptr < lim && *ptr != tab[0])
1814 ++ptr;
1815 if (ptr < lim)
1816 ++ptr;
1817 @@ -1282,11 +1409,70 @@
1818 return ptr;
1819 }
1820
1821 +#if HAVE_MBRTOWC
1822 +static char *
1823 +begfield_mb (const struct line *line, const struct keyfield *key)
1824 +{
1825 + int i;
1826 + char *ptr = line->text, *lim = ptr + line->length - 1;
1827 + size_t sword = key->sword;
1828 + size_t schar = key->schar;
1829 + size_t mblength;
1830 + mbstate_t state;
1831 + /* Multibyte variant of begfield_uni: return a pointer to where KEY's field begins in LINE. */
1832 + memset (&state, '\0', sizeof(mbstate_t));
1833 +
1834 + if (tab_length) /* explicit separator: skip SWORD separator-terminated fields */
1835 + while (ptr < lim && sword--)
1836 + {
1837 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1838 + {
1839 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1840 + ptr += mblength;
1841 + }
1842 + if (ptr < lim)
1843 + {
1844 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1845 + ptr += mblength;
1846 + }
1847 + }
1848 + else /* no separator: a field is a run of blanks followed by non-blanks */
1849 + while (ptr < lim && sword--)
1850 + {
1851 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1852 + ptr += mblength;
1853 + if (ptr < lim)
1854 + {
1855 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1856 + ptr += mblength;
1857 + }
1858 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1859 + ptr += mblength;
1860 + }
1861 +
1862 + if (key->skipsblanks)
1863 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1864 + ptr += mblength;
1865 + /* Advance by SCHAR characters, but never past LIM. */
1866 + for (i = 0; i < schar; i++)
1867 + {
1868 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1869 +
1870 + if (ptr + mblength > lim)
1871 + break;
1872 + else
1873 + ptr += mblength;
1874 + }
1875 +
1876 + return ptr;
1877 +}
1878 +#endif
1879 +
1880 /* Return the limit of (a pointer to the first character after) the field
1881 in LINE specified by KEY. */
1882
1883 static char *
1884 -limfield (const struct line *line, const struct keyfield *key)
1885 +limfield_uni (const struct line *line, const struct keyfield *key)
1886 {
1887 char *ptr = line->text, *lim = ptr + line->length - 1;
1888 size_t eword = key->eword, echar = key->echar;
1889 @@ -1299,10 +1485,10 @@
1890 `beginning' is the first character following the delimiting TAB.
1891 Otherwise, leave PTR pointing at the first `blank' character after
1892 the preceding field. */
1893 - if (tab != TAB_DEFAULT)
1894 + if (tab_length)
1895 while (ptr < lim && eword--)
1896 {
1897 - while (ptr < lim && *ptr != tab)
1898 + while (ptr < lim && *ptr != tab[0])
1899 ++ptr;
1900 if (ptr < lim && (eword | echar))
1901 ++ptr;
1902 @@ -1348,10 +1534,10 @@
1903 */
1904
1905 /* Make LIM point to the end of (one byte past) the current field. */
1906 - if (tab != TAB_DEFAULT)
1907 + if (tab_length)
1908 {
1909 char *newlim;
1910 - newlim = memchr (ptr, tab, lim - ptr);
1911 + newlim = memchr (ptr, tab[0], lim - ptr);
1912 if (newlim)
1913 lim = newlim;
1914 }
1915 @@ -1384,6 +1570,113 @@
1916 return ptr;
1917 }
1918
1919 +#if HAVE_MBRTOWC
1920 +static char *
1921 +limfield_mb (const struct line *line, const struct keyfield *key)
1922 +{
1923 + char *ptr = line->text, *lim = ptr + line->length - 1;
1924 + size_t eword = key->eword, echar = key->echar;
1925 + int i;
1926 + size_t mblength;
1927 + mbstate_t state;
1928 + /* Multibyte variant of limfield_uni: return the limit of KEY's field in LINE. */
1929 + if (echar == 0)
1930 + eword++; /* skip all of end field. */
1931 +
1932 + memset (&state, '\0', sizeof(mbstate_t));
1933 +
1934 + if (tab_length)
1935 + while (ptr < lim && eword--)
1936 + {
1937 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1938 + {
1939 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1940 + ptr += mblength;
1941 + }
1942 + if (ptr < lim && (eword | echar))
1943 + {
1944 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1945 + ptr += mblength;
1946 + }
1947 + }
1948 + else
1949 + while (ptr < lim && eword--)
1950 + {
1951 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1952 + ptr += mblength;
1953 + if (ptr < lim)
1954 + {
1955 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1956 + ptr += mblength;
1957 + }
1958 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1959 + ptr += mblength;
1960 + }
1961 +
1962 +
1963 +# ifdef POSIX_UNSPECIFIED
1964 + /* Make LIM point to the end of (one byte past) the current field. */
1965 + if (tab_length)
1966 + {
1967 + char *p;
1968 +
1969 + /* Pull LIM back to the first separator at or after PTR, if any. */
1970 + for (p = ptr; p < lim;)
1971 + {
1972 + if (memcmp (p, tab, tab_length) == 0)
1973 + {
1974 + lim = p;
1975 + break;
1976 + }
1977 +
1978 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
1979 + p += mblength;
1980 + }
1981 + }
1982 + else
1983 + {
1984 + char *newlim;
1985 + newlim = ptr;
1986 +
1987 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
1988 + newlim += mblength;
1989 + if (ptr < lim) /* NOTE(review): this advances PTR, not NEWLIM -- confirm intent. */
1990 + {
1991 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1992 + ptr += mblength;
1993 + }
1994 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
1995 + newlim += mblength;
1996 + lim = newlim;
1997 + }
1998 +# endif
1999 +
2000 + if (echar != 0)
2001 + {
2002 + /* If we're skipping leading blanks, don't start counting characters
2003 + * until after skipping past any leading blanks. */
2004 + if (key->skipsblanks)
2005 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2006 + ptr += mblength;
2007 +
2008 + memset (&state, '\0', sizeof(mbstate_t));
2009 +
2010 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2011 + for (i = 0; i < echar; i++)
2012 + {
2013 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2014 +
2015 + if (ptr + mblength > lim)
2016 + break;
2017 + else
2018 + ptr += mblength;
2019 + }
2020 + }
2021 +
2022 + return ptr;
2023 +}
2024 +#endif
2025 +
2026 /* Fill BUF reading from FP, moving buf->left bytes from the end
2027 of buf->buf to the beginning first. If EOF is reached and the
2028 file wasn't terminated by a newline, supply one. Set up BUF's line
2029 @@ -1466,8 +1753,24 @@
2030 else
2031 {
2032 if (key->skipsblanks)
2033 - while (blanks[to_uchar (*line_start)])
2034 - line_start++;
2035 + {
2036 +#if HAVE_MBRTOWC
2037 + if (MB_CUR_MAX > 1)
2038 + {
2039 + size_t mblength;
2040 + mbstate_t state;
2041 + memset (&state, '\0', sizeof(mbstate_t));
2042 + while (line_start < line->keylim &&
2043 + ismbblank (line_start,
2044 + line->keylim - line_start,
2045 + &mblength))
2046 + line_start += mblength;
2047 + }
2048 + else
2049 +#endif
2050 + while (blanks[to_uchar (*line_start)])
2051 + line_start++;
2052 + }
2053 line->keybeg = line_start;
2054 }
2055 }
2056 @@ -1500,7 +1803,7 @@<