/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory | Revision Log


Revision 1.40 - (show annotations) (download) (as text)
Tue Oct 6 13:43:24 2009 UTC (6 weeks, 5 days ago) by ovasik
Branch: MAIN
Changes since 1.39: +2 -1 lines
File MIME type: text/x-patch
New upstream release 8.0 (beta), defuzz patches, remove applied patches
1 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2 --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3 +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4 @@ -26,7 +26,7 @@
5 my $prog = 'cut';
6 my $try = "Try \`$prog --help' for more information.\n";
7 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8 -my $inval = "$prog: invalid byte or field list\n$try";
9 +my $inval = "$prog: invalid byte, character or field list\n$try";
10 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11
12 my @Tests =
13 @@ -140,8 +140,8 @@
14 ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15
16 # None of the following invalid ranges provoked an error up to coreutils-6.9.
17 - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18 - {ERR=>"$prog: invalid decreasing range\n$try"}],
19 + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 @@ -0,0 +1,58 @@
27 +#! /bin/sh
28 +case $# in
29 + 0) xx='../src/sort';;
30 + *) xx="$1";;
31 +esac
32 +test "$VERBOSE" && echo=echo || echo=:
33 +$echo testing program: $xx
34 +errors=0
35 +test "$srcdir" || srcdir=.
36 +test "$VERBOSE" && $xx --version 2> /dev/null
37 +
38 +export LC_ALL=en_US.UTF-8
39 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40 +errors=0
41 +
42 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 +code=$?
44 +if test $code != 0; then
45 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46 + errors=`expr $errors + 1`
47 +else
48 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 + case $? in
50 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 + errors=`expr $errors + 1`;;
54 + 2) $echo "Test mb1 may have failed." 1>&2
55 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 + errors=`expr $errors + 1`;;
57 + esac
58 +fi
59 +
60 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 +code=$?
62 +if test $code != 0; then
63 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64 + errors=`expr $errors + 1`
65 +else
66 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 + case $? in
68 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 + errors=`expr $errors + 1`;;
72 + 2) $echo "Test mb2 may have failed." 1>&2
73 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 + errors=`expr $errors + 1`;;
75 + esac
76 +fi
77 +
78 +if test $errors = 0; then
79 + $echo Passed all 113 tests. 1>&2
80 +else
81 + $echo Failed $errors tests. 1>&2
82 +fi
83 +test $errors = 0 || errors=1
84 +exit $errors
85 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 @@ -0,0 +1,4 @@
88 +Apple@AA10@@20
89 +Banana@AA5@@30
90 +Citrus@AA20@@5
91 +Cherry@AA30@@10
92 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 @@ -0,0 +1,4 @@
95 +Citrus@AA20@@5
96 +Cherry@AA30@@10
97 +Apple@AA10@@20
98 +Banana@AA5@@30
99 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 @@ -0,0 +1,4 @@
102 +Apple@10
103 +Banana@5
104 +Citrus@20
105 +Cherry@30
106 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 @@ -0,0 +1,4 @@
109 +Banana@5
110 +Apple@10
111 +Citrus@20
112 +Cherry@30
113 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114 --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115 +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 @@ -192,6 +192,7 @@
117 misc/sort-compress \
118 misc/sort-continue \
119 misc/sort-files0-from \
120 + misc/sort-mb-tests \
121 misc/sort-merge \
122 misc/sort-merge-fdlimit \
123 misc/sort-rand \
124 @@ -391,6 +392,10 @@
125 $(root_tests)
126
127 pr_data = \
128 + misc/mb1.X \
129 + misc/mb1.I \
130 + misc/mb2.X \
131 + misc/mb2.I \
132 pr/0F \
133 pr/0FF \
134 pr/0FFnt \
135 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136 +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 @@ -22,6 +22,11 @@
138
139 # include <stdio.h>
140
141 +/* Get mbstate_t. */
142 +# if HAVE_WCHAR_H
143 +# include <wchar.h>
144 +# endif
145 +
146 /* A `struct linebuffer' holds a line of text. */
147
148 struct linebuffer
149 @@ -29,6 +34,9 @@
150 size_t size; /* Allocated. */
151 size_t length; /* Used. */
152 char *buffer;
153 +# if HAVE_WCHAR_H
154 + mbstate_t state;
155 +# endif
156 };
157
158 /* Initialize linebuffer LINEBUFFER for use. */
159 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160 +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 @@ -38,11 +38,28 @@
162 #include <stdio.h>
163 #include <getopt.h>
164 #include <sys/types.h>
165 +
166 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 +#if HAVE_WCHAR_H
168 +# include <wchar.h>
169 +#endif
170 +
171 #include "system.h"
172 #include "error.h"
173 #include "quote.h"
174 #include "xstrndup.h"
175
176 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 + installation; work around this configuration error. */
178 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179 +# define MB_LEN_MAX 16
180 +#endif
181 +
182 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183 +#if HAVE_MBRTOWC && defined mbstate_t
184 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185 +#endif
186 +
187 /* The official name of this program (e.g., no `g' prefix). */
188 #define PROGRAM_NAME "expand"
189
190 @@ -365,6 +383,142 @@
191 }
192 }
193
194 +#if HAVE_MBRTOWC
195 +static void
196 +expand_multibyte (void)
197 +{
198 + FILE *fp; /* Input stream. */
199 + mbstate_t i_state; /* Current shift state of the input stream. */
200 + mbstate_t i_state_bak; /* Backup copy of I_STATE. */
201 + mbstate_t o_state; /* Current shift state of the output stream. */
202 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
203 + char *bufpos; /* Next read position of BUF. */
204 + size_t buflen = 0; /* The length of the byte sequence in buf. */
205 + wchar_t wc; /* The wide character just converted. */
206 + size_t mblength; /* The byte length of the multibyte sequence
207 + that was converted into WC. */
208 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
209 + int column = 0; /* Column on screen of the next char. */
210 + int next_tab_column; /* Column the next tab stop is on. */
211 + int convert = 1; /* If nonzero, perform translations. */
212 +
213 + fp = next_file ((FILE *) NULL);
214 + if (fp == NULL)
215 + return;
216 +
217 + memset (&o_state, '\0', sizeof(mbstate_t));
218 + memset (&i_state, '\0', sizeof(mbstate_t));
219 +
220 + for (;;)
221 + {
222 + /* Refill the buffer BUF. */
223 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
224 + {
225 + memmove (buf, bufpos, buflen);
226 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
227 + bufpos = buf;
228 + }
229 +
230 + /* No character is left in BUF. */
231 + if (buflen < 1)
232 + {
233 + fp = next_file (fp);
234 +
235 + if (fp == NULL)
236 + break; /* No more files. */
237 + else
238 + {
239 + memset (&i_state, '\0', sizeof(mbstate_t));
240 + continue;
241 + }
242 + }
243 +
244 + /* Get a wide character. */
245 + i_state_bak = i_state;
246 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
247 +
248 + switch (mblength)
249 + {
250 + case (size_t)-1: /* illegal byte sequence. */
251 + case (size_t)-2:
252 + mblength = 1;
253 + i_state = i_state_bak;
254 + if (convert)
255 + {
256 + ++column;
257 + if (convert_entire_line == 0)
258 + convert = 0;
259 + }
260 + putchar (*bufpos);
261 + break;
262 +
263 + case 0: /* null. */
264 + mblength = 1;
265 + if (convert && convert_entire_line == 0)
266 + convert = 0;
267 + putchar ('\0');
268 + break;
269 +
270 + default:
271 + if (wc == L'\n') /* LF. */
272 + {
273 + tab_index = 0;
274 + column = 0;
275 + convert = 1;
276 + putchar ('\n');
277 + }
278 + else if (wc == L'\t' && convert) /* Tab. */
279 + {
280 + if (tab_size == 0)
281 + {
282 + /* Do not let tab_index == first_free_tab;
283 + stop when it is 1 less. */
284 + while (tab_index < first_free_tab - 1
285 + && column >= tab_list[tab_index])
286 + tab_index++;
287 + next_tab_column = tab_list[tab_index];
288 + if (tab_index < first_free_tab - 1)
289 + tab_index++;
290 + if (column >= next_tab_column)
291 + next_tab_column = column + 1;
292 + }
293 + else
294 + next_tab_column = column + tab_size - column % tab_size;
295 +
296 + while (column < next_tab_column)
297 + {
298 + putchar (' ');
299 + ++column;
300 + }
301 + }
302 + else /* Others. */
303 + {
304 + if (convert)
305 + {
306 + if (wc == L'\b')
307 + {
308 + if (column > 0)
309 + --column;
310 + }
311 + else
312 + {
313 + int width; /* The width of WC. */
314 +
315 + width = wcwidth (wc);
316 + column += (width > 0) ? width : 0;
317 + if (convert_entire_line == 0)
318 + convert = 0;
319 + }
320 + }
321 + fwrite (bufpos, sizeof(char), mblength, stdout);
322 + }
323 + }
324 + buflen -= mblength;
325 + bufpos += mblength;
326 + }
327 +}
328 +#endif
329 +
330 int
331 main (int argc, char **argv)
332 {
333 @@ -429,7 +583,12 @@
334
335 file_list = (optind < argc ? &argv[optind] : stdin_argv);
336
337 - expand ();
338 +#if HAVE_MBRTOWC
339 + if (MB_CUR_MAX > 1)
340 + expand_multibyte ();
341 + else
342 +#endif
343 + expand ();
344
345 if (have_read_stdin && fclose (stdin) != 0)
346 error (EXIT_FAILURE, errno, "-");
347 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
348 +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
349 @@ -23,17 +23,31 @@
350 #include <sys/types.h>
351 #include <getopt.h>
352
353 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
354 +#if HAVE_WCHAR_H
355 +# include <wchar.h>
356 +#endif
357 +
358 +/* Get iswblank(), towupper. */
359 +#if HAVE_WCTYPE_H
360 +# include <wctype.h>
361 +#endif
362 +
363 #include "system.h"
364 #include "error.h"
365 #include "hard-locale.h"
366 #include "linebuffer.h"
367 -#include "memcasecmp.h"
368 #include "quote.h"
369 #include "stdio--.h"
370 #include "xmemcoll.h"
371 #include "xstrtol.h"
372 #include "argmatch.h"
373
374 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
375 +#if HAVE_MBRTOWC && defined mbstate_t
376 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
377 +#endif
378 +
379 /* The official name of this program (e.g., no `g' prefix). */
380 #define PROGRAM_NAME "join"
381
382 @@ -104,10 +118,12 @@
383 /* Last element in `outlist', where a new element can be added. */
384 static struct outlist *outlist_end = &outlist_head;
385
386 -/* Tab character separating fields. If negative, fields are separated
387 - by any nonempty string of blanks, otherwise by exactly one
388 - tab character whose value (when cast to unsigned char) equals TAB. */
389 -static int tab = -1;
390 +/* Tab character separating fields. If NULL, fields are separated
391 + by any nonempty string of blanks. */
392 +static char *tab = NULL;
393 +
394 +/* The number of bytes used for tab. */
395 +static size_t tablen = 0;
396
397 /* If nonzero, check that the input is correctly ordered. */
398 static enum
399 @@ -199,10 +217,11 @@
400 if (ptr == lim)
401 return;
402
403 - if (0 <= tab)
404 + if (tab != NULL)
405 {
406 + unsigned char t = tab[0];
407 char *sep;
408 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
409 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
410 extract_field (line, ptr, sep - ptr);
411 }
412 else
413 @@ -229,6 +248,148 @@
414 extract_field (line, ptr, lim - ptr);
415 }
416
417 +#if HAVE_MBRTOWC
418 +static void
419 +xfields_multibyte (struct line *line)
420 +{
421 + char *ptr = line->buf.buffer;
422 + char const *lim = ptr + line->buf.length - 1;
423 + wchar_t wc = 0;
424 + size_t mblength = 1;
425 + mbstate_t state, state_bak;
426 +
427 + memset (&state, 0, sizeof (mbstate_t));
428 +
429 + if (ptr >= lim)
430 + return;
431 +
432 + if (tab != NULL)
433 + {
434 + unsigned char t = tab[0];
435 + char *sep = ptr;
436 + for (; ptr < lim; ptr = sep + mblength)
437 + {
438 + sep = ptr;
439 + while (sep < lim)
440 + {
441 + state_bak = state;
442 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
443 +
444 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
445 + {
446 + mblength = 1;
447 + state = state_bak;
448 + }
449 + mblength = (mblength < 1) ? 1 : mblength;
450 +
451 + if (mblength == tablen && !memcmp (sep, tab, mblength))
452 + break;
453 + else
454 + {
455 + sep += mblength;
456 + continue;
457 + }
458 + }
459 +
460 + if (sep >= lim)
461 + break;
462 +
463 + extract_field (line, ptr, sep - ptr);
464 + }
465 + }
466 + else
467 + {
468 + /* Skip leading blanks before the first field. */
469 + while(ptr < lim)
470 + {
471 + state_bak = state;
472 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
473 +
474 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
475 + {
476 + mblength = 1;
477 + state = state_bak;
478 + break;
479 + }
480 + mblength = (mblength < 1) ? 1 : mblength;
481 +
482 + if (!iswblank(wc))
483 + break;
484 + ptr += mblength;
485 + }
486 +
487 + do
488 + {
489 + char *sep;
490 + state_bak = state;
491 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
492 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
493 + {
494 + mblength = 1;
495 + state = state_bak;
496 + break;
497 + }
498 + mblength = (mblength < 1) ? 1 : mblength;
499 +
500 + sep = ptr + mblength;
501 + while (sep < lim)
502 + {
503 + state_bak = state;
504 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
505 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
506 + {
507 + mblength = 1;
508 + state = state_bak;
509 + break;
510 + }
511 + mblength = (mblength < 1) ? 1 : mblength;
512 +
513 + if (iswblank (wc))
514 + break;
515 +
516 + sep += mblength;
517 + }
518 +
519 + extract_field (line, ptr, sep - ptr);
520 + if (sep >= lim)
521 + return;
522 +
523 + state_bak = state;
524 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
525 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
526 + {
527 + mblength = 1;
528 + state = state_bak;
529 + break;
530 + }
531 + mblength = (mblength < 1) ? 1 : mblength;
532 +
533 + ptr = sep + mblength;
534 + while (ptr < lim)
535 + {
536 + state_bak = state;
537 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
538 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
539 + {
540 + mblength = 1;
541 + state = state_bak;
542 + break;
543 + }
544 + mblength = (mblength < 1) ? 1 : mblength;
545 +
546 + if (!iswblank (wc))
547 + break;
548 +
549 + ptr += mblength;
550 + }
551 + }
552 + while (ptr < lim);
553 + }
554 +
555 + extract_field (line, ptr, lim - ptr);
556 +}
557 +#endif
558 +
559 static void
560 freeline (struct line *line)
561 {
562 @@ -377,11 +601,18 @@
563
564 /* Print the join of LINE1 and LINE2. */
565
566 +#define PUT_TAB_CHAR \
567 + do \
568 + { \
569 + (tab != NULL) ? \
570 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
571 + } \
572 + while (0)
573 +
574 static void
575 prjoin (struct line const *line1, struct line const *line2)
576 {
577 const struct outlist *outlist;
578 - char output_separator = tab < 0 ? ' ' : tab;
579
580 outlist = outlist_head.next;
581 if (outlist)
582 @@ -416,7 +647,7 @@
583 o = o->next;
584 if (o == NULL)
585 break;
586 - putchar (output_separator);
587 + PUT_TAB_CHAR;
588 }
589 putchar ('\n');
590 }
591 @@ -434,23 +665,23 @@
592 prfield (join_field_1, line1);
593 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
594 {
595 - putchar (output_separator);
596 + PUT_TAB_CHAR;
597 prfield (i, line1);
598 }
599 for (i = join_field_1 + 1; i < line1->nfields; ++i)
600 {
601 - putchar (output_separator);
602 + PUT_TAB_CHAR;
603 prfield (i, line1);
604 }
605
606 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
607 {
608 - putchar (output_separator);
609 + PUT_TAB_CHAR;
610 prfield (i, line2);
611 }
612 for (i = join_field_2 + 1; i < line2->nfields; ++i)
613 {
614 - putchar (output_separator);
615 + PUT_TAB_CHAR;
616 prfield (i, line2);
617 }
618 putchar ('\n');
619 @@ -859,20 +1090,41 @@
620
621 case 't':
622 {
623 - unsigned char newtab = optarg[0];
624 - if (! newtab)
625 + char *newtab;
626 + size_t newtablen;
627 + if (! optarg[0])
628 error (EXIT_FAILURE, 0, _("empty tab"));
629 - if (optarg[1])
630 + newtab = xstrdup (optarg);
631 +#if HAVE_MBRTOWC
632 + if (MB_CUR_MAX > 1)
633 + {
634 + mbstate_t state;
635 +
636 + memset (&state, 0, sizeof (mbstate_t));
637 + newtablen = mbrtowc (NULL, newtab,
638 + strnlen (newtab, MB_LEN_MAX),
639 + &state);
640 + if (newtablen == (size_t) 0
641 + || newtablen == (size_t) -1
642 + || newtablen == (size_t) -2)
643 + newtablen = 1;
644 + }
645 + else
646 +#endif
647 + newtablen = 1;
648 +
649 + if (newtablen == 1 && newtab[1])
650 + {
651 + if (STREQ (newtab, "\\0"))
652 + newtab[0] = '\0';
653 + }
654 + if (tab != NULL && strcmp (tab, newtab))
655 {
656 - if (STREQ (optarg, "\\0"))
657 - newtab = '\0';
658 - else
659 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
660 - quote (optarg));
661 + free (newtab);
662 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
663 }
664 - if (0 <= tab && tab != newtab)
665 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
666 tab = newtab;
667 + tablen = newtablen;
668 }
669 break;
670
671 diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
672 --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
673 +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
674 @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
675 size_t jf_1, size_t jf_2)
676 {
677 /* Start of field to compare in each file. */
678 - char *beg1;
679 - char *beg2;
680 -
681 - size_t len1;
682 - size_t len2; /* Length of fields to compare. */
683 + char *beg[2];
684 + char *copy[2];
685 + size_t len[2]; /* Length of fields to compare. */
686 int diff;
687 + int i, j;
688
689 if (jf_1 < line1->nfields)
690 {
691 - beg1 = line1->fields[jf_1].beg;
692 - len1 = line1->fields[jf_1].len;
693 + beg[0] = line1->fields[jf_1].beg;
694 + len[0] = line1->fields[jf_1].len;
695 }
696 else
697 {
698 - beg1 = NULL;
699 - len1 = 0;
700 + beg[0] = NULL;
701 + len[0] = 0;
702 }
703
704 if (jf_2 < line2->nfields)
705 {
706 - beg2 = line2->fields[jf_2].beg;
707 - len2 = line2->fields[jf_2].len;
708 + beg[1] = line2->fields[jf_2].beg;
709 + len[1] = line2->fields[jf_2].len;
710 }
711 else
712 {
713 - beg2 = NULL;
714 - len2 = 0;
715 + beg[1] = NULL;
716 + len[1] = 0;
717 }
718
719 - if (len1 == 0)
720 - return len2 == 0 ? 0 : -1;
721 - if (len2 == 0)
722 + if (len[0] == 0)
723 + return len[1] == 0 ? 0 : -1;
724 + if (len[1] == 0)
725 return 1;
726
727 if (ignore_case)
728 {
729 - /* FIXME: ignore_case does not work with NLS (in particular,
730 - with multibyte chars). */
731 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
732 +#ifdef HAVE_MBRTOWC
733 + if (MB_CUR_MAX > 1)
734 + {
735 + size_t mblength;
736 + wchar_t wc, uwc;
737 + mbstate_t state, state_bak;
738 +
739 + memset (&state, '\0', sizeof (mbstate_t));
740 +
741 + for (i = 0; i < 2; i++)
742 + {
743 + copy[i] = alloca (len[i] + 1);
744 +
745 + for (j = 0; j < MIN (len[0], len[1]);)
746 + {
747 + state_bak = state;
748 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
749 +
750 + switch (mblength)
751 + {
752 + case (size_t) -1:
753 + case (size_t) -2:
754 + state = state_bak;
755 + /* Fall through */
756 + case 0:
757 + mblength = 1;
758 + break;
759 +
760 + default:
761 + uwc = towupper (wc);
762 +
763 + if (uwc != wc)
764 + {
765 + mbstate_t state_wc;
766 +
767 + memset (&state_wc, '\0', sizeof (mbstate_t));
768 + wcrtomb (copy[i] + j, uwc, &state_wc);
769 + }
770 + else
771 + memcpy (copy[i] + j, beg[i] + j, mblength);
772 + }
773 + j += mblength;
774 + }
775 + copy[i][j] = '\0';
776 + }
777 + }
778 + else
779 +#endif
780 + {
781 + for (i = 0; i < 2; i++)
782 + {
783 + copy[i] = alloca (len[i] + 1);
784 +
785 + for (j = 0; j < MIN (len[0], len[1]); j++)
786 + copy[i][j] = toupper (beg[i][j]);
787 +
788 + copy[i][j] = '\0';
789 + }
790 + }
791 }
792 else
793 {
794 - if (hard_LC_COLLATE)
795 - return xmemcoll (beg1, len1, beg2, len2);
796 - diff = memcmp (beg1, beg2, MIN (len1, len2));
797 + copy[0] = (unsigned char *) beg[0];
798 + copy[1] = (unsigned char *) beg[1];
799 }
800
801 + if (hard_LC_COLLATE)
802 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
803 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
804 +
805 +
806 if (diff)
807 return diff;
808 - return len1 < len2 ? -1 : len1 != len2;
809 + return len[0] - len[1];
810 }
811
812 /* Check that successive input lines PREV and CURRENT from input file
813 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
814 +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
815 @@ -23,6 +23,16 @@
816 #include <getopt.h>
817 #include <sys/types.h>
818
819 +/* Get mbstate_t, mbrtowc(). */
820 +#if HAVE_WCHAR_H
821 +# include <wchar.h>
822 +#endif
823 +
824 +/* Get isw* functions. */
825 +#if HAVE_WCTYPE_H
826 +# include <wctype.h>
827 +#endif
828 +
829 #include "system.h"
830 #include "argmatch.h"
831 #include "linebuffer.h"
832 @@ -32,7 +42,19 @@
833 #include "quote.h"
834 #include "xmemcoll.h"
835 #include "xstrtol.h"
836 -#include "memcasecmp.h"
837 +#include "xmemcoll.h"
838 +
839 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
840 + installation; work around this configuration error. */
841 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
842 +# define MB_LEN_MAX 16
843 +#endif
844 +
845 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
846 +#if HAVE_MBRTOWC && defined mbstate_t
847 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
848 +#endif
849 +
850
851 /* The official name of this program (e.g., no `g' prefix). */
852 #define PROGRAM_NAME "uniq"
853 @@ -109,6 +131,10 @@
854 /* Select whether/how to delimit groups of duplicate lines. */
855 static enum delimit_method delimit_groups;
856
857 +/* Function pointers. */
858 +static char *
859 +(*find_field) (struct linebuffer *line);
860 +
861 static struct option const longopts[] =
862 {
863 {"count", no_argument, NULL, 'c'},
864 @@ -198,7 +224,7 @@
865 return a pointer to the beginning of the line's field to be compared. */
866
867 static char *
868 -find_field (struct linebuffer const *line)
869 +find_field_uni (struct linebuffer *line)
870 {
871 size_t count;
872 char const *lp = line->buffer;
873 @@ -219,6 +245,83 @@
874 return line->buffer + i;
875 }
876
877 +#if HAVE_MBRTOWC
878 +
879 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
880 + do \
881 + { \
882 + mbstate_t state_bak; \
883 + \
884 + CONVFAIL = 0; \
885 + state_bak = *STATEP; \
886 + \
887 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
888 + \
889 + switch (MBLENGTH) \
890 + { \
891 + case (size_t)-2: \
892 + case (size_t)-1: \
893 + *STATEP = state_bak; \
894 + CONVFAIL++; \
895 + /* Fall through */ \
896 + case 0: \
897 + MBLENGTH = 1; \
898 + } \
899 + } \
900 + while (0)
901 +
902 +static char *
903 +find_field_multi (struct linebuffer *line)
904 +{
905 + size_t count;
906 + char *lp = line->buffer;
907 + size_t size = line->length - 1;
908 + size_t pos;
909 + size_t mblength;
910 + wchar_t wc;
911 + mbstate_t *statep;
912 + int convfail;
913 +
914 + pos = 0;
915 + statep = &(line->state);
916 +
917 + /* skip fields. */
918 + for (count = 0; count < skip_fields && pos < size; count++)
919 + {
920 + while (pos < size)
921 + {
922 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
923 +
924 + if (convfail || !iswblank (wc))
925 + {
926 + pos += mblength;
927 + break;
928 + }
929 + pos += mblength;
930 + }
931 +
932 + while (pos < size)
933 + {
934 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
935 +
936 + if (!convfail && iswblank (wc))
937 + break;
938 +
939 + pos += mblength;
940 + }
941 + }
942 +
943 + /* skip chars. */
944 + for (count = 0; count < skip_chars && pos < size; count++)
945 + {
946 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
947 + pos += mblength;
948 + }
949 +
950 + return lp + pos;
951 +}
952 +#endif
953 +
954 /* Return false if two strings OLD and NEW match, true if not.
955 OLD and NEW point not to the beginnings of the lines
956 but rather to the beginnings of the fields to compare.
957 @@ -227,6 +330,8 @@
958 static bool
959 different (char *old, char *new, size_t oldlen, size_t newlen)
960 {
961 + char *copy_old, *copy_new;
962 +
963 if (check_chars < oldlen)
964 oldlen = check_chars;
965 if (check_chars < newlen)
966 @@ -234,14 +339,92 @@
967
968 if (ignore_case)
969 {
970 - /* FIXME: This should invoke strcoll somehow. */
971 - return oldlen != newlen || memcasecmp (old, new, oldlen);
972 + size_t i;
973 +
974 + copy_old = alloca (oldlen + 1);
975 + copy_new = alloca (oldlen + 1);
976 +
977 + for (i = 0; i < oldlen; i++)
978 + {
979 + copy_old[i] = toupper (old[i]);
980 + copy_new[i] = toupper (new[i]);
981 + }
982 }
983 - else if (hard_LC_COLLATE)
984 - return xmemcoll (old, oldlen, new, newlen) != 0;
985 else
986 - return oldlen != newlen || memcmp (old, new, oldlen);
987 + {
988 + copy_old = (char *)old;
989 + copy_new = (char *)new;
990 + }
991 +
992 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
993 +}
994 +
995 +#if HAVE_MBRTOWC
996 +static int
997 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
998 +{
999 + size_t i, j, chars;
1000 + const char *str[2];
1001 + char *copy[2];
1002 + size_t len[2];
1003 + mbstate_t state[2];
1004 + size_t mblength;
1005 + wchar_t wc, uwc;
1006 + mbstate_t state_bak;
1007 +
1008 + str[0] = old;
1009 + str[1] = new;
1010 + len[0] = oldlen;
1011 + len[1] = newlen;
1012 + state[0] = oldstate;
1013 + state[1] = newstate;
1014 +
1015 + for (i = 0; i < 2; i++)
1016 + {
1017 + copy[i] = alloca (len[i] + 1);
1018 +
1019 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1020 + {
1021 + state_bak = state[i];
1022 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1023 +
1024 + switch (mblength)
1025 + {
1026 + case (size_t)-1:
1027 + case (size_t)-2:
1028 + state[i] = state_bak;
1029 + /* Fall through */
1030 + case 0:
1031 + mblength = 1;
1032 + break;
1033 +
1034 + default:
1035 + if (ignore_case)
1036 + {
1037 + uwc = towupper (wc);
1038 +
1039 + if (uwc != wc)
1040 + {
1041 + mbstate_t state_wc;
1042 +
1043 + memset (&state_wc, '\0', sizeof(mbstate_t));
1044 + wcrtomb (copy[i] + j, uwc, &state_wc);
1045 + }
1046 + else
1047 + memcpy (copy[i] + j, str[i] + j, mblength);
1048 + }
1049 + else
1050 + memcpy (copy[i] + j, str[i] + j, mblength);
1051 + }
1052 + j += mblength;
1053 + }
1054 + copy[i][j] = '\0';
1055 + len[i] = j;
1056 + }
1057 +
1058 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1059 }
1060 +#endif
1061
1062 /* Output the line in linebuffer LINE to standard output
1063 provided that the switches say it should be output.
1064 @@ -295,15 +478,43 @@
1065 {
1066 char *prevfield IF_LINT (= NULL);
1067 size_t prevlen IF_LINT (= 0);
1068 +#if HAVE_MBRTOWC
1069 + mbstate_t prevstate;
1070 +
1071 + memset (&prevstate, '\0', sizeof (mbstate_t));
1072 +#endif
1073
1074 while (!feof (stdin))
1075 {
1076 char *thisfield;
1077 size_t thislen;
1078 +#if HAVE_MBRTOWC
1079 + mbstate_t thisstate;
1080 +#endif
1081 +
1082 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1083 break;
1084 thisfield = find_field (thisline);
1085 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1086 +#if HAVE_MBRTOWC
1087 + if (MB_CUR_MAX > 1)
1088 + {
1089 + thisstate = thisline->state;
1090 +
1091 + if (prevline->length == 0 || different_multi
1092 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1093 + {
1094 + fwrite (thisline->buffer, sizeof (char),
1095 + thisline->length, stdout);
1096 +
1097 + SWAP_LINES (prevline, thisline);
1098 + prevfield = thisfield;
1099 + prevlen = thislen;
1100 + prevstate = thisstate;
1101 + }
1102 + }
1103 + else
1104 +#endif
1105 if (prevline->length == 0
1106 || different (thisfield, prevfield, thislen, prevlen))
1107 {
1108 @@ -322,17 +533,26 @@
1109 size_t prevlen;
1110 uintmax_t match_count = 0;
1111 bool first_delimiter = true;
1112 +#if HAVE_MBRTOWC
1113 + mbstate_t prevstate;
1114 +#endif
1115
1116 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1117 goto closefiles;
1118 prevfield = find_field (prevline);
1119 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1120 +#if HAVE_MBRTOWC
1121 + prevstate = prevline->state;
1122 +#endif
1123
1124 while (!feof (stdin))
1125 {
1126 bool match;
1127 char *thisfield;
1128 size_t thislen;
1129 +#if HAVE_MBRTOWC
1130 + mbstate_t thisstate;
1131 +#endif
1132 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1133 {
1134 if (ferror (stdin))
1135 @@ -341,6 +561,15 @@
1136 }
1137 thisfield = find_field (thisline);
1138 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1139 +#if HAVE_MBRTOWC
1140 + if (MB_CUR_MAX > 1)
1141 + {
1142 + thisstate = thisline->state;
1143 + match = !different_multi (thisfield, prevfield,
1144 + thislen, prevlen, thisstate, prevstate);
1145 + }
1146 + else
1147 +#endif
1148 match = !different (thisfield, prevfield, thislen, prevlen);
1149 match_count += match;
1150
1151 @@ -373,6 +602,9 @@
1152 SWAP_LINES (prevline, thisline);
1153 prevfield = thisfield;
1154 prevlen = thislen;
1155 +#if HAVE_MBRTOWC
1156 + prevstate = thisstate;
1157 +#endif
1158 if (!match)
1159 match_count = 0;
1160 }
1161 @@ -417,6 +649,19 @@
1162
1163 atexit (close_stdout);
1164
1165 +#if HAVE_MBRTOWC
1166 + if (MB_CUR_MAX > 1)
1167 + {
1168 + find_field = find_field_multi;
1169 + }
1170 + else
1171 +#endif
1172 + {
1173 + find_field = find_field_uni;
1174 + }
1175 +
1176 +
1177 +
1178 skip_chars = 0;
1179 skip_fields = 0;
1180 check_chars = SIZE_MAX;
1181 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1182 +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1183 @@ -23,11 +23,33 @@
1184 #include <getopt.h>
1185 #include <sys/types.h>
1186
1187 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1188 +#if HAVE_WCHAR_H
1189 +# include <wchar.h>
1190 +#endif
1191 +
1192 +/* Get iswprint(), iswblank(). */
1193 +#if HAVE_WCTYPE_H
1194 +# include <wctype.h>
1195 +#endif
1196 +
1197 #include "system.h"
1198 #include "error.h"
1199 #include "quote.h"
1200 #include "xstrtol.h"
1201
1202 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1203 + installation; work around this configuration error. */
1204 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1205 +# undef MB_LEN_MAX
1206 +# define MB_LEN_MAX 16
1207 +#endif
1208 +
1209 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1210 +#if HAVE_MBRTOWC && defined mbstate_t
1211 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1212 +#endif
1213 +
1214 #define TAB_WIDTH 8
1215
1216 /* The official name of this program (e.g., no `g' prefix). */
1217 @@ -35,20 +57,41 @@
1218
1219 #define AUTHORS proper_name ("David MacKenzie")
1220
1221 +#define FATAL_ERROR(Message) /* Report MESSAGE, then call usage (2). */ \
1222 + do \
1223 + { \
1224 + error (0, 0, "%s", (Message)); /* MESSAGE is data, not a format. */ \
1225 + usage (2); \
1226 + } \
1227 + while (0)
1228 +
1229 +enum operating_mode
1230 +{
1231 + /* Measure the line width in screen columns. */
1232 + column_mode,
1233 +
1234 + /* Measure the line width in bytes (selected by -b). */
1235 + byte_mode,
1236 +
1237 + /* Measure the line width in characters (selected by -c). */
1238 + character_mode,
1239 +};
1240 +
1241 +/* The unit used to measure line width. (Default: column_mode) */
1242 +static enum operating_mode operating_mode;
1243 +
1244 /* If nonzero, try to break on whitespace. */
1245 static bool break_spaces;
1246
1247 -/* If nonzero, count bytes, not column positions. */
1248 -static bool count_bytes;
1249 -
1250 /* If nonzero, at least one of the files we read was standard input. */
1251 static bool have_read_stdin;
1252
1253 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1254 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1255
1256 static struct option const longopts[] =
1257 {
1258 {"bytes", no_argument, NULL, 'b'},
1259 + {"characters", no_argument, NULL, 'c'},
1260 {"spaces", no_argument, NULL, 's'},
1261 {"width", required_argument, NULL, 'w'},
1262 {GETOPT_HELP_OPTION_DECL},
1263 @@ -81,6 +124,7 @@
1264 "), stdout);
1265 fputs (_("\
1266 -b, --bytes count bytes rather than columns\n\
1267 + -c, --characters count characters rather than columns\n\
1268 -s, --spaces break at spaces\n\
1269 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1270 "), stdout);
1271 @@ -98,7 +142,7 @@
1272 static size_t
1273 adjust_column (size_t column, char c)
1274 {
1275 - if (!count_bytes)
1276 + if (operating_mode != byte_mode)
1277 {
1278 if (c == '\b')
1279 {
1280 @@ -121,30 +165,14 @@
1281 to stdout, with maximum line length WIDTH.
1282 Return true if successful. */
1283
1284 -static bool
1285 -fold_file (char const *filename, size_t width)
1286 +static void
1287 +fold_text (FILE *istream, size_t width, int *saved_errno)
1288 {
1289 - FILE *istream;
1290 int c;
1291 size_t column = 0; /* Screen column where next char will go. */
1292 size_t offset_out = 0; /* Index in `line_out' for next char. */
1293 static char *line_out = NULL;
1294 static size_t allocated_out = 0;
1295 - int saved_errno;
1296 -
1297 - if (STREQ (filename, "-"))
1298 - {
1299 - istream = stdin;
1300 - have_read_stdin = true;
1301 - }
1302 - else
1303 - istream = fopen (filename, "r");
1304 -
1305 - if (istream == NULL)
1306 - {
1307 - error (0, errno, "%s", filename);
1308 - return false;
1309 - }
1310
1311 while ((c = getc (istream)) != EOF)
1312 {
1313 @@ -172,6 +200,15 @@
1314 bool found_blank = false;
1315 size_t logical_end = offset_out;
1316
1317 + /* If LINE_OUT has no wide character,
1318 + put a new wide character in LINE_OUT
1319 + if column is bigger than width. */
1320 + if (offset_out == 0)
1321 + {
1322 + line_out[offset_out++] = c;
1323 + continue;
1324 + }
1325 +
1326 /* Look for the last blank. */
1327 while (logical_end)
1328 {
1329 @@ -218,11 +255,222 @@
1330 line_out[offset_out++] = c;
1331 }
1332
1333 - saved_errno = errno;
1334 + *saved_errno = errno;
1335 +
1336 + if (offset_out)
1337 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1338 +
1339 +}
1340 +
1341 +#if HAVE_MBRTOWC /* Multibyte-aware counterpart of fold_text. */
1342 +static void
1343 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1344 +{
1345 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1346 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1347 + char *bufpos = buf; /* Next read position in BUF; not NULL, memmove reads it. */
1348 + wchar_t wc; /* The wide character just decoded; unset if CONVFAIL. */
1349 + size_t mblength; /* The byte length of the multibyte sequence
1350 + that was decoded into WC. */
1351 + mbstate_t state, state_bak; /* State of the stream. */
1352 + int convfail; /* 1 when the conversion failed, otherwise 0. */
1353 +
1354 + static char *line_out = NULL;
1355 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1356 + static size_t allocated_out = 0;
1357 + /* Column change caused by the current character; negative for \b, \r. */
1358 + int increment;
1359 + size_t column = 0;
1360 + /* Bookkeeping for -s: where the last blank ended in LINE_OUT. */
1361 + size_t last_blank_pos;
1362 + size_t last_blank_column;
1363 + int is_blank_seen;
1364 + int last_blank_increment = 0;
1365 + int is_bs_following_last_blank;
1366 + size_t bs_following_last_blank_num;
1367 + int is_cr_after_last_blank;
1368 +
1369 +#define CLEAR_FLAGS \
1370 + do \
1371 + { \
1372 + last_blank_pos = 0; \
1373 + last_blank_column = 0; \
1374 + is_blank_seen = 0; \
1375 + is_bs_following_last_blank = 0; \
1376 + bs_following_last_blank_num = 0; \
1377 + is_cr_after_last_blank = 0; \
1378 + } \
1379 + while (0)
1380 +
1381 +#define START_NEW_LINE \
1382 + do \
1383 + { \
1384 + putchar ('\n'); \
1385 + column = 0; \
1386 + offset_out = 0; \
1387 + CLEAR_FLAGS; \
1388 + } \
1389 + while (0)
1390 +
1391 + CLEAR_FLAGS;
1392 + memset (&state, '\0', sizeof(mbstate_t));
1393 +
1394 + for (;; bufpos += mblength, buflen -= mblength)
1395 + {
1396 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1397 + {
1398 + memmove (buf, bufpos, buflen);
1399 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1400 + bufpos = buf;
1401 + }
1402 +
1403 + if (buflen < 1)
1404 + break;
1405 +
1406 + /* Decode the next wide character; on failure consume one raw byte. */
1407 + convfail = 0;
1408 + state_bak = state;
1409 + mblength = mbrtowc (&wc, bufpos, buflen, &state);
1410 +
1411 + switch (mblength)
1412 + {
1413 + case (size_t)-1:
1414 + case (size_t)-2:
1415 + convfail++;
1416 + state = state_bak;
1417 + /* Fall through. */
1418 +
1419 + case 0:
1420 + mblength = 1;
1421 + break;
1422 + }
1423 +
1424 +rescan:
1425 + if (!convfail && wc == L'\n') /* newline ends the line in every mode */
1426 + {
1427 + fwrite (line_out, sizeof(char), offset_out, stdout);
1428 + START_NEW_LINE;
1429 + continue;
1430 + }
1431 + else if (operating_mode == byte_mode) /* byte mode */
1432 + increment = mblength;
1433 + else if (operating_mode == character_mode) /* character mode */
1434 + increment = 1;
1435 + else if (convfail) /* column mode, undecodable byte */
1436 + increment = 1;
1437 + else /* column mode */
1438 + {
1439 + switch (wc)
1440 + {
1441 + case L'\b':
1442 + increment = (column > 0) ? -1 : 0;
1443 + break;
1444 +
1445 + case L'\r':
1446 + increment = -1 * column;
1447 + break;
1448 +
1449 + case L'\t':
1450 + increment = TAB_WIDTH - column % TAB_WIDTH;
1451 + break;
1452 +
1453 + default:
1454 + increment = wcwidth (wc);
1455 + if (increment < 0) /* wcwidth failed: count no columns */
1456 + increment = 0;
1457 + break;
1458 + }
1459 + }
1460 +
1461 + if (column + increment > width && break_spaces && last_blank_pos)
1462 + {
1463 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1464 + putchar ('\n');
1465 +
1466 + offset_out = offset_out - last_blank_pos;
1467 + column = column - last_blank_column + ((is_cr_after_last_blank)
1468 + ? last_blank_increment : bs_following_last_blank_num);
1469 + memmove (line_out, line_out + last_blank_pos, offset_out);
1470 + CLEAR_FLAGS;
1471 + goto rescan;
1472 + }
1473 +
1474 + if (column + increment > width && column != 0)
1475 + {
1476 + fwrite (line_out, sizeof(char), offset_out, stdout);
1477 + START_NEW_LINE;
1478 + goto rescan;
1479 + }
1480 +
1481 + if (allocated_out < offset_out + mblength)
1482 + {
1483 + line_out = X2REALLOC (line_out, &allocated_out);
1484 + }
1485 +
1486 + memcpy (line_out + offset_out, bufpos, mblength);
1487 + offset_out += mblength;
1488 + column += increment;
1489 +
1490 + if (is_blank_seen && !convfail && wc == L'\r')
1491 + is_cr_after_last_blank = 1;
1492 +
1493 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1494 + ++bs_following_last_blank_num;
1495 + else
1496 + is_bs_following_last_blank = 0;
1497 +
1498 + if (break_spaces && !convfail && iswblank (wc))
1499 + {
1500 + last_blank_pos = offset_out;
1501 + last_blank_column = column;
1502 + is_blank_seen = 1;
1503 + last_blank_increment = increment;
1504 + is_bs_following_last_blank = 1;
1505 + bs_following_last_blank_num = 0;
1506 + is_cr_after_last_blank = 0;
1507 + }
1508 + }
1509 +
1510 + *saved_errno = errno;
1511
1512 if (offset_out)
1513 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1514
1515 +}
1516 +#endif
1517 +
1518 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1519 + to stdout, with maximum line length WIDTH.
1520 + Return true if successful, false if an error occurs. */
1521 +
1522 +static bool
1523 +fold_file (char const *filename, size_t width)
1524 +{
1525 + FILE *istream;
1526 + int saved_errno;
1527 +
1528 + if (STREQ (filename, "-"))
1529 + {
1530 + istream = stdin;
1531 + have_read_stdin = true;
1532 + }
1533 + else
1534 + istream = fopen (filename, "r");
1535 +
1536 + if (istream == NULL)
1537 + {
1538 + error (0, errno, "%s", filename);
1539 + return false; /* Failure: a bool 1 here would report success. */
1540 + }
1541 +
1542 + /* Use the multibyte-aware routine when the locale is multibyte. */
1543 +#if HAVE_MBRTOWC
1544 + if (MB_CUR_MAX > 1)
1545 + fold_multibyte_text (istream, width, &saved_errno);
1546 + else
1547 +#endif
1548 + fold_text (istream, width, &saved_errno);
1549 +
1550 if (ferror (istream))
1551 {
1552 error (0, saved_errno, "%s", filename);
1553 @@ -255,7 +506,8 @@
1554
1555 atexit (close_stdout);
1556
1557 - break_spaces = count_bytes = have_read_stdin = false;
1558 + operating_mode = column_mode;
1559 + break_spaces = have_read_stdin = false;
1560
1561 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1562 {
1563 @@ -264,7 +516,15 @@
1564 switch (optc)
1565 {
1566 case 'b': /* Count bytes rather than columns. */
1567 - count_bytes = true;
1568 + if (operating_mode != column_mode)
1569 + FATAL_ERROR (_("only one way of folding may be specified"));
1570 + operating_mode = byte_mode;
1571 + break;
1572 +
1573 + case 'c':
1574 + if (operating_mode != column_mode)
1575 + FATAL_ERROR (_("only one way of folding may be specified"));
1576 + operating_mode = character_mode;
1577 break;
1578
1579 case 's': /* Break at word boundaries. */
1580 --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1581 +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1582 @@ -23,10 +23,19 @@
1583
1584 #include <config.h>
1585
1586 +#include <assert.h>
1587 #include <getopt.h>
1588 #include <sys/types.h>
1589 #include <sys/wait.h>
1590 #include <signal.h>
1591 +#if HAVE_WCHAR_H
1592 +# include <wchar.h>
1593 +#endif
1594 +/* Get isw* functions. */
1595 +#if HAVE_WCTYPE_H
1596 +# include <wctype.h>
1597 +#endif
1598 +
1599 #include "system.h"
1600 #include "argmatch.h"
1601 #include "error.h"
1602 @@ -116,14 +125,38 @@
1603 /* Thousands separator; if -1, then there isn't one. */
1604 static int thousands_sep;
1605
1606 +static int force_general_numcompare = 0;
1607 +
1608 /* Nonzero if the corresponding locales are hard. */
1609 static bool hard_LC_COLLATE;
1610 -#if HAVE_NL_LANGINFO
1611 +#if HAVE_LANGINFO_CODESET
1612 static bool hard_LC_TIME;
1613 #endif
1614
1615 #define NONZERO(x) ((x) != 0)
1616
1617 +/* Set MBLENGTH to the byte length of the character at PTR (1 if invalid or NUL). */
1618 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
1619 + do \
1620 + { \
1621 + wchar_t wc; \
1622 + mbstate_t state_bak; \
1623 + \
1624 + state_bak = (STATE); \
1625 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
1626 + \
1627 + switch (MBLENGTH) \
1628 + { \
1629 + case (size_t)-1: \
1630 + case (size_t)-2: \
1631 + (STATE) = state_bak; /* Undo the failed conversion. */ \
1632 + /* Fall through. */ \
1633 + case 0: \
1634 + (MBLENGTH) = 1; \
1635 + } \
1636 + } \
1637 + while (0)
1638 +
1639 /* The kind of blanks for '-b' to skip in various options. */
1640 enum blanktype { bl_start, bl_end, bl_both };
1641
1642 @@ -261,13 +294,11 @@
1643 they were read if all keys compare equal. */
1644 static bool stable;
1645
1646 -/* If TAB has this value, blanks separate fields. */
1647 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
1648 -
1649 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
1650 +/* Tab character separating fields. If tab_length is 0, then fields are
1651 separated by the empty string between a non-blank character and a blank
1652 character. */
1653 -static int tab = TAB_DEFAULT;
1654 +static char tab[MB_LEN_MAX + 1];
1655 +static size_t tab_length = 0;
1656
1657 /* Flag to remove consecutive duplicate lines from the output.
1658 Only the last of a sequence of equal lines will be output. */
1659 @@ -639,6 +670,44 @@
1660 update_proc (pid);
1661 }
1662
1663 +/* Function pointers. */
1664 +static void
1665 +(*inittables) (void);
1666 +static char *
1667 +(*begfield) (const struct line*, const struct keyfield *);
1668 +static char *
1669 +(*limfield) (const struct line*, const struct keyfield *);
1670 +static int
1671 +(*getmonth) (char const *, size_t);
1672 +static int
1673 +(*keycompare) (const struct line *, const struct line *);
1674 +static int
1675 +(*numcompare) (const char *, const char *);
1676 +
1677 +/* Return nonzero if the multibyte character starting at STR (at most LEN
1678 + bytes are examined) is a blank. Store its byte length, never 0, in *LENGTH. */
1679 +#if HAVE_MBRTOWC
1680 +static int
1681 +ismbblank (const char *str, size_t len, size_t *length)
1682 +{
1683 + size_t mblength;
1684 + wchar_t wc;
1685 + mbstate_t state;
1686 +
1687 + memset (&state, '\0', sizeof(mbstate_t));
1688 + mblength = mbrtowc (&wc, str, len, &state);
1689 + /* An invalid or incomplete sequence counts as one non-blank byte. */
1690 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1691 + {
1692 + *length = 1;
1693 + return 0;
1694 + }
1695 + /* mbrtowc returns 0 for a NUL character; still report one byte. */
1696 + *length = (mblength < 1) ? 1 : mblength;
1697 + return iswblank (wc);
1698 +}
1699 +#endif
1700 +
1701 /* Clean up any remaining temporary files. */
1702
1703 static void
1704 @@ -978,7 +1047,7 @@
1705 free (node);
1706 }
1707
1708 -#if HAVE_NL_LANGINFO
1709 +#if HAVE_LANGINFO_CODESET
1710
1711 static int
1712 struct_month_cmp (const void *m1, const void *m2)
1713 @@ -993,7 +1062,7 @@
1714 /* Initialize the character class tables. */
1715
1716 static void
1717 -inittables (void)
1718 +inittables_uni (void)
1719 {
1720 size_t i;
1721
1722 @@ -1005,7 +1074,7 @@
1723 fold_toupper[i] = toupper (i);
1724 }
1725
1726 -#if HAVE_NL_LANGINFO
1727 +#if HAVE_LANGINFO_CODESET
1728 /* If we're not in the "C" locale, read different names for months. */
1729 if (hard_LC_TIME)
1730 {
1731 @@ -1031,6 +1100,64 @@
1732 xstrtol_fatal (e, oi, c, long_options, s);
1733 }
1734
1735 +#if HAVE_MBRTOWC /* Build monthtab from the locale's abbreviated month names. */
1736 +static void
1737 +inittables_mb (void)
1738 +{
1739 + int i;
1740 + char *name, *s;
1741 + size_t s_len, mblength, j, k, l;
1742 + char mbc[MB_LEN_MAX];
1743 + wchar_t wc, pwc;
1744 + mbstate_t state_mb, state_wc;
1745 +
1746 + for (i = 0; i < MONTHS_PER_YEAR; i++)
1747 + {
1748 + s = (char *) nl_langinfo (ABMON_1 + i);
1749 + s_len = strlen (s);
1750 + monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1);
1751 + monthtab[i].val = i + 1;
1752 + /* The bound above holds even if upper-casing lengthens a character. */
1753 + memset (&state_mb, '\0', sizeof (mbstate_t));
1754 + memset (&state_wc, '\0', sizeof (mbstate_t));
1755 + /* Skip leading blanks in the month name. */
1756 + for (j = 0; j < s_len;)
1757 + {
1758 + if (!ismbblank (s + j, s_len - j, &mblength))
1759 + break;
1760 + j += mblength;
1761 + }
1762 + /* Copy the rest of the name into NAME, upper-casing each character. */
1763 + for (k = 0; j < s_len;)
1764 + {
1765 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1766 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1767 + if (mblength == 0)
1768 + break;
1769 +
1770 + pwc = towupper (wc);
1771 + if (pwc == wc)
1772 + {
1773 + memcpy (mbc, s + j, mblength);
1774 + j += mblength;
1775 + }
1776 + else
1777 + {
1778 + j += mblength;
1779 + mblength = wcrtomb (mbc, pwc, &state_wc);
1780 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
1781 + }
1782 +
1783 + for (l = 0; l < mblength; l++)
1784 + name[k++] = mbc[l];
1785 + }
1786 + name[k] = '\0';
1787 + }
1788 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
1789 + sizeof (struct month), struct_month_cmp);
1790 +}
1791 +#endif
1792 +
1793 /* Specify the amount of main memory to use when sorting. */
1794 static void
1795 specify_sort_size (int oi, char c, char const *s)
1796 @@ -1241,7 +1368,7 @@
1797 by KEY in LINE. */
1798
1799 static char *
1800 -begfield (const struct line *line, const struct keyfield *key)
1801 +begfield_uni (const struct line *line, const struct keyfield *key)
1802 {
1803 char *ptr = line->text, *lim = ptr + line->length - 1;
1804 size_t sword = key->sword;
1805 @@ -1251,10 +1378,10 @@
1806 /* The leading field separator itself is included in a field when -t
1807 is absent. */
1808
1809 - if (tab != TAB_DEFAULT)
1810 + if (tab_length)
1811 while (ptr < lim && sword--)
1812 {
1813 - while (ptr < lim && *ptr != tab)
1814 + while (ptr < lim && *ptr != tab[0])
1815 ++ptr;
1816 if (ptr < lim)
1817 ++ptr;
1818 @@ -1282,11 +1409,70 @@
1819 return ptr;
1820 }
1821
1822 +#if HAVE_MBRTOWC /* Multibyte counterpart of begfield_uni. */
1823 +static char *
1824 +begfield_mb (const struct line *line, const struct keyfield *key)
1825 +{
1826 + size_t i;
1827 + char *ptr = line->text, *lim = ptr + line->length - 1;
1828 + size_t sword = key->sword;
1829 + size_t schar = key->schar;
1830 + size_t mblength;
1831 + mbstate_t state;
1832 +
1833 + memset (&state, '\0', sizeof(mbstate_t));
1834 + /* Skip SWORD fields: delimited by TAB if set, otherwise by blanks. */
1835 + if (tab_length)
1836 + while (ptr < lim && sword--)
1837 + {
1838 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1839 + {
1840 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1841 + ptr += mblength;
1842 + }
1843 + if (ptr < lim)
1844 + {
1845 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1846 + ptr += mblength;
1847 + }
1848 + }
1849 + else
1850 + while (ptr < lim && sword--)
1851 + {
1852 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1853 + ptr += mblength;
1854 + if (ptr < lim)
1855 + {
1856 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1857 + ptr += mblength;
1858 + }
1859 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1860 + ptr += mblength;
1861 + }
1862 +
1863 + if (key->skipsblanks)
1864 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1865 + ptr += mblength;
1866 + /* Advance PTR by SCHAR characters, but no further than LIM. */
1867 + for (i = 0; i < schar; i++)
1868 + {
1869 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1870 +
1871 + if (ptr + mblength > lim)
1872 + break;
1873 + else
1874 + ptr += mblength;
1875 + }
1876 +
1877 + return ptr;
1878 +}
1879 +#endif
1880 +
1881 /* Return the limit of (a pointer to the first character after) the field
1882 in LINE specified by KEY. */
1883
1884 static char *
1885 -limfield (const struct line *line, const struct keyfield *key)
1886 +limfield_uni (const struct line *line, const struct keyfield *key)
1887 {
1888 char *ptr = line->text, *lim = ptr + line->length - 1;
1889 size_t eword = key->eword, echar = key->echar;
1890 @@ -1299,10 +1485,10 @@
1891 `beginning' is the first character following the delimiting TAB.
1892 Otherwise, leave PTR pointing at the first `blank' character after
1893 the preceding field. */
1894 - if (tab != TAB_DEFAULT)
1895 + if (tab_length)
1896 while (ptr < lim && eword--)
1897 {
1898 - while (ptr < lim && *ptr != tab)
1899 + while (ptr < lim && *ptr != tab[0])
1900 ++ptr;
1901 if (ptr < lim && (eword | echar))
1902 ++ptr;
1903 @@ -1348,10 +1534,10 @@
1904 */
1905
1906 /* Make LIM point to the end of (one byte past) the current field. */
1907 - if (tab != TAB_DEFAULT)
1908 + if (tab_length)
1909 {
1910 char *newlim;
1911 - newlim = memchr (ptr, tab, lim - ptr);
1912 + newlim = memchr (ptr, tab[0], lim - ptr);
1913 if (newlim)
1914 lim = newlim;
1915 }
1916 @@ -1384,6 +1570,113 @@
1917 return ptr;
1918 }
1919
1920 +#if HAVE_MBRTOWC /* Multibyte counterpart of limfield_uni. */
1921 +static char *
1922 +limfield_mb (const struct line *line, const struct keyfield *key)
1923 +{
1924 + char *ptr = line->text, *lim = ptr + line->length - 1;
1925 + size_t eword = key->eword, echar = key->echar;
1926 + size_t i;
1927 + size_t mblength;
1928 + mbstate_t state;
1929 +
1930 + if (echar == 0)
1931 + eword++; /* skip all of end field. */
1932 +
1933 + memset (&state, '\0', sizeof(mbstate_t));
1934 + /* Skip EWORD fields: delimited by TAB if set, otherwise by blanks. */
1935 + if (tab_length)
1936 + while (ptr < lim && eword--)
1937 + {
1938 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1939 + {
1940 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1941 + ptr += mblength;
1942 + }
1943 + if (ptr < lim && (eword | echar))
1944 + {
1945 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1946 + ptr += mblength;
1947 + }
1948 + }
1949 + else
1950 + while (ptr < lim && eword--)
1951 + {
1952 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1953 + ptr += mblength;
1954 + if (ptr < lim)
1955 + {
1956 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1957 + ptr += mblength;
1958 + }
1959 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1960 + ptr += mblength;
1961 + }
1962 +
1963 +
1964 +# ifdef POSIX_UNSPECIFIED
1965 + /* Make LIM point to the end of (one byte past) the current field. */
1966 + if (tab_length)
1967 + {
1968 + char *p;
1969 +
1970 + /* Scan forward from PTR; the first delimiter becomes the new LIM. */
1971 + for (p = ptr; p < lim;)
1972 + {
1973 + if (memcmp (p, tab, tab_length) == 0)
1974 + {
1975 + lim = p;
1976 + break;
1977 + }
1978 +
1979 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
1980 + p += mblength;
1981 + }
1982 + }
1983 + else
1984 + {
1985 + char *newlim;
1986 + newlim = ptr;
1987 + /* Only NEWLIM moves here; PTR must stay at the field start. */
1988 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
1989 + newlim += mblength;
1990 + if (newlim < lim)
1991 + {
1992 + GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
1993 + newlim += mblength;
1994 + }
1995 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
1996 + newlim += mblength;
1997 + lim = newlim;
1998 + }
1999 +# endif
2000 +
2001 + if (echar != 0)
2002 + {
2003 + /* If we're skipping leading blanks, don't start counting characters
2004 + * until after skipping past any leading blanks. */
2005 + if (key->skipsblanks)
2006 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2007 + ptr += mblength;
2008 +
2009 + memset (&state, '\0', sizeof(mbstate_t));
2010 +
2011 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2012 + for (i = 0; i < echar; i++)
2013 + {
2014 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2015 +
2016 + if (ptr + mblength > lim)
2017 + break;
2018 + else
2019 + ptr += mblength;
2020 + }
2021 + }
2022 +
2023 + return ptr;
2024 +}
2025 +#endif
2026 +
2027 /* Fill BUF reading from FP, moving buf->left bytes from the end
2028 of buf->buf to the beginning first. If EOF is reached and the
2029 file wasn't terminated by a newline, supply one. Set up BUF's line
2030 @@ -1466,8 +1753,24 @@
2031 else
2032 {
2033 if (key->skipsblanks)
2034 - while (blanks[to_uchar (*line_start)])
2035 - line_start++;
2036 + {
2037 +#if HAVE_MBRTOWC
2038 + if (MB_CUR_MAX > 1)
2039 + {
2040 + size_t mblength;
2041 + mbstate_t state;
2042 + memset (&state, '\0', sizeof(mbstate_t));
2043 + while (line_start < line->keylim &&
2044 + ismbblank (line_start,
2045 + line->keylim - line_start,
2046 + &mblength))
2047 + line_start += mblength;
2048 + }
2049 + else
2050 +#endif
2051 + while (blanks[to_uchar (*line_start)])
2052 + line_start++;
2053 + }
2054 line->keybeg = line_start;
2055 }
2056 }
2057 @@ -1500,7 +