/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.42 - (show annotations) (download) (as text)
Wed Nov 18 14:47:59 2009 UTC (4 days, 15 hours ago) by ovasik
Branch: MAIN
CVS Tags: HEAD
Changes since 1.41: +2321 -13698 lines
File MIME type: text/x-patch
remove accidentally added .orig files from patches :(
1 diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
2 --- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
3 +++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
4 @@ -21,6 +21,11 @@
5
6 # include <stdio.h>
7
8 +/* Get mbstate_t. */
9 +# if HAVE_WCHAR_H
10 +# include <wchar.h>
11 +# endif
12 +
13 /* A `struct linebuffer' holds a line of text. */
14
15 struct linebuffer
16 @@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
19 char *buffer;
20 +# if HAVE_WCHAR_H
21 + mbstate_t state;
22 +# endif
23 };
24
25 /* Initialize linebuffer LINEBUFFER for use. */
26 diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
27 --- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
28 +++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
29 @@ -28,6 +28,11 @@
30 #include <assert.h>
31 #include <getopt.h>
32 #include <sys/types.h>
33 +
34 +/* Get mbstate_t, mbrtowc(). */
35 +#if HAVE_WCHAR_H
36 +# include <wchar.h>
37 +#endif
38 #include "system.h"
39
40 #include "error.h"
41 @@ -36,6 +41,18 @@
42 #include "quote.h"
43 #include "xstrndup.h"
44
45 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46 + installation; work around this configuration error. */
47 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
48 +# undef MB_LEN_MAX
49 +# define MB_LEN_MAX 16
50 +#endif
51 +
52 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53 +#if HAVE_MBRTOWC && defined mbstate_t
54 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
55 +#endif
56 +
57 /* The official name of this program (e.g., no `g' prefix). */
58 #define PROGRAM_NAME "cut"
59
60 @@ -71,6 +88,52 @@
61 } \
62 while (0)
63
64 +/* Refill the buffer BUF to get a multibyte character. */
65 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
66 + do \
67 + { \
68 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
69 + { \
70 + memmove (BUF, BUFPOS, BUFLEN); \
71 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
72 + BUFPOS = BUF; \
73 + } \
74 + } \
75 + while (0)
76 +
77 +/* Decode the wide character at BUFPOS into WC and its byte length into MBLENGTH; BUFPOS and BUFLEN are not advanced here. WC is WEOF when BUFLEN < 1.
78 + CONVFAIL is always set: 1 if the byte sequence is not valid as a character (MBLENGTH is then 1 and STATE is restored), otherwise 0. */
79 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
80 + do \
81 + { \
82 + mbstate_t state_bak; \
83 + \
84 + CONVFAIL = 0; /* Set before the early exit so callers never read it unset. */ \
85 + if (BUFLEN < 1) \
86 + { \
87 + WC = WEOF; \
88 + break; \
89 + } \
90 + \
91 + /* Get a wide character. */ \
92 + state_bak = STATE; \
93 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
94 + \
95 + switch (MBLENGTH) \
96 + { \
97 + case (size_t)-1: \
98 + case (size_t)-2: \
99 + CONVFAIL++; \
100 + STATE = state_bak; \
101 + /* Fall through. */ \
102 + \
103 + case 0: \
104 + MBLENGTH = 1; \
105 + break; \
106 + } \
107 + } \
108 + while (0)
109 +
110 struct range_pair
111 {
112 size_t lo;
113 @@ -89,7 +152,7 @@ static char *field_1_buffer;
114 /* The number of bytes allocated for FIELD_1_BUFFER. */
115 static size_t field_1_bufsize;
116
117 -/* The largest field or byte index used as an endpoint of a closed
118 +/* The largest byte, character or field index used as an endpoint of a closed
119 or degenerate range specification; this doesn't include the starting
120 index of right-open-ended ranges. For example, with either range spec
121 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
122 @@ -101,10 +164,11 @@ static size_t eol_range_start;
123
124 /* This is a bit vector.
125 In byte mode, which bytes to output.
126 + In character mode, which characters to output.
127 In field mode, which DELIM-separated fields to output.
128 - Both bytes and fields are numbered starting with 1,
129 + Bytes, characters and fields are numbered starting with 1,
130 so the zeroth bit of this array is unused.
131 - A field or byte K has been selected if
132 + A byte, character or field K has been selected if
133 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135 static unsigned char *printable_field;
136 @@ -113,15 +177,25 @@ enum operating_mode
137 {
138 undefined_mode,
139
140 - /* Output characters that are in the given bytes. */
141 + /* Output bytes that are at the given positions. */
142 byte_mode,
143
144 + /* Output characters that are at the given positions. */
145 + character_mode,
146 +
147 /* Output the given delimeter-separated fields. */
148 field_mode
149 };
150
151 static enum operating_mode operating_mode;
152
153 +/* If nonzero, when in byte mode, don't split multibyte characters. */
154 +static int byte_mode_character_aware;
155 +
156 +/* If nonzero, use the single-byte-locale functions even when
157 + this program runs in a multibyte locale. */
158 +static int force_singlebyte_mode;
159 +
160 /* If true do not output lines containing no delimeter characters.
161 Otherwise, all such lines are printed. This option is valid only
162 with field mode. */
163 @@ -133,6 +207,9 @@ static bool complement;
164
165 /* The delimeter character for field mode. */
166 static unsigned char delim;
167 +#if HAVE_WCHAR_H
168 +static wchar_t wcdelim;
169 +#endif
170
171 /* True if the --output-delimiter=STRING option was specified. */
172 static bool output_delimiter_specified;
173 @@ -206,7 +283,7 @@ Mandatory arguments to long options are
174 -f, --fields=LIST select only these fields; also print any line\n\
175 that contains no delimiter character, unless\n\
176 the -s option is specified\n\
177 - -n (ignored)\n\
178 + -n with -b: don't split multibyte characters\n\
179 "), stdout);
180 fputs (_("\
181 --complement complement the set of selected bytes, characters\n\
182 @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
183 in_digits = false;
184 /* Starting a range. */
185 if (dash_found)
186 - FATAL_ERROR (_("invalid byte or field list"));
187 + FATAL_ERROR (_("invalid byte, character or field list"));
188 dash_found = true;
189 fieldstr++;
190
191 @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
192 if (!rhs_specified)
193 {
194 /* `n-'. From `initial' to end of line. */
195 - eol_range_start = initial;
196 + if (eol_range_start == 0 ||
197 + (eol_range_start != 0 && eol_range_start > initial))
198 + eol_range_start = initial;
199 field_found = true;
200 }
201 else
202 {
203 /* `m-n' or `-n' (1-n). */
204 if (value < initial)
205 - FATAL_ERROR (_("invalid decreasing range"));
206 + FATAL_ERROR (_("invalid byte, character or field list"));
207
208 /* Is there already a range going to end of line? */
209 if (eol_range_start != 0)
210 @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
211 if (operating_mode == byte_mode)
212 error (0, 0,
213 _("byte offset %s is too large"), quote (bad_num));
214 + else if (operating_mode == character_mode)
215 + error (0, 0,
216 + _("character offset %s is too large"), quote (bad_num));
217 else
218 error (0, 0,
219 _("field number %s is too large"), quote (bad_num));
220 @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
221 fieldstr++;
222 }
223 else
224 - FATAL_ERROR (_("invalid byte or field list"));
225 + FATAL_ERROR (_("invalid byte, character or field list"));
226 }
227
228 max_range_endpoint = 0;
229 @@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
230 }
231 }
232
233 +#if HAVE_MBRTOWC
234 +/* This function is in use for the following case.
235 +
236 + 1. Read from the stream STREAM, printing to standard output any selected
237 + characters.
238 +
239 + 2. Read from stream STREAM, printing to standard output any selected bytes,
240 + without splitting multibyte characters. */
241 +
242 +static void
243 +cut_characters_or_cut_bytes_no_split (FILE *stream)
244 +{
245 + int idx; /* number of bytes or characters in the line so far. */
246 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247 + char *bufpos; /* Next read position of BUF. */
248 + size_t buflen; /* The length of the byte sequence in buf. */
249 + wint_t wc; /* A gotten wide character. */
250 + size_t mblength; /* The byte size of a multibyte character which shows
251 + as same character as WC. */
252 + mbstate_t state; /* State of the stream. */
253 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
254 +
255 + idx = 0;
256 + buflen = 0;
257 + bufpos = buf;
258 + memset (&state, '\0', sizeof(mbstate_t));
259 +
260 + while (1)
261 + {
262 + REFILL_BUFFER (buf, bufpos, buflen, stream);
263 +
264 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
265 +
266 + if (wc == WEOF)
267 + {
268 + if (idx > 0)
269 + putchar ('\n');
270 + break;
271 + }
272 + else if (wc == L'\n')
273 + {
274 + putchar ('\n');
275 + idx = 0;
276 + }
277 + else
278 + {
279 + idx += (operating_mode == byte_mode) ? mblength : 1;
280 + if (print_kth (idx, NULL))
281 + fwrite (bufpos, mblength, sizeof(char), stdout);
282 + }
283 +
284 + buflen -= mblength;
285 + bufpos += mblength;
286 + }
287 +}
288 +#endif
289 +
290 /* Read from stream STREAM, printing to standard output any selected fields. */
291
292 static void
293 @@ -701,13 +840,192 @@ cut_fields (FILE *stream)
294 }
295 }
296
297 +#if HAVE_MBRTOWC /* Multibyte-locale counterpart of cut_fields: read STREAM and print to standard output the selected WCDELIM-separated fields. */
298 +static void
299 +cut_fields_mb (FILE *stream)
300 +{
301 + int c;
302 + unsigned int field_idx;
303 + int found_any_selected_field;
304 + int buffer_first_field;
305 + int empty_input;
306 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
307 + char *bufpos; /* Next read position of BUF. */
308 + size_t buflen; /* The length of the byte sequence in buf. */
309 + wint_t wc = 0; /* A gotten wide character. */
310 + size_t mblength; /* The byte size of a multibyte character which shows
311 + as same character as WC. */
312 + mbstate_t state; /* State of the stream. */
313 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. Initialized so it is never read indeterminate on empty input. */
314 +
315 + found_any_selected_field = 0;
316 + field_idx = 1;
317 + bufpos = buf;
318 + buflen = 0;
319 + memset (&state, '\0', sizeof(mbstate_t));
320 +
321 + c = getc (stream);
322 + empty_input = (c == EOF);
323 + if (c != EOF)
324 + ungetc (c, stream);
325 + else
326 + wc = WEOF;
327 +
328 + /* To support the semantics of the -s flag, we may have to buffer
329 + all of the first field to determine whether it is `delimited.'
330 + But that is unnecessary if all non-delimited lines must be printed
331 + and the first field has been selected, or if non-delimited lines
332 + must be suppressed and the first field has *not* been selected.
333 + That is because a non-delimited line has exactly one field. */
334 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
335 +
336 + while (1)
337 + {
338 + if (field_idx == 1 && buffer_first_field)
339 + {
340 + int len = 0;
341 +
342 + while (1)
343 + {
344 + REFILL_BUFFER (buf, bufpos, buflen, stream);
345 +
346 + GET_NEXT_WC_FROM_BUFFER
347 + (wc, bufpos, buflen, mblength, state, convfail);
348 +
349 + if (wc == WEOF)
350 + break;
351 +
352 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
353 + memcpy (field_1_buffer + len, bufpos, mblength);
354 + len += mblength;
355 + buflen -= mblength;
356 + bufpos += mblength;
357 +
358 + if (!convfail && (wc == L'\n' || wc == wcdelim))
359 + break;
360 + }
361 +
362 + if (wc == WEOF)
363 + break;
364 +
365 + /* If the first field extends to the end of line (it is not
366 + delimited) and we are printing all non-delimited lines,
367 + print this one. */
368 + if (convfail || (!convfail && wc != wcdelim))
369 + {
370 + if (suppress_non_delimited)
371 + {
372 + /* Empty. */
373 + }
374 + else
375 + {
376 + fwrite (field_1_buffer, sizeof (char), len, stdout);
377 + /* Make sure the output line is newline terminated. */
378 + if (convfail || (!convfail && wc != L'\n'))
379 + putchar ('\n');
380 + }
381 + continue;
382 + }
383 +
384 + if (print_kth (1, NULL))
385 + {
386 + /* Print the field, but not the trailing delimiter, which occupies MBLENGTH bytes (possibly more than one). */
387 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
388 + found_any_selected_field = 1;
389 + }
390 + ++field_idx;
391 + }
392 +
393 + if (wc != WEOF)
394 + {
395 + if (print_kth (field_idx, NULL))
396 + {
397 + if (found_any_selected_field)
398 + {
399 + fwrite (output_delimiter_string, sizeof (char),
400 + output_delimiter_length, stdout);
401 + }
402 + found_any_selected_field = 1;
403 + }
404 +
405 + while (1)
406 + {
407 + REFILL_BUFFER (buf, bufpos, buflen, stream);
408 +
409 + GET_NEXT_WC_FROM_BUFFER
410 + (wc, bufpos, buflen, mblength, state, convfail);
411 +
412 + if (wc == WEOF)
413 + break;
414 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
415 + {
416 + buflen -= mblength;
417 + bufpos += mblength;
418 + break;
419 + }
420 +
421 + if (print_kth (field_idx, NULL))
422 + fwrite (bufpos, mblength, sizeof(char), stdout);
423 +
424 + buflen -= mblength;
425 + bufpos += mblength;
426 + }
427 + }
428 +
429 + if ((!convfail || wc == L'\n') && buflen < 1)
430 + wc = WEOF;
431 +
432 + if (!convfail && wc == wcdelim)
433 + ++field_idx;
434 + else if (wc == WEOF || (!convfail && wc == L'\n'))
435 + {
436 + if (found_any_selected_field
437 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
438 + putchar ('\n');
439 + if (wc == WEOF)
440 + break;
441 + field_idx = 1;
442 + found_any_selected_field = 0;
443 + }
444 + }
445 +}
446 +#endif
447 +
448 static void
449 cut_stream (FILE *stream)
450 {
451 - if (operating_mode == byte_mode)
452 - cut_bytes (stream);
453 +#if HAVE_MBRTOWC
454 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
455 + {
456 + switch (operating_mode)
457 + {
458 + case byte_mode:
459 + if (byte_mode_character_aware)
460 + cut_characters_or_cut_bytes_no_split (stream);
461 + else
462 + cut_bytes (stream);
463 + break;
464 +
465 + case character_mode:
466 + cut_characters_or_cut_bytes_no_split (stream);
467 + break;
468 +
469 + case field_mode:
470 + cut_fields_mb (stream);
471 + break;
472 +
473 + default:
474 + abort ();
475 + }
476 + }
477 else
478 - cut_fields (stream);
479 +#endif
480 + {
481 + if (operating_mode == field_mode)
482 + cut_fields (stream);
483 + else
484 + cut_bytes (stream);
485 + }
486 }
487
488 /* Process file FILE to standard output.
489 @@ -757,6 +1075,8 @@ main (int argc, char **argv)
490 bool ok;
491 bool delim_specified = false;
492 char *spec_list_string IF_LINT(= NULL);
493 + char mbdelim[MB_LEN_MAX + 1];
494 + size_t delimlen = 0;
495
496 initialize_main (&argc, &argv);
497 set_program_name (argv[0]);
498 @@ -779,7 +1099,6 @@ main (int argc, char **argv)
499 switch (optc)
500 {
501 case 'b':
502 - case 'c':
503 /* Build the byte list. */
504 if (operating_mode != undefined_mode)
505 FATAL_ERROR (_("only one type of list may be specified"));
506 @@ -787,6 +1106,14 @@ main (int argc, char **argv)
507 spec_list_string = optarg;
508 break;
509
510 + case 'c':
511 + /* Build the character list. */
512 + if (operating_mode != undefined_mode)
513 + FATAL_ERROR (_("only one type of list may be specified"));
514 + operating_mode = character_mode;
515 + spec_list_string = optarg;
516 + break;
517 +
518 case 'f':
519 /* Build the field list. */
520 if (operating_mode != undefined_mode)
521 @@ -798,10 +1125,35 @@ main (int argc, char **argv)
522 case 'd':
523 /* New delimiter. */
524 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
525 - if (optarg[0] != '\0' && optarg[1] != '\0')
526 - FATAL_ERROR (_("the delimiter must be a single character"));
527 - delim = optarg[0];
528 - delim_specified = true;
529 + {
530 +#if HAVE_MBRTOWC
531 + if(MB_CUR_MAX > 1)
532 + {
533 + mbstate_t state;
534 +
535 + memset (&state, '\0', sizeof(mbstate_t));
536 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
537 +
538 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
539 + ++force_singlebyte_mode;
540 + else
541 + {
542 + delimlen = (delimlen < 1) ? 1 : delimlen;
543 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
544 + FATAL_ERROR (_("the delimiter must be a single character"));
545 + memcpy (mbdelim, optarg, delimlen);
546 + }
547 + }
548 +
549 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
550 +#endif
551 + {
552 + if (optarg[0] != '\0' && optarg[1] != '\0')
553 + FATAL_ERROR (_("the delimiter must be a single character"));
554 + delim = (unsigned char) optarg[0];
555 + }
556 + delim_specified = true;
557 + }
558 break;
559
560 case OUTPUT_DELIMITER_OPTION:
561 @@ -814,6 +1166,7 @@ main (int argc, char **argv)
562 break;
563
564 case 'n':
565 + byte_mode_character_aware = 1;
566 break;
567
568 case 's':
569 @@ -836,7 +1189,7 @@ main (int argc, char **argv)
570 if (operating_mode == undefined_mode)
571 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
572
573 - if (delim != '\0' && operating_mode != field_mode)
574 + if (delim_specified && operating_mode != field_mode)
575 FATAL_ERROR (_("an input delimiter may be specified only\
576 when operating on fields"));
577
578 @@ -863,15 +1216,34 @@ main (int argc, char **argv)
579 }
580
581 if (!delim_specified)
582 - delim = '\t';
583 + {
584 + delim = '\t';
585 +#ifdef HAVE_MBRTOWC
586 + wcdelim = L'\t';
587 + mbdelim[0] = '\t';
588 + mbdelim[1] = '\0';
589 + delimlen = 1;
590 +#endif
591 + }
592
593 if (output_delimiter_string == NULL)
594 {
595 - static char dummy[2];
596 - dummy[0] = delim;
597 - dummy[1] = '\0';
598 - output_delimiter_string = dummy;
599 - output_delimiter_length = 1;
600 +#ifdef HAVE_MBRTOWC
601 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
602 + {
603 + output_delimiter_string = xstrdup(mbdelim);
604 + output_delimiter_length = delimlen;
605 + }
606 +
607 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
608 +#endif
609 + {
610 + static char dummy[2];
611 + dummy[0] = delim;
612 + dummy[1] = '\0';
613 + output_delimiter_string = dummy;
614 + output_delimiter_length = 1;
615 + }
616 }
617
618 if (optind == argc)
619 diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
620 --- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
621 +++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
622 @@ -37,11 +37,28 @@
623 #include <stdio.h>
624 #include <getopt.h>
625 #include <sys/types.h>
626 +
627 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
628 +#if HAVE_WCHAR_H
629 +# include <wchar.h>
630 +#endif
631 +
632 #include "system.h"
633 #include "error.h"
634 #include "quote.h"
635 #include "xstrndup.h"
636
637 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
638 + installation; work around this configuration error. */
639 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
640 +# define MB_LEN_MAX 16
641 +#endif
642 +
643 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
644 +#if HAVE_MBRTOWC && defined mbstate_t
645 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
646 +#endif
647 +
648 /* The official name of this program (e.g., no `g' prefix). */
649 #define PROGRAM_NAME "expand"
650
651 @@ -357,6 +374,142 @@ expand (void)
652 }
653 }
654
655 +#if HAVE_MBRTOWC /* Multibyte-locale counterpart of expand: copy the files returned by next_file to standard output, converting tabs to spaces. */
656 +static void
657 +expand_multibyte (void)
658 +{
659 + FILE *fp; /* Input stream. */
660 + mbstate_t i_state; /* Current shift state of the input stream. */
661 + mbstate_t i_state_bak; /* Back up the I_STATE. */
662 + mbstate_t o_state; /* Current shift state of the output stream. */
663 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
664 + char *bufpos = buf; /* Next read position of BUF. Starts at BUF so the first refill never passes an indeterminate pointer to memmove. */
665 + size_t buflen = 0; /* The length of the byte sequence in buf. */
666 + wchar_t wc; /* A gotten wide character. */
667 + size_t mblength; /* The byte size of a multibyte character
668 + which shows as same character as WC. */
669 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
670 + int column = 0; /* Column on screen of the next char. */
671 + int next_tab_column; /* Column the next tab stop is on. */
672 + int convert = 1; /* If nonzero, perform translations. */
673 +
674 + fp = next_file ((FILE *) NULL);
675 + if (fp == NULL)
676 + return;
677 +
678 + memset (&o_state, '\0', sizeof(mbstate_t));
679 + memset (&i_state, '\0', sizeof(mbstate_t));
680 +
681 + for (;;)
682 + {
683 + /* Refill the buffer BUF. */
684 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
685 + {
686 + memmove (buf, bufpos, buflen);
687 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
688 + bufpos = buf;
689 + }
690 +
691 + /* No character is left in BUF. */
692 + if (buflen < 1)
693 + {
694 + fp = next_file (fp);
695 +
696 + if (fp == NULL)
697 + break; /* No more files. */
698 + else
699 + {
700 + memset (&i_state, '\0', sizeof(mbstate_t));
701 + continue;
702 + }
703 + }
704 +
705 + /* Get a wide character. */
706 + i_state_bak = i_state;
707 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
708 +
709 + switch (mblength)
710 + {
711 + case (size_t)-1: /* illegal byte sequence. */
712 + case (size_t)-2:
713 + mblength = 1;
714 + i_state = i_state_bak;
715 + if (convert)
716 + {
717 + ++column;
718 + if (convert_entire_line == 0)
719 + convert = 0;
720 + }
721 + putchar (*bufpos);
722 + break;
723 +
724 + case 0: /* null. */
725 + mblength = 1;
726 + if (convert && convert_entire_line == 0)
727 + convert = 0;
728 + putchar ('\0');
729 + break;
730 +
731 + default:
732 + if (wc == L'\n') /* LF. */
733 + {
734 + tab_index = 0;
735 + column = 0;
736 + convert = 1;
737 + putchar ('\n');
738 + }
739 + else if (wc == L'\t' && convert) /* Tab. */
740 + {
741 + if (tab_size == 0)
742 + {
743 + /* Do not let tab_index == first_free_tab;
744 + stop when it is 1 less. */
745 + while (tab_index < first_free_tab - 1
746 + && column >= tab_list[tab_index])
747 + tab_index++;
748 + next_tab_column = tab_list[tab_index];
749 + if (tab_index < first_free_tab - 1)
750 + tab_index++;
751 + if (column >= next_tab_column)
752 + next_tab_column = column + 1;
753 + }
754 + else
755 + next_tab_column = column + tab_size - column % tab_size;
756 +
757 + while (column < next_tab_column)
758 + {
759 + putchar (' ');
760 + ++column;
761 + }
762 + }
763 + else /* Others. */
764 + {
765 + if (convert)
766 + {
767 + if (wc == L'\b')
768 + {
769 + if (column > 0)
770 + --column;
771 + }
772 + else
773 + {
774 + int width; /* The width of WC. */
775 +
776 + width = wcwidth (wc);
777 + column += (width > 0) ? width : 0;
778 + if (convert_entire_line == 0)
779 + convert = 0;
780 + }
781 + }
782 + fwrite (bufpos, sizeof(char), mblength, stdout);
783 + }
784 + }
785 + buflen -= mblength;
786 + bufpos += mblength;
787 + }
788 +}
789 +#endif
790 +
791 int
792 main (int argc, char **argv)
793 {
794 @@ -421,7 +574,12 @@ main (int argc, char **argv)
795
796 file_list = (optind < argc ? &argv[optind] : stdin_argv);
797
798 - expand ();
799 +#if HAVE_MBRTOWC
800 + if (MB_CUR_MAX > 1)
801 + expand_multibyte ();
802 + else
803 +#endif
804 + expand ();
805
806 if (have_read_stdin && fclose (stdin) != 0)
807 error (EXIT_FAILURE, errno, "-");
808 diff -urNp coreutils-8.0-orig/src/fold.c coreutils-8.0/src/fold.c
809 --- coreutils-8.0-orig/src/fold.c 2009-09-23 10:25:44.000000000 +0200
810 +++ coreutils-8.0/src/fold.c 2009-10-07 10:07:16.000000000 +0200
811 @@ -22,11 +22,33 @@
812 #include <getopt.h>
813 #include <sys/types.h>
814
815 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
816 +#if HAVE_WCHAR_H
817 +# include <wchar.h>
818 +#endif
819 +
820 +/* Get iswprint(), iswblank(), wcwidth(). */
821 +#if HAVE_WCTYPE_H
822 +# include <wctype.h>
823 +#endif
824 +
825 #include "system.h"
826 #include "error.h"
827 #include "quote.h"
828 #include "xstrtol.h"
829
830 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
831 + installation; work around this configuration error. */
832 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
833 +# undef MB_LEN_MAX
834 +# define MB_LEN_MAX 16
835 +#endif
836 +
837 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
838 +#if HAVE_MBRTOWC && defined mbstate_t
839 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
840 +#endif
841 +
842 #define TAB_WIDTH 8
843
844 /* The official name of this program (e.g., no `g' prefix). */
845 @@ -34,20 +56,41 @@
846
847 #define AUTHORS proper_name ("David MacKenzie")
848
849 +#define FATAL_ERROR(Message) \
850 + do \
851 + { \
852 + error (0, 0, (Message)); \
853 + usage (2); \
854 + } \
855 + while (0)
856 +
857 +enum operating_mode
858 +{
859 + /* Fold texts by columns that are at the given positions. */
860 + column_mode,
861 +
862 + /* Fold texts by bytes that are at the given positions. */
863 + byte_mode,
864 +
865 + /* Fold texts by characters that are at the given positions. */
866 + character_mode,
867 +};
868 +
869 +/* The argument shows current mode. (Default: column_mode) */
870 +static enum operating_mode operating_mode;
871 +
872 /* If nonzero, try to break on whitespace. */
873 static bool break_spaces;
874
875 -/* If nonzero, count bytes, not column positions. */
876 -static bool count_bytes;
877 -
878 /* If nonzero, at least one of the files we read was standard input. */
879 static bool have_read_stdin;
880
881 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
882 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
883
884 static struct option const longopts[] =
885 {
886 {"bytes", no_argument, NULL, 'b'},
887 + {"characters", no_argument, NULL, 'c'},
888 {"spaces", no_argument, NULL, 's'},
889 {"width", required_argument, NULL, 'w'},
890 {GETOPT_HELP_OPTION_DECL},
891 @@ -77,6 +120,7 @@ Mandatory arguments to long options are
892 "), stdout);
893 fputs (_("\
894 -b, --bytes count bytes rather than columns\n\
895 + -c, --characters count characters rather than columns\n\
896 -s, --spaces break at spaces\n\
897 -w, --width=WIDTH use WIDTH columns instead of 80\n\
898 "), stdout);
899 @@ -94,7 +138,7 @@ Mandatory arguments to long options are
900 static size_t
901 adjust_column (size_t column, char c)
902 {
903 - if (!count_bytes)
904 + if (operating_mode != byte_mode)
905 {
906 if (c == '\b')
907 {
908 @@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
909 to stdout, with maximum line length WIDTH.
910 Return true if successful. */
911
912 -static bool
913 -fold_file (char const *filename, size_t width)
914 +static void
915 +fold_text (FILE *istream, size_t width, int *saved_errno)
916 {
917 - FILE *istream;
918 int c;
919 size_t column = 0; /* Screen column where next char will go. */
920 size_t offset_out = 0; /* Index in `line_out' for next char. */
921 static char *line_out = NULL;
922 static size_t allocated_out = 0;
923 - int saved_errno;
924 -
925 - if (STREQ (filename, "-"))
926 - {
927 - istream = stdin;
928 - have_read_stdin = true;
929 - }
930 - else
931 - istream = fopen (filename, "r");
932 -
933 - if (istream == NULL)
934 - {
935 - error (0, errno, "%s", filename);
936 - return false;
937 - }
938
939 while ((c = getc (istream)) != EOF)
940 {
941 @@ -168,6 +196,15 @@ fold_file (char const *filename, size_t
942 bool found_blank = false;
943 size_t logical_end = offset_out;
944
945 + /* If LINE_OUT has no wide character,
946 + put a new wide character in LINE_OUT
947 + if column is bigger than width. */
948 + if (offset_out == 0)
949 + {
950 + line_out[offset_out++] = c;
951 + continue;
952 + }
953 +
954 /* Look for the last blank. */
955 while (logical_end)
956 {
957 @@ -214,11 +251,222 @@ fold_file (char const *filename, size_t
958 line_out[offset_out++] = c;
959 }
960
961 - saved_errno = errno;
962 + *saved_errno = errno;
963
964 if (offset_out)
965 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
966
967 +}
968 +
969 +#if HAVE_MBRTOWC /* Multibyte-locale counterpart of fold_text: fold ISTREAM to stdout at WIDTH bytes, characters or columns according to operating_mode; errno after reading is stored in *SAVED_ERRNO. */
970 +static void
971 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
972 +{
973 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
974 + size_t buflen = 0; /* The length of the byte sequence in buf. */
975 + char *bufpos = buf; /* Next read position of BUF. Starts at BUF so the first refill never passes a null pointer to memmove. */
976 + wint_t wc; /* A gotten wide character. */
977 + size_t mblength; /* The byte size of a multibyte character which shows
978 + as same character as WC. */
979 + mbstate_t state, state_bak; /* State of the stream. */
980 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
981 +
982 + static char *line_out = NULL;
983 + size_t offset_out = 0; /* Index in `line_out' for next char. */
984 + static size_t allocated_out = 0;
985 +
986 + int increment;
987 + size_t column = 0;
988 +
989 + size_t last_blank_pos;
990 + size_t last_blank_column;
991 + int is_blank_seen;
992 + int last_blank_increment = 0;
993 + int is_bs_following_last_blank;
994 + size_t bs_following_last_blank_num;
995 + int is_cr_after_last_blank;
996 +
997 +#define CLEAR_FLAGS \
998 + do \
999 + { \
1000 + last_blank_pos = 0; \
1001 + last_blank_column = 0; \
1002 + is_blank_seen = 0; \
1003 + is_bs_following_last_blank = 0; \
1004 + bs_following_last_blank_num = 0; \
1005 + is_cr_after_last_blank = 0; \
1006 + } \
1007 + while (0)
1008 +
1009 +#define START_NEW_LINE \
1010 + do \
1011 + { \
1012 + putchar ('\n'); \
1013 + column = 0; \
1014 + offset_out = 0; \
1015 + CLEAR_FLAGS; \
1016 + } \
1017 + while (0)
1018 +
1019 + CLEAR_FLAGS;
1020 + memset (&state, '\0', sizeof(mbstate_t));
1021 +
1022 + for (;; bufpos += mblength, buflen -= mblength)
1023 + {
1024 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1025 + {
1026 + memmove (buf, bufpos, buflen);
1027 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1028 + bufpos = buf;
1029 + }
1030 +
1031 + if (buflen < 1)
1032 + break;
1033 +
1034 + /* Get a wide character. */
1035 + convfail = 0;
1036 + state_bak = state;
1037 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1038 +
1039 + switch (mblength)
1040 + {
1041 + case (size_t)-1:
1042 + case (size_t)-2:
1043 + convfail++;
1044 + state = state_bak;
1045 + /* Fall through. */
1046 +
1047 + case 0:
1048 + mblength = 1;
1049 + break;
1050 + }
1051 +
1052 +rescan:
1053 + if (operating_mode == byte_mode) /* byte mode */
1054 + increment = mblength;
1055 + else if (operating_mode == character_mode) /* character mode */
1056 + increment = 1;
1057 + else /* column mode */
1058 + {
1059 + if (convfail)
1060 + increment = 1;
1061 + else
1062 + {
1063 + switch (wc)
1064 + {
1065 + case L'\n':
1066 + fwrite (line_out, sizeof(char), offset_out, stdout);
1067 + START_NEW_LINE;
1068 + continue;
1069 +
1070 + case L'\b':
1071 + increment = (column > 0) ? -1 : 0;
1072 + break;
1073 +
1074 + case L'\r':
1075 + increment = -1 * column;
1076 + break;
1077 +
1078 + case L'\t':
1079 + increment = TAB_WIDTH - column % TAB_WIDTH;
1080 + break;
1081 +
1082 + default:
1083 + increment = wcwidth (wc);
1084 + increment = (increment < 0) ? 0 : increment;
1085 + }
1086 + }
1087 + }
1088 +
1089 + if (column + increment > width && break_spaces && last_blank_pos)
1090 + {
1091 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1092 + putchar ('\n');
1093 +
1094 + offset_out = offset_out - last_blank_pos;
1095 + column = column - last_blank_column + ((is_cr_after_last_blank)
1096 + ? last_blank_increment : bs_following_last_blank_num);
1097 + memmove (line_out, line_out + last_blank_pos, offset_out);
1098 + CLEAR_FLAGS;
1099 + goto rescan;
1100 + }
1101 +
1102 + if (column + increment > width && column != 0)
1103 + {
1104 + fwrite (line_out, sizeof(char), offset_out, stdout);
1105 + START_NEW_LINE;
1106 + goto rescan;
1107 + }
1108 +
1109 + if (allocated_out < offset_out + mblength)
1110 + {
1111 + line_out = X2REALLOC (line_out, &allocated_out);
1112 + }
1113 +
1114 + memcpy (line_out + offset_out, bufpos, mblength);
1115 + offset_out += mblength;
1116 + column += increment;
1117 +
1118 + if (is_blank_seen && !convfail && wc == L'\r')
1119 + is_cr_after_last_blank = 1;
1120 +
1121 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1122 + ++bs_following_last_blank_num;
1123 + else
1124 + is_bs_following_last_blank = 0;
1125 +
1126 + if (break_spaces && !convfail && iswblank (wc))
1127 + {
1128 + last_blank_pos = offset_out;
1129 + last_blank_column = column;
1130 + is_blank_seen = 1;
1131 + last_blank_increment = increment;
1132 + is_bs_following_last_blank = 1;
1133 + bs_following_last_blank_num = 0;
1134 + is_cr_after_last_blank = 0;
1135 + }
1136 + }
1137 +
1138 + *saved_errno = errno;
1139 +
1140 + if (offset_out)
1141 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1142 +
1143 +}
1144 +#endif
1145 +
1146 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1147 + to stdout, with maximum line length WIDTH.
1148 + Return 0 if successful, 1 if an error occurs. */
1149 +
1150 +static bool
1151 +fold_file (char *filename, size_t width)
1152 +{
1153 + FILE *istream;
1154 + int saved_errno;
1155 +
1156 + if (STREQ (filename, "-"))
1157 + {
1158 + istream = stdin;
1159 + have_read_stdin = 1;
1160 + }
1161 + else
1162 + istream = fopen (filename, "r");
1163 +
1164 + if (istream == NULL)
1165 + {
1166 + error (0, errno, "%s", filename);
1167 + return 1;
1168 + }
1169 +
1170 + /* Define how ISTREAM is being folded. */
1171 +#if HAVE_MBRTOWC
1172 + if (MB_CUR_MAX > 1)
1173 + fold_multibyte_text (istream, width, &saved_errno);
1174 + else
1175 +#endif
1176 + fold_text (istream, width, &saved_errno);
1177 +
1178 if (ferror (istream))
1179 {
1180 error (0, saved_errno, "%s", filename);
1181 @@ -251,7 +499,8 @@ main (int argc, char **argv)
1182
1183 atexit (close_stdout);
1184
1185 - break_spaces = count_bytes = have_read_stdin = false;
1186 + operating_mode = column_mode;
1187 + break_spaces = have_read_stdin = false;
1188
1189 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1190 {
1191 @@ -260,7 +509,15 @@ main (int argc, char **argv)
1192 switch (optc)
1193 {
1194 case 'b': /* Count bytes rather than columns. */
1195 - count_bytes = true;
1196 + if (operating_mode != column_mode)
1197 + FATAL_ERROR (_("only one way of folding may be specified"));
1198 + operating_mode = byte_mode;
1199 + break;
1200 +
1201 + case 'c':
1202 + if (operating_mode != column_mode)
1203 + FATAL_ERROR (_("only one way of folding may be specified"));
1204 + operating_mode = character_mode;
1205 break;
1206
1207 case 's': /* Break at word boundaries. */
1208 diff -urNp coreutils-8.0-orig/src/join.c coreutils-8.0/src/join.c
1209 --- coreutils-8.0-orig/src/join.c 2009-09-23 10:25:44.000000000 +0200
1210 +++ coreutils-8.0/src/join.c 2009-10-07 10:07:16.000000000 +0200
1211 @@ -22,17 +22,31 @@
1212 #include <sys/types.h>
1213 #include <getopt.h>
1214
1215 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1216 +#if HAVE_WCHAR_H
1217 +# include <wchar.h>
1218 +#endif
1219 +
1220 +/* Get iswblank(), towupper. */
1221 +#if HAVE_WCTYPE_H
1222 +# include <wctype.h>
1223 +#endif
1224 +
1225 #include "system.h"
1226 #include "error.h"
1227 #include "hard-locale.h"
1228 #include "linebuffer.h"
1229 -#include "memcasecmp.h"
1230 #include "quote.h"
1231 #include "stdio--.h"
1232 #include "xmemcoll.h"
1233 #include "xstrtol.h"
1234 #include "argmatch.h"
1235
1236 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1237 +#if HAVE_MBRTOWC && defined mbstate_t
1238 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1239 +#endif
1240 +
1241 /* The official name of this program (e.g., no `g' prefix). */
1242 #define PROGRAM_NAME "join"
1243
1244 @@ -121,10 +135,12 @@ static struct outlist outlist_head;
1245 /* Last element in `outlist', where a new element can be added. */
1246 static struct outlist *outlist_end = &outlist_head;
1247
1248 -/* Tab character separating fields. If negative, fields are separated
1249 - by any nonempty string of blanks, otherwise by exactly one
1250 - tab character whose value (when cast to unsigned char) equals TAB. */
1251 -static int tab = -1;
1252 +/* Tab character separating fields. If NULL, fields are separated
1253 + by any nonempty string of blanks. */
1254 +static char *tab = NULL;
1255 +
1256 +/* The number of bytes used for tab. */
1257 +static size_t tablen = 0;
1258
1259 /* If nonzero, check that the input is correctly ordered. */
1260 static enum
1261 @@ -239,10 +255,11 @@ xfields (struct line *line)
1262 if (ptr == lim)
1263 return;
1264
1265 - if (0 <= tab)
1266 + if (tab != NULL)
1267 {
1268 + unsigned char t = tab[0];
1269 char *sep;
1270 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1271 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1272 extract_field (line, ptr, sep - ptr);
1273 }
1274 else
1275 @@ -269,6 +286,148 @@ xfields (struct line *line)
1276 extract_field (line, ptr, lim - ptr);
1277 }
1278
1279 +#if HAVE_MBRTOWC
1280 +static void
1281 +xfields_multibyte (struct line *line) /* Split LINE's buffer into fields, decoding it one multibyte character at a time. */
1282 +{
1283 + char *ptr = line->buf.buffer;
1284 + char const *lim = ptr + line->buf.length - 1; /* Final byte of the buffer (presumably the line terminator); scanning stops here. */
1285 + wchar_t wc = 0;
1286 + size_t mblength = 1;
1287 + mbstate_t state, state_bak;
1288 +
1289 + memset (&state, 0, sizeof (mbstate_t)); /* Start decoding in the initial conversion state. */
1290 +
1291 + if (ptr >= lim)
1292 + return;
1293 +
1294 + if (tab != NULL)
1295 + {
1296 + /* Match whole encoded characters against TAB (TABLEN bytes), never a lone byte. */
1297 + char *sep = ptr;
1298 + for (; ptr < lim; ptr = sep + mblength) /* Resume just past the separator that was found. */
1299 + {
1300 + sep = ptr;
1301 + while (sep < lim)
1302 + {
1303 + state_bak = state;
1304 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); /* The byte at LIM is offered to the decoder too. */
1305 +
1306 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1307 + {
1308 + mblength = 1; /* Invalid or incomplete sequence: step over one byte. */
1309 + state = state_bak;
1310 + }
1311 + mblength = (mblength < 1) ? 1 : mblength; /* A decoded NUL reports length 0; it still occupies one byte. */
1312 +
1313 + if (mblength == tablen && !memcmp (sep, tab, mblength))
1314 + break;
1315 + else
1316 + {
1317 + sep += mblength;
1318 + continue;
1319 + }
1320 + }
1321 +
1322 + if (sep >= lim)
1323 + break; /* No further separator: the tail is extracted after the loop. */
1324 +
1325 + extract_field (line, ptr, sep - ptr);
1326 + }
1327 + }
1328 + else
1329 + {
1330 + /* Skip leading blanks before the first field. */
1331 + while(ptr < lim)
1332 + {
1333 + state_bak = state;
1334 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1335 +
1336 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1337 + {
1338 + mblength = 1;
1339 + state = state_bak;
1340 + break; /* An undecodable byte is not a blank, so the first field starts here. */
1341 + }
1342 + mblength = (mblength < 1) ? 1 : mblength;
1343 +
1344 + if (!iswblank(wc))
1345 + break;
1346 + if ((ptr += mblength) >= lim) return; /* Nothing but blanks: no fields, rather than a bogus one-byte field at LIM. */
1347 + }
1348 +
1349 + do /* Each pass extracts one field, then skips the blanks that follow it. */
1350 + {
1351 + char *sep;
1352 + state_bak = state;
1353 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1354 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1355 + {
1356 + mblength = 1;
1357 + state = state_bak;
1358 + /* Keep going: the undecodable byte is simply the first byte of this field. */
1359 + }
1360 + mblength = (mblength < 1) ? 1 : mblength;
1361 +
1362 + sep = ptr + mblength;
1363 + while (sep < lim)
1364 + {
1365 + state_bak = state;
1366 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1367 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1368 + {
1369 + mblength = 1;
1370 + state = state_bak;
1371 + wc = L'\0'; /* Treat the byte as a non-blank character; WC would otherwise be stale. */
1372 + }
1373 + mblength = (mblength < 1) ? 1 : mblength;
1374 +
1375 + if (iswblank (wc))
1376 + break;
1377 +
1378 + sep += mblength;
1379 + }
1380 +
1381 + extract_field (line, ptr, sep - ptr);
1382 + if (sep >= lim)
1383 + return;
1384 +
1385 + state_bak = state;
1386 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); /* Re-decode the blank at SEP to learn its byte length. */
1387 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1388 + {
1389 + mblength = 1;
1390 + state = state_bak;
1391 + /* Do not leave the loop here: that would re-extract the current field below. */
1392 + }
1393 + mblength = (mblength < 1) ? 1 : mblength;
1394 +
1395 + ptr = sep + mblength;
1396 + while (ptr < lim)
1397 + {
1398 + state_bak = state;
1399 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1400 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1401 + {
1402 + mblength = 1;
1403 + state = state_bak;
1404 + break; /* Not a blank: the next field starts at this byte. */
1405 + }
1406 + mblength = (mblength < 1) ? 1 : mblength;
1407 +
1408 + if (!iswblank (wc))
1409 + break;
1410 +
1411 + ptr += mblength;
1412 + }
1413 + }
1414 + while (ptr < lim);
1415 + }
1416 +
1417 + extract_field (line, ptr, lim - ptr); /* Whatever remains before LIM (possibly empty) is the last field. */
1418 +}
1419 +#endif
1420 +
1421 static void
1422 freeline (struct line *line)
1423 {
1424 @@ -287,56 +446,115 @@ keycmp (struct line const *line1, struct
1425 size_t jf_1, size_t jf_2)
1426 {
1427 /* Start of field to compare in each file. */
1428 - char *beg1;
1429 - char *beg2;
1430 -
1431 - size_t len1;
1432 - size_t len2; /* Length of fields to compare. */
1433 + char *beg[2];
1434 + char *copy[2];
1435 + size_t len[2]; /* Length of fields to compare. */
1436 int diff;
1437 + int i, j;
1438
1439 if (jf_1 < line1->nfields)
1440 {
1441 - beg1 = line1->fields[jf_1].beg;
1442 - len1 = line1->fields[jf_1].len;
1443 + beg[0] = line1->fields[jf_1].beg;
1444 + len[0] = line1->fields[jf_1].len;
1445 }
1446 else
1447 {
1448 - beg1 = NULL;
1449 - len1 = 0;
1450 + beg[0] = NULL;
1451 + len[0] = 0;
1452 }
1453
1454 if (jf_2 < line2->nfields)
1455 {
1456 - beg2 = line2->fields[jf_2].beg;
1457 - len2 = line2->fields[jf_2].len;
1458 + beg[1] = line2->fields[jf_2].beg;
1459 + len[1] = line2->fields[jf_2].len;
1460 }
1461 else
1462 {
1463 - beg2 = NULL;
1464 - len2 = 0;
1465 + beg[1] = NULL;
1466 + len[1] = 0;
1467 }
1468
1469 - if (len1 == 0)
1470 - return len2 == 0 ? 0 : -1;
1471 - if (len2 == 0)
1472 + if (len[0] == 0)
1473 + return len[1] == 0 ? 0 : -1;
1474 + if (len[1] == 0)
1475 return 1;
1476
1477 if (ignore_case)
1478 {
1479 - /* FIXME: ignore_case does not work with NLS (in particular,
1480 - with multibyte chars). */
1481 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1482 +#ifdef HAVE_MBRTOWC
1483 + if (MB_CUR_MAX > 1)
1484 + {
1485 + size_t mblength;
1486 + wchar_t wc, uwc;
1487 + mbstate_t state, state_bak;
1488 +
1489 + memset (&state, '\0', sizeof (mbstate_t));
1490 +
1491 + for (i = 0; i < 2; i++)
1492 + {
1493 + copy[i] = alloca (len[i] + 1);
1494 +
1495 + for (j = 0; j < MIN (len[0], len[1]);)
1496 + {
1497 + state_bak = state;
1498 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1499 +
1500 + switch (mblength)
1501 + {
1502 + case (size_t) -1:
1503 + case (size_t) -2:
1504 + state = state_bak;
1505 + /* Fall through */
1506 + case 0:
1507 + mblength = 1;
1508 + break;
1509 +
1510 + default:
1511 + uwc = towupper (wc);
1512 +
1513 + if (uwc != wc)
1514 + {
1515 + mbstate_t state_wc;
1516 +
1517 + memset (&state_wc, '\0', sizeof (mbstate_t));
1518 + wcrtomb (copy[i] + j, uwc, &state_wc);
1519 + }
1520 + else
1521 + memcpy (copy[i] + j, beg[i] + j, mblength);
1522 + }
1523 + j += mblength;
1524 + }
1525 + copy[i][j] = '\0';
1526 + }
1527 + }
1528 + else
1529 +#endif
1530 + {
1531 + for (i = 0; i < 2; i++)
1532 + {
1533 + copy[i] = alloca (len[i] + 1);
1534 +
1535 + for (j = 0; j < MIN (len[0], len[1]); j++)
1536 + copy[i][j] = toupper (beg[i][j]);
1537 +
1538 + copy[i][j] = '\0';
1539 + }
1540 + }
1541 }
1542 else
1543 {
1544 - if (hard_LC_COLLATE)
1545 - return xmemcoll (beg1, len1, beg2, len2);
1546 - diff = memcmp (beg1, beg2, MIN (len1, len2));
1547 + copy[0] = (unsigned char *) beg[0];
1548 + copy[1] = (unsigned char *) beg[1];
1549 }
1550
1551 + if (hard_LC_COLLATE)
1552 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1553 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1554 +
1555 +
1556 if (diff)
1557 return diff;
1558 - return len1 < len2 ? -1 : len1 != len2;
1559 + return len[0] - len[1];
1560 }
1561
1562 /* Check that successive input lines PREV and CURRENT from input file
1563 @@ -417,6 +635,11 @@ get_line (FILE *fp, struct line **linep,
1564 return false;
1565 }
1566
1567 +#if HAVE_MBRTOWC
1568 + if (MB_CUR_MAX > 1)
1569 + xfields_multibyte (line);
1570 + else
1571 +#endif
1572 xfields (line);
1573
1574 if (prevline[which - 1])
1575 @@ -518,11 +741,18 @@ prfield (size_t n, struct line const *li
1576
1577 /* Print the join of LINE1 and LINE2. */
1578
1579 +#define PUT_TAB_CHAR /* Write the output field separator to stdout. */ \
1580 + do \
1581 + { \
1582 + if (tab == NULL) putchar (' '); /* No TAB set: a single space. */ \
1583 + else fwrite (tab, sizeof (char), tablen, stdout); /* Otherwise TAB's TABLEN bytes. */ \
1584 + } \
1585 + while (0)
1586 +
1587 static void
1588 prjoin (struct line const *line1, struct line const *line2)
1589 {
1590 const struct outlist *outlist;
1591 - char output_separator = tab < 0 ? ' ' : tab;
1592
1593 outlist = outlist_head.next;
1594 if (outlist)
1595 @@ -557,7 +787,7 @@ prjoin (struct line const *line1, struct
1596 o = o->next;
1597 if (o == NULL)
1598 break;
1599 - putchar (output_separator);
1600 + PUT_TAB_CHAR;
1601 }
1602 putchar ('\n');
1603 }
1604 @@ -575,23 +805,23 @@ prjoin (struct line const *line1, struct
1605 prfield (join_field_1, line1);
1606 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
1607 {
1608 - putchar (output_separator);
1609 + PUT_TAB_CHAR;
1610 prfield (i, line1);
1611 }
1612 for (i = join_field_1 + 1; i < line1->nfields; ++i)
1613 {
1614 - putchar (output_separator);
1615 + PUT_TAB_CHAR;
1616 prfield (i, line1);
1617 }
1618
1619 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
1620 {
1621 - putchar (output_separator);
1622 + PUT_TAB_CHAR;
1623 prfield (i, line2);
1624 }
1625 for (i = join_field_2 + 1; i < line2->nfields; ++i)
1626 {
1627 - putchar (output_separator);
1628 + PUT_TAB_CHAR;
1629 prfield (i, line2);
1630 }
1631 putchar ('\n');
1632 @@ -1022,20 +1252,41 @@ main (int argc, char **argv)
1633
1634 case 't':
1635 {
1636 - unsigned char newtab = optarg[0];
1637 - if (! newtab)
1638 + char *newtab;
1639 + size_t newtablen;
1640 + if (! optarg[0])
1641 error (EXIT_FAILURE, 0, _("empty tab"));
1642 - if (optarg[1])
1643 + newtab = xstrdup (optarg);
1644 +#if HAVE_MBRTOWC
1645 + if (MB_CUR_MAX > 1)
1646 + {
1647 + mbstate_t state;
1648 +
1649 + memset (&state, 0, sizeof (mbstate_t));
1650 + newtablen = mbrtowc (NULL, newtab,
1651 + strnlen (newtab, MB_LEN_MAX),
1652 + &state);
1653 + if (newtablen == (size_t) 0
1654 + || newtablen == (size_t) -1
1655 + || newtablen == (size_t) -2)
1656 + newtablen = 1;
1657 + }
1658 + else
1659 +#endif
1660 + newtablen = 1;
1661 +
1662 + if (newtablen == 1 && newtab[1])
1663 + {
1664 + if (STREQ (newtab, "\\0"))
1665 + newtab[0] = '\0';
1666 + }
1667 + if (tab != NULL && strcmp (tab, newtab))
1668 {
1669 - if (STREQ (optarg, "\\0"))
1670 - newtab = '\0';
1671 - else
1672 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1673 - quote (optarg));
1674 + free (newtab);
1675 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
1676 }
1677 - if (0 <= tab && tab != newtab)
1678 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
1679 tab = newtab;
1680 + tablen = newtablen;
1681 }
1682 break;
1683
1684 diff -urNp coreutils-8.0-orig/src/pr.c coreutils-8.0/src/pr.c
1685 --- coreutils-8.0-orig/src/pr.c 2009-09-29 15:27:54.000000000 +0200
1686 +++ coreutils-8.0/src/pr.c 2009-10-07 10:07:16.000000000 +0200
1687 @@ -312,6 +312,32 @@
1688
1689 #include <getopt.h>
1690 #include <sys/types.h>
1691 +
1692 +/* Get MB_LEN_MAX. */
1693 +#include <limits.h>
1694 +/* At least one GCC installation wrongly defines MB_LEN_MAX as 1; drop any such value before redefining it. */
1695 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1696 +# undef MB_LEN_MAX
1697 +# define MB_LEN_MAX 16
1698 +#endif
1699 +
1700 +/* Get MB_CUR_MAX. */
1701 +#include <stdlib.h>
1702 +
1703 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1704 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1705 +#if HAVE_WCHAR_H
1706 +# include <wchar.h>
1707 +#endif
1708 +
1709 +/* Get iswprint(). -- for wcwidth(). */
1710 +#if HAVE_WCTYPE_H
1711 +# include <wctype.h>
1712 +#endif
1713 +#if !defined iswprint && !HAVE_ISWPRINT
1714 +# define iswprint(wc) 1
1715 +#endif
1716 +
1717 #include "system.h"
1718 #include "error.h"
1719 #include "hard-locale.h"
1720 @@ -322,6 +348,18 @@
1721 #include "strftime.h"
1722 #include "xstrtol.h"
1723
1724 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1725 +#if HAVE_MBRTOWC && defined mbstate_t
1726 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1727 +#endif
1728 +
1729 +#ifndef HAVE_DECL_WCWIDTH
1730 +"this configure-time declaration test was not run"
1731 +#endif
1732 +#if !HAVE_DECL_WCWIDTH
1733 +extern int wcwidth ();
1734 +#endif
1735 +
1736 /* The official name of this program (e.g., no `g' prefix). */
1737 #define PROGRAM_NAME "pr"
1738
1739 @@ -414,7 +452,20 @@ struct COLUMN
1740
1741 typedef struct COLUMN COLUMN;
1742
1743 -static int char_to_clump (char c);
1744 +/* Function pointers to switch functions for single byte locale or for
1745 + multibyte locale. If multibyte functions do not exist in your system,
1746 + these pointers always point to the function for single byte locale. */
1747 +static void (*print_char) (char c);
1748 +static int (*char_to_clump) (char c);
1749 +
1750 +/* Functions for single byte locale. */
1751 +static void print_char_single (char c);
1752 +static int char_to_clump_single (char c);
1753 +
1754 +/* Functions for multibyte locale. */
1755 +static void print_char_multi (char c);
1756 +static int char_to_clump_multi (char c);
1757 +
1758 static bool read_line (COLUMN *p);
1759 static bool print_page (void);
1760 static bool print_stored (COLUMN *p);
1761 @@ -424,6 +475,7 @@ static void print_header (void);
1762 static void pad_across_to (int position);
1763 static void add_line_number (COLUMN *p);
1764 static void getoptarg (char *arg, char switch_char, char *character,
1765 + int *character_length, int *character_width,
1766 int *number);
1767 void usage (int status);
1768 static void print_files (int number_of_files, char **av);
1769 @@ -438,7 +490,6 @@ static void store_char (char c);
1770 static void pad_down (int lines);
1771 static void read_rest_of_line (COLUMN *p);
1772 static void skip_read (COLUMN *p, int column_number);
1773 -static void print_char (char c);
1774 static void cleanup (void);
1775 static void print_sep_string (void);
1776 static void separator_string (const char *optarg_S);
1777 @@ -450,7 +501,7 @@ static COLUMN *column_vector;
1778 we store the leftmost columns contiguously in buff.
1779 To print a line from buff, get the index of the first character
1780 from line_vector[i], and print up to line_vector[i + 1]. */
1781 -static char *buff;
1782 +static unsigned char *buff;
1783
1784 /* Index of the position in buff where the next character
1785 will be stored. */
1786 @@ -554,7 +605,7 @@ static int chars_per_column;
1787 static bool untabify_input = false;
1788
1789 /* (-e) The input tab character. */
1790 -static char input_tab_char = '\t';
1791 +static char input_tab_char[MB_LEN_MAX] = "\t";
1792
1793 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1794 where the leftmost column is 1. */
1795 @@ -564,7 +615,10 @@ static int chars_per_input_tab = 8;
1796 static bool tabify_output = false;
1797
1798 /* (-i) The output tab character. */
1799 -static char output_tab_char = '\t';
1800 +static char output_tab_char[MB_LEN_MAX] = "\t";
1801 +
1802 +/* (-i) The byte length of output tab character. */
1803 +static int output_tab_char_length = 1;
1804
1805 /* (-i) The width of the output tab. */
1806 static int chars_per_output_tab = 8;
1807 @@ -638,7 +692,13 @@ static int power_10;
1808 static bool numbered_lines = false;
1809
1810 /* (-n) Character which follows each line number. */
1811 -static char number_separator = '\t';
1812 +static char number_separator[MB_LEN_MAX] = "\t";
1813 +
1814 +/* (-n) The byte length of the character which follows each line number. */
1815 +static int number_separator_length = 1;
1816 +
1817 +/* (-n) The character width of the character which follows each line number. */
1818 +static int number_separator_width = 0;
1819
1820 /* (-n) line counting starts with 1st line of input file (not with 1st
1821 line of 1st page printed). */
1822 @@ -691,6 +751,7 @@ static bool use_col_separator = false;
1823 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
1824 static char *col_sep_string = (char *) "";
1825 static int col_sep_length = 0;
1826 +static int col_sep_width = 0;
1827 static char *column_separator = (char *) " ";
1828 static char *line_separator = (char *) "\t";
1829
1830 @@ -847,6 +908,13 @@ separator_string (const char *optarg_S)
1831 col_sep_length = (int) strlen (optarg_S);
1832 col_sep_string = xmalloc (col_sep_length + 1);
1833 strcpy (col_sep_string, optarg_S);
1834 +
1835 +#if HAVE_MBRTOWC
1836 + if (MB_CUR_MAX > 1)
1837 + col_sep_width = mbswidth (col_sep_string, 0);
1838 + else
1839 +#endif
1840 + col_sep_width = col_sep_length;
1841 }
1842
1843 int
1844 @@ -871,6 +939,21 @@ main (int argc, char **argv)
1845
1846 atexit (close_stdout);
1847
1848 +/* Define which functions are used, the ones for single byte locale or the ones
1849 + for multibyte locale. */
1850 +#if HAVE_MBRTOWC
1851 + if (MB_CUR_MAX > 1)
1852 + {
1853 + print_char = print_char_multi;
1854 + char_to_clump = char_to_clump_multi;
1855 + }
1856 + else
1857 +#endif
1858 + {
1859 + print_char = print_char_single;
1860 + char_to_clump = char_to_clump_single;
1861 + }
1862 +
1863 n_files = 0;
1864 file_names = (argc > 1
1865 ? xmalloc ((argc - 1) * sizeof (char *))
1866 @@ -947,8 +1030,12 @@ main (int argc, char **argv)
1867 break;
1868 case 'e':
1869 if (optarg)
1870 - getoptarg (optarg, 'e', &input_tab_char,
1871 - &chars_per_input_tab);
1872 + {
1873 + int dummy_length, dummy_width;
1874 +
1875 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1876 + &dummy_width, &chars_per_input_tab);
1877 + }
1878 /* Could check tab width > 0. */
1879 untabify_input = true;
1880 break;
1881 @@ -961,8 +1048,12 @@ main (int argc, char **argv)
1882 break;
1883 case 'i':
1884 if (optarg)
1885 - getoptarg (optarg, 'i', &output_tab_char,
1886 - &chars_per_output_tab);
1887 + {
1888 + int dummy_width;
1889 +
1890 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1891 + &dummy_width, &chars_per_output_tab);
1892 + }
1893 /* Could check tab width > 0. */
1894 tabify_output = true;
1895 break;
1896 @@ -989,8 +1080,8 @@ main (int argc, char **argv)
1897 case 'n':
1898 numbered_lines = true;
1899 if (optarg)
1900 - getoptarg (optarg, 'n', &number_separator,
1901 - &chars_per_number);
1902 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
1903 + &number_separator_width, &chars_per_number);
1904 break;
1905 case 'N':
1906 skip_count = false;
1907 @@ -1029,7 +1120,7 @@ main (int argc, char **argv)
1908 old_s = false;
1909 /* Reset an additional input of -s, -S dominates -s */
1910 col_sep_string = bad_cast ("");
1911 - col_sep_length = 0;
1912 + col_sep_length = col_sep_width = 0;
1913 use_col_separator = true;
1914 if (optarg)
1915 separator_string (optarg);
1916 @@ -1186,10 +1277,45 @@ main (int argc, char **argv)
1917 a number. */
1918
1919 static void
1920 -getoptarg (char *arg, char switch_char, char *character, int *number)
1921 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
1922 + int *character_width, int *number)
1923 {
1924 if (!ISDIGIT (*arg))
1925 - *character = *arg++;
1926 + {
1927 +#ifdef HAVE_MBRTOWC
1928 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
1929 + {
1930 + wchar_t wc;
1931 + size_t mblength;
1932 + int width;
1933 + mbstate_t state = {'\0'};
1934 +
1935 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1936 +
1937 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1938 + {
1939 + *character_length = 1;
1940 + *character_width = 1;
1941 + }
1942 + else
1943 + {
1944 + *character_length = (mblength < 1) ? 1 : mblength;
1945 + width = wcwidth (wc);
1946 + *character_width = (width < 0) ? 0 : width;
1947 + }
1948 +
1949 + strncpy (character, arg, *character_length);
1950 + arg += *character_length;
1951 + }
1952 + else /* for single byte locale. */
1953 +#endif
1954 + {
1955 + *character = *arg++;
1956 + *character_length = 1;
1957 + *character_width = 1;
1958 + }
1959 + }
1960 +
1961 if (*arg)
1962 {
1963 long int tmp_long;
1964 @@ -1248,7 +1374,7 @@ init_parameters (int number_of_files)
1965 else
1966 col_sep_string = column_separator;
1967
1968 - col_sep_length = 1;
1969 + col_sep_length = col_sep_width = 1;
1970 use_col_separator = true;
1971 }
1972 /* It's rather pointless to define a TAB separator with column
1973 @@ -1279,11 +1405,11 @@ init_parameters (int number_of_files)
1974 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
1975
1976 /* Estimate chars_per_text without any margin and keep it constant. */
1977 - if (number_separator == '\t')
1978 + if (number_separator[0] == '\t')
1979 number_width = chars_per_number +
1980 TAB_WIDTH (chars_per_default_tab, chars_per_number);
1981 else
1982 - number_width = chars_per_number + 1;
1983 + number_width = chars_per_number + number_separator_width;
1984
1985 /* The number is part of the column width unless we are
1986 printing files in parallel. */
1987 @@ -1298,7 +1424,7 @@ init_parameters (int number_of_files)
1988 }
1989
1990 chars_per_column = (chars_per_line - chars_used_by_number -
1991 - (columns - 1) * col_sep_length) / columns;
1992 + (columns - 1) * col_sep_width) / columns;
1993
1994 if (chars_per_column < 1)
1995 error (EXIT_FAILURE, 0, _("page width too narrow"));
1996 @@ -1423,7 +1549,7 @@ init_funcs (void)
1997
1998 /* Enlarge p->start_position of first column to use the same form of
1999 padding_not_printed with all columns. */
2000 - h = h + col_sep_length;
2001 + h = h + col_sep_width;
2002
2003 /* This loop takes care of all but the rightmost column. */
2004
2005 @@ -1457,7 +1583,7 @@ init_funcs (void)
2006 }
2007 else
2008 {
2009 - h = h_next + col_sep_length;
2010 + h = h_next + col_sep_width;
2011 h_next = h + chars_per_column;
2012 }
2013 }
2014 @@ -1747,9 +1873,9 @@ static void
2015 align_column (COLUMN *p)
2016 {
2017 padding_not_printed = p->start_position;
2018 - if (padding_not_printed - col_sep_length > 0)
2019 + if (padding_not_printed - col_sep_width > 0)
2020 {
2021 - pad_across_to (padding_not_printed - col_sep_length);
2022 + pad_across_to (padding_not_printed - col_sep_width);
2023 padding_not_printed = ANYWHERE;
2024 }
2025
2026 @@ -2020,13 +2146,13 @@ store_char (char c)
2027 /* May be too generous. */
2028 buff = X2REALLOC (buff, &buff_allocated);
2029 }
2030 - buff[buff_current++] = c;
2031 + buff[buff_current++] = (unsigned char) c;
2032 }
2033
2034 static void
2035 add_line_number (COLUMN *p)
2036 {
2037 - int i;
2038 + int i, j;
2039 char *s;
2040 int left_cut;
2041