/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.42 - (hide annotations) (download) (as text)
Wed Nov 18 14:47:59 2009 UTC (4 days, 14 hours ago) by ovasik
Branch: MAIN
CVS Tags: HEAD
Changes since 1.41: +2321 -13698 lines
File MIME type: text/x-patch
remove accidently added .orig files from patches :(
1 ovasik 1.41 diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
2     --- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
3     +++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
4     @@ -21,6 +21,11 @@
5 twaugh 1.20
6     # include <stdio.h>
7    
8     +/* Get mbstate_t. */
9     +# if HAVE_WCHAR_H
10     +# include <wchar.h>
11     +# endif
12     +
13     /* A `struct linebuffer' holds a line of text. */
14    
15     struct linebuffer
16 ovasik 1.41 @@ -28,6 +33,9 @@ struct linebuffer
17 twaugh 1.20 size_t size; /* Allocated. */
18     size_t length; /* Used. */
19     char *buffer;
20     +# if HAVE_WCHAR_H
21     + mbstate_t state;
22     +# endif
23     };
24    
25     /* Initialize linebuffer LINEBUFFER for use. */
26 ovasik 1.41 diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
27     --- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
28     +++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
29     @@ -28,6 +28,11 @@
30     #include <assert.h>
31 cvsdist 1.1 #include <getopt.h>
32     #include <sys/types.h>
33     +
34 ovasik 1.41 +/* Get mbstate_t, mbrtowc(). */
35 cvsdist 1.1 +#if HAVE_WCHAR_H
36     +# include <wchar.h>
37     +#endif
38     #include "system.h"
39 ovasik 1.41
40 cvsdist 1.1 #include "error.h"
41 ovasik 1.41 @@ -36,6 +41,18 @@
42 cvsdist 1.1 #include "quote.h"
43     #include "xstrndup.h"
44    
45     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46 ovasik 1.41 + installation; work around this configuration error. */
47 cvsdist 1.1 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
48 ovasik 1.41 +# undef MB_LEN_MAX
49 cvsdist 1.1 +# define MB_LEN_MAX 16
50     +#endif
51     +
52     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53     +#if HAVE_MBRTOWC && defined mbstate_t
54     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
55     +#endif
56     +
57     /* The official name of this program (e.g., no `g' prefix). */
58 ovasik 1.41 #define PROGRAM_NAME "cut"
59 cvsdist 1.1
60 ovasik 1.41 @@ -71,6 +88,52 @@
61     } \
62     while (0)
63 cvsdist 1.1
64 ovasik 1.41 +/* Refill the buffer BUF to get a multibyte character. */
65     +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
66     + do \
67     + { \
68     + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
69     + { \
70     + memmove (BUF, BUFPOS, BUFLEN); \
71     + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
72     + BUFPOS = BUF; \
73     + } \
74     + } \
75     + while (0)
76 twaugh 1.16 +
77 ovasik 1.41 +/* Get the wide character at BUFPOS into WC (WEOF if BUFLEN is 0); BUFPOS is
78     + not advanced. CONVFAIL is 1 if the bytes are not a valid character, else 0. */
79     +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
80     + do \
81     + { \
82     + mbstate_t state_bak; \
83     + \
84     + /* Clear first so CONVFAIL is defined even on the WEOF exit. */ \
85     + CONVFAIL = 0; \
86     + if (BUFLEN < 1) \
87     + { \
88     + WC = WEOF; \
89     + break; \
90     + } \
91     + \
92     + state_bak = STATE; \
93     + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
94     + \
95     + switch (MBLENGTH) \
96     + { \
97     + case (size_t)-1: \
98     + case (size_t)-2: \
99     + CONVFAIL++; \
100     + STATE = state_bak; \
101     + /* Fall through. */ \
102     + \
103     + case 0: \
104     + MBLENGTH = 1; \
105     + break; \
106     + } \
107     + } \
108     + while (0)
109 cvsdist 1.1 +
110 ovasik 1.41 struct range_pair
111     {
112     size_t lo;
113     @@ -89,7 +152,7 @@ static char *field_1_buffer;
114     /* The number of bytes allocated for FIELD_1_BUFFER. */
115     static size_t field_1_bufsize;
116    
117     -/* The largest field or byte index used as an endpoint of a closed
118     +/* The largest byte, character or field index used as an endpoint of a closed
119     or degenerate range specification; this doesn't include the starting
120     index of right-open-ended ranges. For example, with either range spec
121     `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
122     @@ -101,10 +164,11 @@ static size_t eol_range_start;
123    
124     /* This is a bit vector.
125     In byte mode, which bytes to output.
126     + In character mode, which characters to output.
127     In field mode, which DELIM-separated fields to output.
128     - Both bytes and fields are numbered starting with 1,
129     + Bytes, characters and fields are numbered starting with 1,
130     so the zeroth bit of this array is unused.
131     - A field or byte K has been selected if
132     + A byte, character or field K has been selected if
133     (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134     || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135     static unsigned char *printable_field;
136     @@ -113,15 +177,25 @@ enum operating_mode
137     {
138     undefined_mode,
139    
140     - /* Output characters that are in the given bytes. */
141     + /* Output bytes that are at the given positions. */
142     byte_mode,
143    
144     + /* Output characters that are at the given positions. */
145     + character_mode,
146 cvsdist 1.1 +
147 ovasik 1.41 /* Output the given delimeter-separated fields. */
148     field_mode
149     };
150    
151     static enum operating_mode operating_mode;
152    
153     +/* If nonzero, when in byte mode, don't split multibyte characters. */
154     +static int byte_mode_character_aware;
155 twaugh 1.16 +
156 ovasik 1.41 +/* If nonzero, use the single-byte functions even when
157     + this program runs in a multibyte locale. */
158     +static int force_singlebyte_mode;
159     +
160     /* If true do not output lines containing no delimeter characters.
161     Otherwise, all such lines are printed. This option is valid only
162     with field mode. */
163     @@ -133,6 +207,9 @@ static bool complement;
164    
165     /* The delimeter character for field mode. */
166     static unsigned char delim;
167     +#if HAVE_WCHAR_H
168     +static wchar_t wcdelim;
169     +#endif
170    
171     /* True if the --output-delimiter=STRING option was specified. */
172     static bool output_delimiter_specified;
173     @@ -206,7 +283,7 @@ Mandatory arguments to long options are
174     -f, --fields=LIST select only these fields; also print any line\n\
175     that contains no delimiter character, unless\n\
176     the -s option is specified\n\
177     - -n (ignored)\n\
178     + -n with -b: don't split multibyte characters\n\
179     "), stdout);
180     fputs (_("\
181     --complement complement the set of selected bytes, characters\n\
182     @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
183     in_digits = false;
184     /* Starting a range. */
185     if (dash_found)
186     - FATAL_ERROR (_("invalid byte or field list"));
187     + FATAL_ERROR (_("invalid byte, character or field list"));
188     dash_found = true;
189     fieldstr++;
190    
191     @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
192     if (!rhs_specified)
193     {
194     /* `n-'. From `initial' to end of line. */
195     - eol_range_start = initial;
196     + if (eol_range_start == 0 ||
197     + (eol_range_start != 0 && eol_range_start > initial))
198     + eol_range_start = initial;
199     field_found = true;
200     }
201     else
202     {
203     /* `m-n' or `-n' (1-n). */
204     if (value < initial)
205     - FATAL_ERROR (_("invalid decreasing range"));
206     + FATAL_ERROR (_("invalid byte, character or field list"));
207    
208     /* Is there already a range going to end of line? */
209     if (eol_range_start != 0)
210     @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
211     if (operating_mode == byte_mode)
212     error (0, 0,
213     _("byte offset %s is too large"), quote (bad_num));
214     + else if (operating_mode == character_mode)
215     + error (0, 0,
216     + _("character offset %s is too large"), quote (bad_num));
217     else
218     error (0, 0,
219     _("field number %s is too large"), quote (bad_num));
220     @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
221     fieldstr++;
222     }
223     else
224     - FATAL_ERROR (_("invalid byte or field list"));
225     + FATAL_ERROR (_("invalid byte, character or field list"));
226     }
227    
228     max_range_endpoint = 0;
229     @@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
230     }
231     }
232    
233     +#if HAVE_MBRTOWC
234     +/* This function is used in the following two cases.
235 twaugh 1.16 +
236 ovasik 1.41 + 1. Read from the stream STREAM, printing to standard output any selected
237     + characters.
238 cvsdist 1.1 +
239 ovasik 1.41 + 2. Read from stream STREAM, printing to standard output any selected bytes,
240     + without splitting multibyte characters. */
241     +
242     +static void
243     +cut_characters_or_cut_bytes_no_split (FILE *stream)
244     +{
245     + int idx; /* number of bytes or characters in the line so far. */
246     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247     + char *bufpos; /* Next read position of BUF. */
248     + size_t buflen; /* The length of the byte sequence in buf. */
249     + wint_t wc; /* A gotten wide character. */
250     + size_t mblength; /* The byte size of a multibyte character which shows
251     + as same character as WC. */
252     + mbstate_t state; /* State of the stream. */
253     + int convfail; /* 1, when conversion is failed. Otherwise 0. */
254 cvsdist 1.1 +
255 ovasik 1.41 + idx = 0;
256     + buflen = 0;
257     + bufpos = buf;
258     + memset (&state, '\0', sizeof(mbstate_t));
259 cvsdist 1.1 +
260 ovasik 1.41 + while (1)
261     + {
262     + REFILL_BUFFER (buf, bufpos, buflen, stream);
263 cvsdist 1.1 +
264 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
265 twaugh 1.16 +
266 ovasik 1.41 + if (wc == WEOF)
267     + {
268     + if (idx > 0)
269     + putchar ('\n');
270     + break;
271     + }
272     + else if (wc == L'\n')
273     + {
274     + putchar ('\n');
275     + idx = 0;
276     + }
277     + else
278     + {
279     + idx += (operating_mode == byte_mode) ? mblength : 1;
280     + if (print_kth (idx, NULL))
281     + fwrite (bufpos, mblength, sizeof(char), stdout);
282     + }
283 cvsdist 1.1 +
284 twaugh 1.16 + buflen -= mblength;
285     + bufpos += mblength;
286 cvsdist 1.1 + }
287     +}
288     +#endif
289 ovasik 1.41 +
290     /* Read from stream STREAM, printing to standard output any selected fields. */
291 twaugh 1.16
292 ovasik 1.41 static void
293     @@ -701,13 +840,192 @@ cut_fields (FILE *stream)
294     }
295     }
296 twaugh 1.16
297 cvsdist 1.1 +#if HAVE_MBRTOWC
298 ovasik 1.41 +static void
299     +cut_fields_mb (FILE *stream)
300     +{
301     + int c;
302     + unsigned int field_idx;
303     + int found_any_selected_field;
304     + int buffer_first_field;
305     + int empty_input;
306     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
307     + char *bufpos; /* Next read position of BUF. */
308     + size_t buflen; /* The length of the byte sequence in buf. */
309     + wint_t wc = 0; /* A gotten wide character. */
310     + size_t mblength; /* The byte size of a multibyte character which shows
311     + as same character as WC. */
312     + mbstate_t state; /* State of the stream. */
313     + int convfail; /* 1, when conversion is failed. Otherwise 0. */
314     +
315     + found_any_selected_field = 0;
316     + field_idx = 1;
317     + bufpos = buf;
318     + buflen = 0;
319     + memset (&state, '\0', sizeof(mbstate_t));
320     +
321     + c = getc (stream);
322     + empty_input = (c == EOF);
323     + if (c != EOF)
324     + ungetc (c, stream);
325 twaugh 1.16 + else
326 ovasik 1.41 + wc = WEOF;
327 cvsdist 1.1 +
328 ovasik 1.41 + /* To support the semantics of the -s flag, we may have to buffer
329     + all of the first field to determine whether it is `delimited.'
330     + But that is unnecessary if all non-delimited lines must be printed
331     + and the first field has been selected, or if non-delimited lines
332     + must be suppressed and the first field has *not* been selected.
333     + That is because a non-delimited line has exactly one field. */
334     + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
335 cvsdist 1.1 +
336 ovasik 1.41 + while (1)
337     + {
338     + if (field_idx == 1 && buffer_first_field)
339     + {
340     + int len = 0;
341 cvsdist 1.1 +
342 ovasik 1.41 + while (1)
343     + {
344     + REFILL_BUFFER (buf, bufpos, buflen, stream);
345 cvsdist 1.1 +
346 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER
347     + (wc, bufpos, buflen, mblength, state, convfail);
348 cvsdist 1.1 +
349 ovasik 1.41 + if (wc == WEOF)
350     + break;
351 cvsdist 1.1 +
352 ovasik 1.41 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
353     + memcpy (field_1_buffer + len, bufpos, mblength);
354     + len += mblength;
355     + buflen -= mblength;
356     + bufpos += mblength;
357 cvsdist 1.1 +
358 ovasik 1.41 + if (!convfail && (wc == L'\n' || wc == wcdelim))
359     + break;
360     + }
361 twaugh 1.20 +
362 ovasik 1.41 + if (wc == WEOF)
363     + break;
364 twaugh 1.20 +
365 ovasik 1.41 + /* If the first field extends to the end of line (it is not
366     + delimited) and we are printing all non-delimited lines,
367     + print this one. */
368     + if (convfail || (!convfail && wc != wcdelim))
369     + {
370     + if (suppress_non_delimited)
371     + {
372     + /* Empty. */
373     + }
374     + else
375     + {
376     + fwrite (field_1_buffer, sizeof (char), len, stdout);
377     + /* Make sure the output line is newline terminated. */
378     + if (convfail || (!convfail && wc != L'\n'))
379     + putchar ('\n');
380     + }
381     + continue;
382     + }
383 twaugh 1.20 +
384 ovasik 1.41 + if (print_kth (1, NULL))
385     + {
386     + /* Print the field, but not the trailing delimiter. */
387     + fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
388     + found_any_selected_field = 1;
389     + }
390     + ++field_idx;
391     + }
392 twaugh 1.20 +
393 ovasik 1.41 + if (wc != WEOF)
394     + {
395     + if (print_kth (field_idx, NULL))
396     + {
397     + if (found_any_selected_field)
398     + {
399     + fwrite (output_delimiter_string, sizeof (char),
400     + output_delimiter_length, stdout);
401     + }
402     + found_any_selected_field = 1;
403     + }
404 twaugh 1.20 +
405 ovasik 1.41 + while (1)
406     + {
407     + REFILL_BUFFER (buf, bufpos, buflen, stream);
408 twaugh 1.20 +
409 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER
410     + (wc, bufpos, buflen, mblength, state, convfail);
411 cvsdist 1.1 +
412 ovasik 1.41 + if (wc == WEOF)
413     + break;
414     + else if (!convfail && (wc == wcdelim || wc == L'\n'))
415     + {
416     + buflen -= mblength;
417     + bufpos += mblength;
418     + break;
419     + }
420 cvsdist 1.1 +
421 ovasik 1.41 + if (print_kth (field_idx, NULL))
422     + fwrite (bufpos, mblength, sizeof(char), stdout);
423 twaugh 1.20 +
424 ovasik 1.41 + buflen -= mblength;
425     + bufpos += mblength;
426     + }
427     + }
428 cvsdist 1.1 +
429 ovasik 1.41 + if ((!convfail || wc == L'\n') && buflen < 1)
430     + wc = WEOF;
431 cvsdist 1.1 +
432 ovasik 1.41 + if (!convfail && wc == wcdelim)
433     + ++field_idx;
434     + else if (wc == WEOF || (!convfail && wc == L'\n'))
435     + {
436     + if (found_any_selected_field
437     + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
438     + putchar ('\n');
439     + if (wc == WEOF)
440     + break;
441     + field_idx = 1;
442     + found_any_selected_field = 0;
443     + }
444     + }
445     +}
446     +#endif
447 cvsdist 1.1 +
448 ovasik 1.41 static void
449     cut_stream (FILE *stream)
450     {
451     - if (operating_mode == byte_mode)
452     - cut_bytes (stream);
453     +#if HAVE_MBRTOWC
454     + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
455     + {
456     + switch (operating_mode)
457     + {
458     + case byte_mode:
459     + if (byte_mode_character_aware)
460     + cut_characters_or_cut_bytes_no_split (stream);
461     + else
462     + cut_bytes (stream);
463     + break;
464 twaugh 1.20 +
465 ovasik 1.41 + case character_mode:
466     + cut_characters_or_cut_bytes_no_split (stream);
467     + break;
468 twaugh 1.20 +
469 ovasik 1.41 + case field_mode:
470     + cut_fields_mb (stream);
471     + break;
472 twaugh 1.20 +
473 ovasik 1.41 + default:
474     + abort ();
475     + }
476 twaugh 1.20 + }
477 ovasik 1.41 else
478     - cut_fields (stream);
479 cvsdist 1.1 +#endif
480 ovasik 1.41 + {
481     + if (operating_mode == field_mode)
482     + cut_fields (stream);
483     + else
484     + cut_bytes (stream);
485     + }
486     }
487 cvsdist 1.1
488 ovasik 1.41 /* Process file FILE to standard output.
489     @@ -757,6 +1075,8 @@ main (int argc, char **argv)
490     bool ok;
491     bool delim_specified = false;
492     char *spec_list_string IF_LINT(= NULL);
493     + char mbdelim[MB_LEN_MAX + 1] = { 0 }; /* Zeroed: stays NUL-terminated for xstrdup. */
494     + size_t delimlen = 0;
495 cvsdist 1.1
496 ovasik 1.41 initialize_main (&argc, &argv);
497     set_program_name (argv[0]);
498     @@ -779,7 +1099,6 @@ main (int argc, char **argv)
499     switch (optc)
500 ovasik 1.39 {
501 ovasik 1.41 case 'b':
502     - case 'c':
503     /* Build the byte list. */
504     if (operating_mode != undefined_mode)
505     FATAL_ERROR (_("only one type of list may be specified"));
506     @@ -787,6 +1106,14 @@ main (int argc, char **argv)
507     spec_list_string = optarg;
508     break;
509 cvsdist 1.1
510 ovasik 1.41 + case 'c':
511     + /* Build the character list. */
512     + if (operating_mode != undefined_mode)
513     + FATAL_ERROR (_("only one type of list may be specified"));
514     + operating_mode = character_mode;
515     + spec_list_string = optarg;
516     + break;
517     +
518     case 'f':
519     /* Build the field list. */
520     if (operating_mode != undefined_mode)
521     @@ -798,10 +1125,35 @@ main (int argc, char **argv)
522     case 'd':
523     /* New delimiter. */
524     /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
525     - if (optarg[0] != '\0' && optarg[1] != '\0')
526     - FATAL_ERROR (_("the delimiter must be a single character"));
527     - delim = optarg[0];
528     - delim_specified = true;
529     + {
530 ovasik 1.39 +#if HAVE_MBRTOWC
531 ovasik 1.41 + if(MB_CUR_MAX > 1)
532     + {
533     + mbstate_t state;
534     +
535     + memset (&state, '\0', sizeof(mbstate_t));
536     + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
537     +
538     + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
539     + ++force_singlebyte_mode;
540     + else
541     + {
542     + delimlen = (delimlen < 1) ? 1 : delimlen;
543     + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
544     + FATAL_ERROR (_("the delimiter must be a single character"));
545     + memcpy (mbdelim, optarg, delimlen);
546     + }
547     + }
548 ovasik 1.39 +
549 ovasik 1.41 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
550 ovasik 1.39 +#endif
551 ovasik 1.41 + {
552     + if (optarg[0] != '\0' && optarg[1] != '\0')
553     + FATAL_ERROR (_("the delimiter must be a single character"));
554     + delim = (unsigned char) optarg[0];
555     + }
556     + delim_specified = true;
557     + }
558     break;
559    
560     case OUTPUT_DELIMITER_OPTION:
561     @@ -814,6 +1166,7 @@ main (int argc, char **argv)
562     break;
563    
564     case 'n':
565     + byte_mode_character_aware = 1;
566 ovasik 1.39 break;
567    
568 ovasik 1.41 case 's':
569     @@ -836,7 +1189,7 @@ main (int argc, char **argv)
570     if (operating_mode == undefined_mode)
571     FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
572    
573     - if (delim != '\0' && operating_mode != field_mode)
574     + if (delim_specified && operating_mode != field_mode)
575     FATAL_ERROR (_("an input delimiter may be specified only\
576     when operating on fields"));
577 ovasik 1.25
578 ovasik 1.41 @@ -863,15 +1216,34 @@ main (int argc, char **argv)
579 ovasik 1.25 }
580    
581 ovasik 1.41 if (!delim_specified)
582     - delim = '\t';
583     + {
584     + delim = '\t';
585     +#ifdef HAVE_MBRTOWC
586     + wcdelim = L'\t';
587     + mbdelim[0] = '\t';
588     + mbdelim[1] = '\0';
589     + delimlen = 1;
590     +#endif
591     + }
592 ovasik 1.25
593 ovasik 1.41 if (output_delimiter_string == NULL)
594 ovasik 1.25 {
595 ovasik 1.41 - static char dummy[2];
596     - dummy[0] = delim;
597     - dummy[1] = '\0';
598     - output_delimiter_string = dummy;
599     - output_delimiter_length = 1;
600 ovasik 1.25 +#ifdef HAVE_MBRTOWC
601 ovasik 1.41 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
602     + {
603     + output_delimiter_string = xstrdup(mbdelim);
604     + output_delimiter_length = delimlen;
605     + }
606 ovasik 1.25 +
607 ovasik 1.41 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
608     +#endif
609     + {
610     + static char dummy[2];
611     + dummy[0] = delim;
612     + dummy[1] = '\0';
613     + output_delimiter_string = dummy;
614     + output_delimiter_length = 1;
615     + }
616     }
617    
618     if (optind == argc)
619 ovasik 1.42 diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
620     --- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
621     +++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
622     @@ -37,11 +37,28 @@
623     #include <stdio.h>
624     #include <getopt.h>
625     #include <sys/types.h>
626 ovasik 1.25 +
627 ovasik 1.42 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
628     +#if HAVE_WCHAR_H
629     +# include <wchar.h>
630     +#endif
631 ovasik 1.25 +
632 ovasik 1.42 #include "system.h"
633     #include "error.h"
634     #include "quote.h"
635     #include "xstrndup.h"
636    
637     +/* Work around GCC installations that wrongly define MB_LEN_MAX as 1. */
638     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
639     +# undef MB_LEN_MAX
640     +# define MB_LEN_MAX 16
641     +#endif
642 ovasik 1.25 +
643 ovasik 1.42 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
644     +#if HAVE_MBRTOWC && defined mbstate_t
645     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
646     +#endif
647 ovasik 1.25 +
648 ovasik 1.42 /* The official name of this program (e.g., no `g' prefix). */
649     #define PROGRAM_NAME "expand"
650    
651     @@ -357,6 +374,142 @@ expand (void)
652     }
653     }
654    
655     +#if HAVE_MBRTOWC
656     +static void
657     +expand_multibyte (void)
658     +{
659     + FILE *fp; /* Input stream. */
660     + mbstate_t i_state; /* Current shift state of the input stream. */
661     + mbstate_t i_state_bak; /* Back up the I_STATE. */
662     + mbstate_t o_state; /* Current shift state of the output stream. */
663     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
664     + char *bufpos = buf; /* Next read position of BUF. */
665     + size_t buflen = 0; /* The length of the byte sequence in buf. */
666     + wchar_t wc; /* A gotten wide character. */
667     + size_t mblength; /* The byte size of a multibyte character
668     + which shows as same character as WC. */
669     + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
670     + int column = 0; /* Column on screen of the next char. */
671     + int next_tab_column; /* Column the next tab stop is on. */
672     + int convert = 1; /* If nonzero, perform translations. */
673 ovasik 1.25 +
674 ovasik 1.42 + fp = next_file ((FILE *) NULL);
675     + if (fp == NULL)
676     + return;
677 ovasik 1.25 +
678 ovasik 1.42 + memset (&o_state, '\0', sizeof(mbstate_t));
679     + memset (&i_state, '\0', sizeof(mbstate_t));
680 ovasik 1.25 +
681 ovasik 1.42 + for (;;)
682     + {
683     + /* Refill the buffer BUF. */
684     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
685     + {
686     + memmove (buf, bufpos, buflen);
687     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
688     + bufpos = buf;
689     + }
690 ovasik 1.25 +
691 ovasik 1.42 + /* No character is left in BUF. */
692     + if (buflen < 1)
693     + {
694     + fp = next_file (fp);
695 ovasik 1.25 +
696 ovasik 1.42 + if (fp == NULL)
697     + break; /* No more files. */
698     + else
699     + {
700     + memset (&i_state, '\0', sizeof(mbstate_t));
701     + continue;
702     + }
703     + }
704 ovasik 1.25 +
705 ovasik 1.42 + /* Get a wide character. */
706     + i_state_bak = i_state;
707     + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
708 twaugh 1.20 +
709 ovasik 1.42 + switch (mblength)
710     + {
711     + case (size_t)-1: /* illegal byte sequence. */
712     + case (size_t)-2:
713     + mblength = 1;
714     + i_state = i_state_bak;
715     + if (convert)
716     + {
717     + ++column;
718     + if (convert_entire_line == 0)
719     + convert = 0;
720     + }
721     + putchar (*bufpos);
722     + break;
723 twaugh 1.20 +
724 ovasik 1.42 + case 0: /* null. */
725     + mblength = 1;
726     + if (convert && convert_entire_line == 0)
727     + convert = 0;
728     + putchar ('\0');
729     + break;
730 twaugh 1.20 +
731 ovasik 1.42 + default:
732     + if (wc == L'\n') /* LF. */
733     + {
734     + tab_index = 0;
735     + column = 0;
736     + convert = 1;
737     + putchar ('\n');
738     + }
739     + else if (wc == L'\t' && convert) /* Tab. */
740     + {
741     + if (tab_size == 0)
742     + {
743     + /* Do not let tab_index == first_free_tab;
744     + stop when it is 1 less. */
745     + while (tab_index < first_free_tab - 1
746     + && column >= tab_list[tab_index])
747     + tab_index++;
748     + next_tab_column = tab_list[tab_index];
749     + if (tab_index < first_free_tab - 1)
750     + tab_index++;
751     + if (column >= next_tab_column)
752     + next_tab_column = column + 1;
753     + }
754     + else
755     + next_tab_column = column + tab_size - column % tab_size;
756 twaugh 1.20 +
757 ovasik 1.42 + while (column < next_tab_column)
758     + {
759     + putchar (' ');
760     + ++column;
761     + }
762     + }
763     + else /* Others. */
764     + {
765     + if (convert)
766     + {
767     + if (wc == L'\b')
768     + {
769     + if (column > 0)
770     + --column;
771     + }
772     + else
773     + {
774     + int width; /* The width of WC. */
775 twaugh 1.20 +
776 ovasik 1.42 + width = wcwidth (wc);
777     + column += (width > 0) ? width : 0;
778     + if (convert_entire_line == 0)
779     + convert = 0;
780     + }
781     + }
782     + fwrite (bufpos, sizeof(char), mblength, stdout);
783     + }
784     + }
785     + buflen -= mblength;
786     + bufpos += mblength;
787 ovasik 1.41 + }
788     +}
789 ovasik 1.42 +#endif
790 cvsdist 1.1 +
791 ovasik 1.42 int
792     main (int argc, char **argv)
793     {
794     @@ -421,7 +574,12 @@ main (int argc, char **argv)
795    
796     file_list = (optind < argc ? &argv[optind] : stdin_argv);
797    
798     - expand ();
799     +#if HAVE_MBRTOWC
800     + if (MB_CUR_MAX > 1)
801     + expand_multibyte ();
802     + else
803     +#endif
804     + expand ();
805    
806     if (have_read_stdin && fclose (stdin) != 0)
807     error (EXIT_FAILURE, errno, "-");
808     diff -urNp coreutils-8.0-orig/src/fold.c coreutils-8.0/src/fold.c
809     --- coreutils-8.0-orig/src/fold.c 2009-09-23 10:25:44.000000000 +0200
810     +++ coreutils-8.0/src/fold.c 2009-10-07 10:07:16.000000000 +0200
811     @@ -22,11 +22,33 @@
812     #include <getopt.h>
813     #include <sys/types.h>
814    
815     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
816     +#if HAVE_WCHAR_H
817     +# include <wchar.h>
818     +#endif
819 cvsdist 1.1 +
820 ovasik 1.42 +/* Get iswprint(), iswblank(), wcwidth(). */
821     +#if HAVE_WCTYPE_H
822     +# include <wctype.h>
823     +#endif
824 cvsdist 1.1 +
825 ovasik 1.42 #include "system.h"
826     #include "error.h"
827     #include "quote.h"
828     #include "xstrtol.h"
829    
830     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
831     + installation; work around this configuration error. */
832     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
833     +# undef MB_LEN_MAX
834     +# define MB_LEN_MAX 16
835     +#endif
836 cvsdist 1.1 +
837 ovasik 1.42 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
838     +#if HAVE_MBRTOWC && defined mbstate_t
839     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
840 ovasik 1.41 +#endif
841 cvsdist 1.1 +
842 ovasik 1.42 #define TAB_WIDTH 8
843    
844     /* The official name of this program (e.g., no `g' prefix). */
845     @@ -34,20 +56,41 @@
846    
847     #define AUTHORS proper_name ("David MacKenzie")
848    
849     +#define FATAL_ERROR(Message) \
850     + do \
851     + { \
852     + error (0, 0, (Message)); \
853     + usage (2); \
854     + } \
855     + while (0)
856     +
857     +enum operating_mode
858 ovasik 1.41 +{
859 ovasik 1.42 + /* Fold texts by columns that are at the given positions. */
860     + column_mode,
861     +
862     + /* Fold texts by bytes that are at the given positions. */
863     + byte_mode,
864 cvsdist 1.1 +
865 ovasik 1.42 + /* Fold texts by characters that are at the given positions. */
866     + character_mode,
867     +};
868 cvsdist 1.1 +
869 ovasik 1.42 +/* The argument shows current mode. (Default: column_mode) */
870     +static enum operating_mode operating_mode;
871 twaugh 1.20 +
872 ovasik 1.42 /* If nonzero, try to break on whitespace. */
873     static bool break_spaces;
874    
875     -/* If nonzero, count bytes, not column positions. */
876     -static bool count_bytes;
877     -
878     /* If nonzero, at least one of the files we read was standard input. */
879     static bool have_read_stdin;
880    
881     -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
882     +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
883    
884     static struct option const longopts[] =
885     {
886     {"bytes", no_argument, NULL, 'b'},
887     + {"characters", no_argument, NULL, 'c'},
888     {"spaces", no_argument, NULL, 's'},
889     {"width", required_argument, NULL, 'w'},
890     {GETOPT_HELP_OPTION_DECL},
891     @@ -77,6 +120,7 @@ Mandatory arguments to long options are
892     "), stdout);
893     fputs (_("\
894     -b, --bytes count bytes rather than columns\n\
895     + -c, --characters count characters rather than columns\n\
896     -s, --spaces break at spaces\n\
897     -w, --width=WIDTH use WIDTH columns instead of 80\n\
898     "), stdout);
899     @@ -94,7 +138,7 @@ Mandatory arguments to long options are
900     static size_t
901     adjust_column (size_t column, char c)
902     {
903     - if (!count_bytes)
904     + if (operating_mode != byte_mode)
905     {
906     if (c == '\b')
907     {
908     @@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
909     to stdout, with maximum line length WIDTH.
910     Return true if successful. */
911    
912     -static bool
913     -fold_file (char const *filename, size_t width)
914     +static void
915     +fold_text (FILE *istream, size_t width, int *saved_errno)
916     {
917     - FILE *istream;
918     int c;
919     size_t column = 0; /* Screen column where next char will go. */
920     size_t offset_out = 0; /* Index in `line_out' for next char. */
921     static char *line_out = NULL;
922     static size_t allocated_out = 0;
923     - int saved_errno;
924     -
925     - if (STREQ (filename, "-"))
926     - {
927     - istream = stdin;
928     - have_read_stdin = true;
929     - }
930     - else
931     - istream = fopen (filename, "r");
932     -
933     - if (istream == NULL)
934     - {
935     - error (0, errno, "%s", filename);
936     - return false;
937     - }
938    
939     while ((c = getc (istream)) != EOF)
940     {
941     @@ -168,6 +196,15 @@ fold_file (char const *filename, size_t
942     bool found_blank = false;
943     size_t logical_end = offset_out;
944    
945     + /* If LINE_OUT is empty, put the new character
946     + in LINE_OUT even though column is bigger
947     + than width. */
948     + if (offset_out == 0)
949     + {
950     + line_out[offset_out++] = c;
951     + continue;
952     + }
953 twaugh 1.20 +
954 ovasik 1.42 /* Look for the last blank. */
955     while (logical_end)
956     {
957     @@ -214,11 +251,222 @@ fold_file (char const *filename, size_t
958     line_out[offset_out++] = c;
959     }
960    
961     - saved_errno = errno;
962     + *saved_errno = errno;
963    
964     if (offset_out)
965     fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
966    
967 ovasik 1.41 +}
968 twaugh 1.20 +
969 ovasik 1.42 +#if HAVE_MBRTOWC
970     +static void
971     +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
972 ovasik 1.41 +{
973 ovasik 1.42 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
974     + size_t buflen = 0; /* The length of the byte sequence in buf. */
975     + char *bufpos = NULL; /* Next read position of BUF. */
976     + wint_t wc; /* The wide character just converted. */
977     + size_t mblength; /* The byte length of the multibyte character that was
978     + converted to WC. */
979     + mbstate_t state, state_bak; /* State of the stream. */
980     + int convfail; /* 1 when the conversion failed, otherwise 0. */
981     +
982     + static char *line_out = NULL;
983     + size_t offset_out = 0; /* Index in `line_out' for next char. */
984     + static size_t allocated_out = 0;
985     +
986     + int increment;
987     + size_t column = 0;
988     +
989     + size_t last_blank_pos;
990     + size_t last_blank_column;
991     + int is_blank_seen;
992     + int last_blank_increment = 0;
993     + int is_bs_following_last_blank;
994     + size_t bs_following_last_blank_num;
995     + int is_cr_after_last_blank;
996 twaugh 1.20 +
997 ovasik 1.42 +#define CLEAR_FLAGS \
998     + do \
999     + { \
1000     + last_blank_pos = 0; \
1001     + last_blank_column = 0; \
1002     + is_blank_seen = 0; \
1003     + is_bs_following_last_blank = 0; \
1004     + bs_following_last_blank_num = 0; \
1005     + is_cr_after_last_blank = 0; \
1006     + } \
1007     + while (0)
1008 twaugh 1.20 +
1009 ovasik 1.42 +#define START_NEW_LINE \
1010     + do \
1011     + { \
1012     + putchar ('\n'); \
1013     + column = 0; \
1014     + offset_out = 0; \
1015     + CLEAR_FLAGS; \
1016     + } \
1017     + while (0)
1018 twaugh 1.20 +
1019 ovasik 1.42 + CLEAR_FLAGS;
1020     + memset (&state, '\0', sizeof(mbstate_t));
1021 twaugh 1.20 +
1022 ovasik 1.42 + for (;; bufpos += mblength, buflen -= mblength)
1023 twaugh 1.20 + {
1024 ovasik 1.42 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1025 ovasik 1.41 + {
1026 ovasik 1.42 + memmove (buf, bufpos, buflen);
1027     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1028     + bufpos = buf;
1029     + }
1030 twaugh 1.20 +
1031 ovasik 1.42 + if (buflen < 1)
1032     + break;
1033 twaugh 1.20 +
1034 ovasik 1.42 + /* Get a wide character. */
1035     + convfail = 0;
1036     + state_bak = state;
1037     + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1038 ovasik 1.39 +
1039 ovasik 1.42 + switch (mblength)
1040     + {
1041     + case (size_t)-1:
1042     + case (size_t)-2:
1043     + convfail++;
1044     + state = state_bak;
1045     + /* Fall through. */
1046 ovasik 1.39 +
1047 ovasik 1.42 + case 0:
1048     + mblength = 1;
1049     + break;
1050     + }
1051 ovasik 1.39 +
1052 ovasik 1.42 +rescan:
1053     + if (operating_mode == byte_mode) /* byte mode */
1054     + increment = mblength;
1055     + else if (operating_mode == character_mode) /* character mode */
1056     + increment = 1;
1057     + else /* column mode */
1058     + {
1059     + if (convfail)
1060     + increment = 1;
1061 ovasik 1.41 + else
1062     + {
1063 ovasik 1.42 + switch (wc)
1064     + {
1065     + case L'\n':
1066     + fwrite (line_out, sizeof(char), offset_out, stdout);
1067     + START_NEW_LINE;
1068     + continue;
1069     +
1070     + case L'\b':
1071     + increment = (column > 0) ? -1 : 0;
1072     + break;
1073 ovasik 1.41 +
1074 ovasik 1.42 + case L'\r':
1075     + increment = -1 * column;
1076     + break;
1077 ovasik 1.41 +
1078 ovasik 1.42 + case L'\t':
1079     + increment = 8 - column % 8;
1080     + break;
1081 ovasik 1.41 +
1082 ovasik 1.42 + default:
1083     + increment = wcwidth (wc);
1084     + increment = (increment < 0) ? 0 : increment;
1085     + }
1086 ovasik 1.39 + }
1087     + }
1088 twaugh 1.20 +
1089 ovasik 1.42 + if (column + increment > width && break_spaces && last_blank_pos)
1090 ovasik 1.41 + {
1091 ovasik 1.42 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1092 ovasik 1.41 + putchar ('\n');
1093 ovasik 1.42 +
1094     + offset_out = offset_out - last_blank_pos;
1095     + column = column - last_blank_column + ((is_cr_after_last_blank)
1096     + ? last_blank_increment : bs_following_last_blank_num);
1097     + memmove (line_out, line_out + last_blank_pos, offset_out);
1098     + CLEAR_FLAGS;
1099     + goto rescan;
1100 ovasik 1.41 + }
1101 ovasik 1.42 +
1102     + if (column + increment > width && column != 0)
1103 ovasik 1.41 + {
1104 ovasik 1.42 + fwrite (line_out, sizeof(char), offset_out, stdout);
1105     + START_NEW_LINE;
1106     + goto rescan;
1107 ovasik 1.41 + }
1108 ovasik 1.42 +
1109     + if (allocated_out < offset_out + mblength)
1110 ovasik 1.41 + {
1111 ovasik 1.42 + line_out = X2REALLOC (line_out, &allocated_out);
1112 ovasik 1.41 + }
1113     +
1114 ovasik 1.42 + memcpy (line_out + offset_out, bufpos, mblength);
1115     + offset_out += mblength;
1116     + column += increment;
1117 ovasik 1.41 +
1118 ovasik 1.42 + if (is_blank_seen && !convfail && wc == L'\r')
1119     + is_cr_after_last_blank = 1;
1120 ovasik 1.41 +
1121 ovasik 1.42 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1122     + ++bs_following_last_blank_num;
1123     + else
1124     + is_bs_following_last_blank = 0;
1125 ovasik 1.41 +
1126 ovasik 1.42 + if (break_spaces && !convfail && iswblank (wc))
1127 ovasik 1.41 + {
1128 ovasik 1.42 + last_blank_pos = offset_out;
1129     + last_blank_column = column;
1130     + is_blank_seen = 1;
1131     + last_blank_increment = increment;
1132     + is_bs_following_last_blank = 1;
1133     + bs_following_last_blank_num = 0;
1134     + is_cr_after_last_blank = 0;
1135 ovasik 1.41 + }
1136 ovasik 1.42 + }
1137 ovasik 1.41 +
1138 ovasik 1.42 + *saved_errno = errno;
1139 ovasik 1.41 +
1140 ovasik 1.42 + if (offset_out)
1141     + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1142 ovasik 1.41 +
1143     +}
1144 ovasik 1.42 +#endif
1145 ovasik 1.41 +
1146 ovasik 1.42 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1147     + to stdout, with maximum line length WIDTH.
1148     + Return true if successful, false if an error occurs. */
1149 ovasik 1.41 +
1150     +static bool
1151 ovasik 1.42 +fold_file (char const *filename, size_t width)
1152 ovasik 1.41 +{
1153 ovasik 1.42 + FILE *istream;
1154     + int saved_errno;
1155 ovasik 1.41 +
1156 ovasik 1.42 + if (STREQ (filename, "-"))
1157 ovasik 1.41 + {
1158 ovasik 1.42 + istream = stdin;
1159     + have_read_stdin = true;
1160 ovasik 1.41 + }
1161     + else
1162 ovasik 1.42 + istream = fopen (filename, "r");
1163 ovasik 1.41 +
1164 ovasik 1.42 + if (istream == NULL)
1165 ovasik 1.41 + {
1166 ovasik 1.42 + error (0, errno, "%s", filename);
1167     + return false; /* Open failed: report failure, as the pre-patch code did. */
1168 ovasik 1.41 + }
1169     +
1170 ovasik 1.42 + /* Use the multibyte folder when the locale's MB_CUR_MAX exceeds 1. */
1171     +#if HAVE_MBRTOWC
1172     + if (MB_CUR_MAX > 1)
1173     + fold_multibyte_text (istream, width, &saved_errno);
1174     + else
1175     +#endif
1176     + fold_text (istream, width, &saved_errno);
1177 ovasik 1.41 +
1178 ovasik 1.42 if (ferror (istream))
1179     {
1180     error (0, saved_errno, "%s", filename);
1181     @@ -251,7 +499,8 @@ main (int argc, char **argv)
1182    
1183     atexit (close_stdout);
1184    
1185     - break_spaces = count_bytes = have_read_stdin = false;
1186     + operating_mode = column_mode;
1187     + break_spaces = have_read_stdin = false;
1188    
1189     while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1190     {
1191     @@ -260,7 +509,15 @@ main (int argc, char **argv)
1192     switch (optc)
1193     {
1194     case 'b': /* Count bytes rather than columns. */
1195     - count_bytes = true;
1196     + if (operating_mode != column_mode)
1197     + FATAL_ERROR (_("only one way of folding may be specified"));
1198 ovasik 1.41 + operating_mode = byte_mode;
1199     + break;
1200     +
1201 ovasik 1.42 + case 'c':
1202     + if (operating_mode != column_mode)
1203     + FATAL_ERROR (_("only one way of folding may be specified"));
1204     + operating_mode = character_mode;
1205     break;
1206    
1207     case 's': /* Break at word boundaries. */
1208     diff -urNp coreutils-8.0-orig/src/join.c coreutils-8.0/src/join.c
1209     --- coreutils-8.0-orig/src/join.c 2009-09-23 10:25:44.000000000 +0200
1210     +++ coreutils-8.0/src/join.c 2009-10-07 10:07:16.000000000 +0200
1211     @@ -22,17 +22,31 @@
1212     #include <sys/types.h>
1213     #include <getopt.h>
1214    
1215     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1216     +#if HAVE_WCHAR_H
1217     +# include <wchar.h>
1218     +#endif
1219 ovasik 1.41 +
1220 ovasik 1.42 +/* Get iswblank(), towupper(). */
1221     +#if HAVE_WCTYPE_H
1222     +# include <wctype.h>
1223     +#endif
1224 ovasik 1.41 +
1225 ovasik 1.42 #include "system.h"
1226     #include "error.h"
1227     #include "hard-locale.h"
1228     #include "linebuffer.h"
1229     -#include "memcasecmp.h"
1230     #include "quote.h"
1231     #include "stdio--.h"
1232     #include "xmemcoll.h"
1233     #include "xstrtol.h"
1234     #include "argmatch.h"
1235    
1236     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1237     +#if HAVE_MBRTOWC && defined mbstate_t
1238     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1239     +#endif
1240 ovasik 1.41 +
1241 ovasik 1.42 /* The official name of this program (e.g., no `g' prefix). */
1242     #define PROGRAM_NAME "join"
1243    
1244     @@ -121,10 +135,12 @@ static struct outlist outlist_head;
1245     /* Last element in `outlist', where a new element can be added. */
1246     static struct outlist *outlist_end = &outlist_head;
1247    
1248     -/* Tab character separating fields. If negative, fields are separated
1249     - by any nonempty string of blanks, otherwise by exactly one
1250     - tab character whose value (when cast to unsigned char) equals TAB. */
1251     -static int tab = -1;
1252     +/* Tab character separating fields. If NULL, fields are separated
1253     + by any nonempty string of blanks. */
1254     +static char *tab = NULL;
1255 ovasik 1.41 +
1256 ovasik 1.42 +/* The number of bytes used for tab. */
1257     +static size_t tablen = 0;
1258 twaugh 1.16
1259 ovasik 1.42 /* If nonzero, check that the input is correctly ordered. */
1260     static enum
1261     @@ -239,10 +255,11 @@ xfields (struct line *line)
1262     if (ptr == lim)
1263     return;
1264 cvsdist 1.1
1265 ovasik 1.42 - if (0 <= tab)
1266     + if (tab != NULL)
1267     {
1268     + unsigned char t = tab[0];
1269     char *sep;
1270     - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1271     + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1272     extract_field (line, ptr, sep - ptr);
1273 ovasik 1.41 }
1274 ovasik 1.42 else
1275     @@ -269,6 +286,148 @@ xfields (struct line *line)
1276     extract_field (line, ptr, lim - ptr);
1277 ovasik 1.41 }
1278 cvsdist 1.1
1279 ovasik 1.41 +#if HAVE_MBRTOWC
1280     +static void
1281 ovasik 1.42 +xfields_multibyte (struct line *line)
1282 twaugh 1.20 +{
1283 ovasik 1.42 + char *ptr = line->buf.buffer;
1284     + char const *lim = ptr + line->buf.length - 1;
1285     + wchar_t wc = 0;
1286     + size_t mblength = 1;
1287     + mbstate_t state, state_bak;
1288     +
1289     + memset (&state, 0, sizeof (mbstate_t));
1290 twaugh 1.16 +
1291 ovasik 1.42 + if (ptr >= lim)
1292 ovasik 1.41 + return;
1293 twaugh 1.20 +
1294 ovasik 1.42 + if (tab != NULL)
1295 ovasik 1.41 + {
1296 ovasik 1.42 + unsigned char t = tab[0];
1297     + char *sep = ptr;
1298     + for (; ptr < lim; ptr = sep + mblength)
1299 ovasik 1.41 + {
1300 ovasik 1.42 + sep = ptr;
1301     + while (sep < lim)
1302     + {
1303     + state_bak = state;
1304     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1305 ovasik 1.41 +
1306 ovasik 1.42 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1307     + {
1308     + mblength = 1;
1309     + state = state_bak;
1310     + }