/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory | Revision Log


Revision 1.40 - (hide annotations) (download) (as text)
Tue Oct 6 13:43:24 2009 UTC (6 weeks, 5 days ago) by ovasik
Branch: MAIN
Changes since 1.39: +2 -1 lines
File MIME type: text/x-patch
New upstream release 8.0 (beta), defuzz patches, remove applied patches
1 ovasik 1.31 diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut
2     --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200
3     +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4     @@ -26,7 +26,7 @@
5     my $prog = 'cut';
6     my $try = "Try \`$prog --help' for more information.\n";
7     my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8     -my $inval = "$prog: invalid byte or field list\n$try";
9     +my $inval = "$prog: invalid byte, character or field list\n$try";
10     my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11    
12     my @Tests =
13     @@ -140,8 +140,8 @@
14     ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15    
16     # None of the following invalid ranges provoked an error up to coreutils-6.9.
17     - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18     - {ERR=>"$prog: invalid decreasing range\n$try"}],
19     + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20     + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21     ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22     ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23     ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
25 ovasik 1.26 +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26 twaugh 1.20 @@ -0,0 +1,58 @@
27     +#! /bin/sh
28     +case $# in
29 ovasik 1.26 + 0) xx='../src/sort';;
30 twaugh 1.20 + *) xx="$1";;
31     +esac
32     +test "$VERBOSE" && echo=echo || echo=:
33     +$echo testing program: $xx
34     +errors=0
35     +test "$srcdir" || srcdir=.
36     +test "$VERBOSE" && $xx --version 2> /dev/null
37     +
38     +export LC_ALL=en_US.UTF-8
39     +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40     +errors=0
41     +
42 ovasik 1.26 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43 twaugh 1.20 +code=$?
44     +if test $code != 0; then
45     + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46     + errors=`expr $errors + 1`
47     +else
48 ovasik 1.26 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49 twaugh 1.20 + case $? in
50     + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51 ovasik 1.26 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52     + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53 twaugh 1.20 + errors=`expr $errors + 1`;;
54     + 2) $echo "Test mb1 may have failed." 1>&2
55 ovasik 1.26 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56 twaugh 1.20 + errors=`expr $errors + 1`;;
57     + esac
58     +fi
59     +
60 ovasik 1.26 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61 twaugh 1.20 +code=$?
62     +if test $code != 0; then
63     + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64     + errors=`expr $errors + 1`
65     +else
66 ovasik 1.26 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67 twaugh 1.20 + case $? in
68     + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69 ovasik 1.26 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70     + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71 twaugh 1.20 + errors=`expr $errors + 1`;;
72     + 2) $echo "Test mb2 may have failed." 1>&2
73 ovasik 1.26 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74 twaugh 1.20 + errors=`expr $errors + 1`;;
75     + esac
76     +fi
77     +
78     +if test $errors = 0; then
79     + $echo Passed all 113 tests. 1>&2
80     +else
81     + $echo Failed $errors tests. 1>&2
82     +fi
83     +test $errors = 0 || errors=1
84     +exit $errors
85 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
86 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87 twaugh 1.20 @@ -0,0 +1,4 @@
88     +Apple@AA10@@20
89     +Banana@AA5@@30
90     +Citrus@AA20@@5
91     +Cherry@AA30@@10
92 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
93 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94 twaugh 1.20 @@ -0,0 +1,4 @@
95     +Citrus@AA20@@5
96     +Cherry@AA30@@10
97     +Apple@AA10@@20
98     +Banana@AA5@@30
99 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
100 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101 twaugh 1.20 @@ -0,0 +1,4 @@
102     +Apple@10
103     +Banana@5
104     +Citrus@20
105     +Cherry@30
106 twaugh 1.21 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
107 ovasik 1.26 +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108 twaugh 1.20 @@ -0,0 +1,4 @@
109     +Banana@5
110     +Apple@10
111     +Citrus@20
112     +Cherry@30
113 ovasik 1.26 diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114     --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115     +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116 ovasik 1.28 @@ -192,6 +192,7 @@
117 ovasik 1.26 misc/sort-compress \
118 ovasik 1.33 misc/sort-continue \
119 ovasik 1.28 misc/sort-files0-from \
120 ovasik 1.26 + misc/sort-mb-tests \
121     misc/sort-merge \
122 ovasik 1.33 misc/sort-merge-fdlimit \
123 ovasik 1.26 misc/sort-rand \
124     @@ -391,6 +392,10 @@
125     $(root_tests)
126    
127     pr_data = \
128     + misc/mb1.X \
129     + misc/mb1.I \
130     + misc/mb2.X \
131     + misc/mb2.I \
132     pr/0F \
133     pr/0FF \
134     pr/0FFnt \
135 twaugh 1.21 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136     +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
137 twaugh 1.20 @@ -22,6 +22,11 @@
138    
139     # include <stdio.h>
140    
141     +/* Get mbstate_t. */
142     +# if HAVE_WCHAR_H
143     +# include <wchar.h>
144     +# endif
145     +
146     /* A `struct linebuffer' holds a line of text. */
147    
148     struct linebuffer
149     @@ -29,6 +34,9 @@
150     size_t size; /* Allocated. */
151     size_t length; /* Used. */
152     char *buffer;
153     +# if HAVE_WCHAR_H
154     + mbstate_t state;
155     +# endif
156     };
157    
158     /* Initialize linebuffer LINEBUFFER for use. */
159 twaugh 1.21 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
160     +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
161 twaugh 1.16 @@ -38,11 +38,28 @@
162     #include <stdio.h>
163 cvsdist 1.1 #include <getopt.h>
164     #include <sys/types.h>
165     +
166 twaugh 1.16 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
167 cvsdist 1.1 +#if HAVE_WCHAR_H
168     +# include <wchar.h>
169     +#endif
170 twaugh 1.16 +
171 cvsdist 1.1 #include "system.h"
172     #include "error.h"
173     #include "quote.h"
174     #include "xstrndup.h"
175    
176     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
177 twaugh 1.16 + installation; work around this configuration error. */
178 cvsdist 1.1 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
179     +# define MB_LEN_MAX 16
180     +#endif
181     +
182     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
183     +#if HAVE_MBRTOWC && defined mbstate_t
184     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
185     +#endif
186     +
187     /* The official name of this program (e.g., no `g' prefix). */
188 twaugh 1.16 #define PROGRAM_NAME "expand"
189 cvsdist 1.1
190 twaugh 1.20 @@ -365,6 +383,142 @@
191 cvsdist 1.1 }
192     }
193    
194     +#if HAVE_MBRTOWC
195     +static void
196 twaugh 1.16 +expand_multibyte (void)
197 cvsdist 1.1 +{
198 twaugh 1.16 + FILE *fp; /* Input stream. */
199     + mbstate_t i_state; /* Current shift state of the input stream. */
200     + mbstate_t i_state_bak; /* Back up the I_STATE. */
201     + mbstate_t o_state; /* Current shift state of the output stream. */
202 cvsdist 1.1 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
203 twaugh 1.16 + char *bufpos; /* Next read position of BUF. */
204     + size_t buflen = 0; /* The length of the byte sequence in buf. */
205     + wchar_t wc; /* A gotten wide character. */
206     + size_t mblength; /* The byte size of a multibyte character
207     + which shows as same character as WC. */
208     + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
209     + int column = 0; /* Column on screen of the next char. */
210     + int next_tab_column; /* Column the next tab stop is on. */
211     + int convert = 1; /* If nonzero, perform translations. */
212     +
213     + fp = next_file ((FILE *) NULL);
214     + if (fp == NULL)
215     + return;
216 cvsdist 1.1 +
217 twaugh 1.16 + memset (&o_state, '\0', sizeof(mbstate_t));
218     + memset (&i_state, '\0', sizeof(mbstate_t));
219 cvsdist 1.1 +
220 twaugh 1.16 + for (;;)
221 cvsdist 1.1 + {
222 twaugh 1.16 + /* Refill the buffer BUF. */
223     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
224 cvsdist 1.1 + {
225 twaugh 1.16 + memmove (buf, bufpos, buflen);
226     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
227     + bufpos = buf;
228 cvsdist 1.1 + }
229 twaugh 1.16 +
230     + /* No character is left in BUF. */
231     + if (buflen < 1)
232 cvsdist 1.1 + {
233 twaugh 1.16 + fp = next_file (fp);
234     +
235     + if (fp == NULL)
236     + break; /* No more files. */
237     + else
238     + {
239     + memset (&i_state, '\0', sizeof(mbstate_t));
240     + continue;
241     + }
242 cvsdist 1.1 + }
243     +
244 twaugh 1.16 + /* Get a wide character. */
245     + i_state_bak = i_state;
246     + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
247 cvsdist 1.1 +
248 twaugh 1.16 + switch (mblength)
249 cvsdist 1.1 + {
250 twaugh 1.16 + case (size_t)-1: /* illegal byte sequence. */
251     + case (size_t)-2:
252     + mblength = 1;
253     + i_state = i_state_bak;
254     + if (convert)
255 cvsdist 1.1 + {
256 twaugh 1.16 + ++column;
257     + if (convert_entire_line == 0)
258     + convert = 0;
259     + }
260     + putchar (*bufpos);
261     + break;
262 cvsdist 1.1 +
263 twaugh 1.16 + case 0: /* null. */
264     + mblength = 1;
265     + if (convert && convert_entire_line == 0)
266     + convert = 0;
267     + putchar ('\0');
268     + break;
269 cvsdist 1.1 +
270 twaugh 1.16 + default:
271     + if (wc == L'\n') /* LF. */
272     + {
273     + tab_index = 0;
274     + column = 0;
275     + convert = 1;
276     + putchar ('\n');
277 cvsdist 1.1 + }
278 twaugh 1.16 + else if (wc == L'\t' && convert) /* Tab. */
279 cvsdist 1.1 + {
280 twaugh 1.16 + if (tab_size == 0)
281 cvsdist 1.1 + {
282 twaugh 1.16 + /* Do not let tab_index == first_free_tab;
283     + stop when it is 1 less. */
284     + while (tab_index < first_free_tab - 1
285     + && column >= tab_list[tab_index])
286     + tab_index++;
287     + next_tab_column = tab_list[tab_index];
288     + if (tab_index < first_free_tab - 1)
289     + tab_index++;
290     + if (column >= next_tab_column)
291     + next_tab_column = column + 1;
292 cvsdist 1.1 + }
293     + else
294 twaugh 1.16 + next_tab_column = column + tab_size - column % tab_size;
295     +
296     + while (column < next_tab_column)
297 cvsdist 1.1 + {
298 twaugh 1.16 + putchar (' ');
299     + ++column;
300 cvsdist 1.1 + }
301     + }
302 twaugh 1.16 + else /* Others. */
303 cvsdist 1.1 + {
304 twaugh 1.16 + if (convert)
305 cvsdist 1.1 + {
306 twaugh 1.16 + if (wc == L'\b')
307     + {
308     + if (column > 0)
309     + --column;
310     + }
311     + else
312     + {
313     + int width; /* The width of WC. */
314 cvsdist 1.1 +
315 twaugh 1.16 + width = wcwidth (wc);
316     + column += (width > 0) ? width : 0;
317     + if (convert_entire_line == 0)
318     + convert = 0;
319     + }
320 cvsdist 1.1 + }
321 twaugh 1.16 + fwrite (bufpos, sizeof(char), mblength, stdout);
322 cvsdist 1.1 + }
323     + }
324 twaugh 1.16 + buflen -= mblength;
325     + bufpos += mblength;
326 cvsdist 1.1 + }
327     +}
328     +#endif
329     +
330 twaugh 1.16 int
331     main (int argc, char **argv)
332 cvsdist 1.1 {
333 twaugh 1.20 @@ -429,7 +583,12 @@
334 twaugh 1.16
335     file_list = (optind < argc ? &argv[optind] : stdin_argv);
336    
337     - expand ();
338 cvsdist 1.1 +#if HAVE_MBRTOWC
339 twaugh 1.16 + if (MB_CUR_MAX > 1)
340     + expand_multibyte ();
341     + else
342     +#endif
343     + expand ();
344    
345     if (have_read_stdin && fclose (stdin) != 0)
346     error (EXIT_FAILURE, errno, "-");
347 twaugh 1.21 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
348     +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
349 ovasik 1.40 @@ -23,17 +23,31 @@
350 twaugh 1.20 #include <sys/types.h>
351     #include <getopt.h>
352 twaugh 1.11
353 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
354 cvsdist 1.1 +#if HAVE_WCHAR_H
355     +# include <wchar.h>
356     +#endif
357     +
358 twaugh 1.20 +/* Get iswblank(), towupper(). */
359 cvsdist 1.1 +#if HAVE_WCTYPE_H
360     +# include <wctype.h>
361     +#endif
362     +
363     #include "system.h"
364     #include "error.h"
365 ovasik 1.40 #include "hard-locale.h"
366 twaugh 1.20 #include "linebuffer.h"
367     -#include "memcasecmp.h"
368     #include "quote.h"
369     #include "stdio--.h"
370     #include "xmemcoll.h"
371 cvsdist 1.1 #include "xstrtol.h"
372 ovasik 1.25 #include "argmatch.h"
373 cvsdist 1.1
374     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
375     +#if HAVE_MBRTOWC && defined mbstate_t
376     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
377     +#endif
378     +
379     /* The official name of this program (e.g., no `g' prefix). */
380 twaugh 1.20 #define PROGRAM_NAME "join"
381 cvsdist 1.1
382 twaugh 1.20 @@ -104,10 +118,12 @@
383     /* Last element in `outlist', where a new element can be added. */
384     static struct outlist *outlist_end = &outlist_head;
385 cvsdist 1.1
386 twaugh 1.20 -/* Tab character separating fields. If negative, fields are separated
387     - by any nonempty string of blanks, otherwise by exactly one
388     - tab character whose value (when cast to unsigned char) equals TAB. */
389     -static int tab = -1;
390     +/* Tab character separating fields. If NULL, fields are separated
391     + by any nonempty string of blanks. */
392     +static char *tab = NULL;
393 cvsdist 1.1 +
394 twaugh 1.20 +/* The number of bytes used for tab. */
395     +static size_t tablen = 0;
396 cvsdist 1.1
397 ovasik 1.27 /* If nonzero, check that the input is correctly ordered. */
398     static enum
399 twaugh 1.20 @@ -199,10 +217,11 @@
400     if (ptr == lim)
401     return;
402 cvsdist 1.1
403 twaugh 1.20 - if (0 <= tab)
404     + if (tab != NULL)
405     {
406     + unsigned char t = tab[0];
407     char *sep;
408     - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
409     + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
410 ovasik 1.39 extract_field (line, ptr, sep - ptr);
411 twaugh 1.20 }
412     else
413     @@ -229,6 +248,148 @@
414     extract_field (line, ptr, lim - ptr);
415     }
416 cvsdist 1.1
417 twaugh 1.20 +#if HAVE_MBRTOWC
418     +static void
419     +xfields_multibyte (struct line *line)
420     +{
421     + char *ptr = line->buf.buffer;
422     + char const *lim = ptr + line->buf.length - 1;
423     + wchar_t wc = 0;
424     + size_t mblength = 1;
425     + mbstate_t state, state_bak;
426 cvsdist 1.1 +
427 twaugh 1.20 + memset (&state, 0, sizeof (mbstate_t));
428 cvsdist 1.1 +
429 ovasik 1.34 + if (ptr >= lim)
430 twaugh 1.20 + return;
431 cvsdist 1.1 +
432 twaugh 1.20 + if (tab != NULL)
433 cvsdist 1.1 + {
434 twaugh 1.20 + unsigned char t = tab[0];
435     + char *sep = ptr;
436     + for (; ptr < lim; ptr = sep + mblength)
437     + {
438     + sep = ptr;
439     + while (sep < lim)
440     + {
441     + state_bak = state;
442     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
443     +
444     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
445     + {
446     + mblength = 1;
447     + state = state_bak;
448     + }
449     + mblength = (mblength < 1) ? 1 : mblength;
450     +
451     + if (mblength == tablen && !memcmp (sep, tab, mblength))
452     + break;
453     + else
454     + {
455     + sep += mblength;
456     + continue;
457     + }
458     + }
459     +
460 ovasik 1.34 + if (sep >= lim)
461 twaugh 1.20 + break;
462     +
463     + extract_field (line, ptr, sep - ptr);
464     + }
465 cvsdist 1.1 + }
466     + else
467     + {
468 twaugh 1.20 + /* Skip leading blanks before the first field. */
469     + while(ptr < lim)
470     + {
471     + state_bak = state;
472     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
473     +
474     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
475     + {
476     + mblength = 1;
477     + state = state_bak;
478     + break;
479     + }
480     + mblength = (mblength < 1) ? 1 : mblength;
481     +
482     + if (!iswblank(wc))
483     + break;
484     + ptr += mblength;
485     + }
486 cvsdist 1.1 +
487 twaugh 1.20 + do
488     + {
489     + char *sep;
490     + state_bak = state;
491     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
492     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
493 cvsdist 1.1 + {
494 twaugh 1.20 + mblength = 1;
495     + state = state_bak;
496     + break;
497     + }
498     + mblength = (mblength < 1) ? 1 : mblength;
499 cvsdist 1.1 +
500 twaugh 1.20 + sep = ptr + mblength;
501 ovasik 1.34 + while (sep < lim)
502 cvsdist 1.1 + {
503 twaugh 1.20 + state_bak = state;
504     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
505     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
506     + {
507     + mblength = 1;
508     + state = state_bak;
509     + break;
510     + }
511     + mblength = (mblength < 1) ? 1 : mblength;
512     +
513     + if (iswblank (wc))
514     + break;
515 cvsdist 1.1 +
516 twaugh 1.20 + sep += mblength;
517 cvsdist 1.1 + }
518     +
519 twaugh 1.20 + extract_field (line, ptr, sep - ptr);
520 ovasik 1.34 + if (sep >= lim)
521 twaugh 1.20 + return;
522 cvsdist 1.1 +
523 twaugh 1.20 + state_bak = state;
524     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
525 cvsdist 1.1 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
526     + {
527 twaugh 1.20 + mblength = 1;
528     + state = state_bak;
529     + break;
530 cvsdist 1.1 + }
531 twaugh 1.20 + mblength = (mblength < 1) ? 1 : mblength;
532     +
533     + ptr = sep + mblength;
534 ovasik 1.34 + while (ptr < lim)
535 cvsdist 1.1 + {
536 twaugh 1.20 + state_bak = state;
537     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
538     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
539     + {
540     + mblength = 1;
541     + state = state_bak;
542     + break;
543     + }
544     + mblength = (mblength < 1) ? 1 : mblength;
545     +
546     + if (!iswblank (wc))
547     + break;
548     +
549     + ptr += mblength;
550 cvsdist 1.1 + }
551 twaugh 1.20 + }
552 ovasik 1.34 + while (ptr < lim);
553 twaugh 1.20 + }
554 cvsdist 1.1 +
555 twaugh 1.20 + extract_field (line, ptr, lim - ptr);
556     +}
557 cvsdist 1.1 +#endif
558 ovasik 1.27 +
559 ovasik 1.28 static void
560     freeline (struct line *line)
561 ovasik 1.27 {
562 twaugh 1.20 @@ -377,11 +601,18 @@
563 cvsdist 1.1
564 twaugh 1.20 /* Print the join of LINE1 and LINE2. */
565 cvsdist 1.1
566 twaugh 1.20 +#define PUT_TAB_CHAR \
567     + do \
568     + { \
569     + (tab != NULL) ? \
570     + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
571     + } \
572     + while (0)
573     +
574     static void
575     prjoin (struct line const *line1, struct line const *line2)
576 cvsdist 1.1 {
577 twaugh 1.20 const struct outlist *outlist;
578     - char output_separator = tab < 0 ? ' ' : tab;
579    
580     outlist = outlist_head.next;
581     if (outlist)
582     @@ -416,7 +647,7 @@
583 ovasik 1.39 o = o->next;
584     if (o == NULL)
585     break;
586     - putchar (output_separator);
587     + PUT_TAB_CHAR;
588     }
589 twaugh 1.20 putchar ('\n');
590     }
591     @@ -434,23 +665,23 @@
592     prfield (join_field_1, line1);
593     for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
594 ovasik 1.39 {
595     - putchar (output_separator);
596     + PUT_TAB_CHAR;
597     prfield (i, line1);
598     }
599 twaugh 1.20 for (i = join_field_1 + 1; i < line1->nfields; ++i)
600 ovasik 1.39 {
601     - putchar (output_separator);
602     + PUT_TAB_CHAR;
603     prfield (i, line1);
604     }
605 cvsdist 1.1
606 twaugh 1.20 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
607 ovasik 1.39 {
608     - putchar (output_separator);
609     + PUT_TAB_CHAR;
610     prfield (i, line2);
611     }
612 twaugh 1.20 for (i = join_field_2 + 1; i < line2->nfields; ++i)
613 ovasik 1.39 {
614     - putchar (output_separator);
615     + PUT_TAB_CHAR;
616     prfield (i, line2);
617     }
618 twaugh 1.20 putchar ('\n');
619 twaugh 1.21 @@ -859,20 +1090,41 @@
620 cvsdist 1.1
621 ovasik 1.39 case 't':
622     {
623     - unsigned char newtab = optarg[0];
624     - if (! newtab)
625     + char *newtab;
626     + size_t newtablen;
627     + if (! optarg[0])
628     error (EXIT_FAILURE, 0, _("empty tab"));
629     - if (optarg[1])
630     + newtab = xstrdup (optarg);
631     +#if HAVE_MBRTOWC
632     + if (MB_CUR_MAX > 1)
633     + {
634     + mbstate_t state;
635     +
636     + memset (&state, 0, sizeof (mbstate_t));
637     + newtablen = mbrtowc (NULL, newtab,
638     + strnlen (newtab, MB_LEN_MAX),
639     + &state);
640     + if (newtablen == (size_t) 0
641     + || newtablen == (size_t) -1
642     + || newtablen == (size_t) -2)
643     + newtablen = 1;
644     + }
645     + else
646     +#endif
647     + newtablen = 1;
648     +
649     + if (newtablen == 1 && newtab[1])
650     + {
651     + if (STREQ (newtab, "\\0"))
652     + newtab[0] = '\0';
653     + }
654     + if (tab != NULL && strcmp (tab, newtab))
655     {
656     - if (STREQ (optarg, "\\0"))
657     - newtab = '\0';
658     - else
659     - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
660     - quote (optarg));
661     + free (newtab);
662     + error (EXIT_FAILURE, 0, _("incompatible tabs"));
663     }
664     - if (0 <= tab && tab != newtab)
665     - error (EXIT_FAILURE, 0, _("incompatible tabs"));
666     tab = newtab;
667     + tablen = newtablen;
668     }
669     break;
670    
671 ovasik 1.25 diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
672     --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
673     +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
674     @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
675 ovasik 1.39 size_t jf_1, size_t jf_2)
676 ovasik 1.25 {
677     /* Start of field to compare in each file. */
678     - char *beg1;
679     - char *beg2;
680     -
681     - size_t len1;
682     - size_t len2; /* Length of fields to compare. */
683     + char *beg[2];
684     + char *copy[2];
685 ovasik 1.27 + size_t len[2]; /* Length of fields to compare. */
686 ovasik 1.25 int diff;
687     + int i, j;
688    
689     if (jf_1 < line1->nfields)
690     {
691     - beg1 = line1->fields[jf_1].beg;
692     - len1 = line1->fields[jf_1].len;
693     + beg[0] = line1->fields[jf_1].beg;
694     + len[0] = line1->fields[jf_1].len;
695     }
696     else
697     {
698     - beg1 = NULL;
699     - len1 = 0;
700     + beg[0] = NULL;
701     + len[0] = 0;
702     }
703    
704     if (jf_2 < line2->nfields)
705     {
706     - beg2 = line2->fields[jf_2].beg;
707     - len2 = line2->fields[jf_2].len;
708     + beg[1] = line2->fields[jf_2].beg;
709     + len[1] = line2->fields[jf_2].len;
710     }
711     else
712     {
713     - beg2 = NULL;
714     - len2 = 0;
715     + beg[1] = NULL;
716     + len[1] = 0;
717     }
718    
719     - if (len1 == 0)
720     - return len2 == 0 ? 0 : -1;
721     - if (len2 == 0)
722     + if (len[0] == 0)
723     + return len[1] == 0 ? 0 : -1;
724     + if (len[1] == 0)
725     return 1;
726    
727     if (ignore_case)
728     {
729     - /* FIXME: ignore_case does not work with NLS (in particular,
730     - with multibyte chars). */
731     - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
732     +#ifdef HAVE_MBRTOWC
733     + if (MB_CUR_MAX > 1)
734     + {
735     + size_t mblength;
736     + wchar_t wc, uwc;
737     + mbstate_t state, state_bak;
738     +
739     + memset (&state, '\0', sizeof (mbstate_t));
740     +
741     + for (i = 0; i < 2; i++)
742     + {
743     + copy[i] = alloca (len[i] + 1);
744     +
745     + for (j = 0; j < MIN (len[0], len[1]);)
746     + {
747     + state_bak = state;
748     + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
749     +
750     + switch (mblength)
751     + {
752     + case (size_t) -1:
753     + case (size_t) -2:
754     + state = state_bak;
755     + /* Fall through */
756     + case 0:
757     + mblength = 1;
758     + break;
759     +
760     + default:
761     + uwc = towupper (wc);
762     +
763     + if (uwc != wc)
764     + {
765     + mbstate_t state_wc;
766     +
767     + memset (&state_wc, '\0', sizeof (mbstate_t));
768     + wcrtomb (copy[i] + j, uwc, &state_wc);
769     + }
770     + else
771     + memcpy (copy[i] + j, beg[i] + j, mblength);
772     + }
773     + j += mblength;
774     + }
775     + copy[i][j] = '\0';
776     + }
777     + }
778     + else
779     +#endif
780     + {
781     + for (i = 0; i < 2; i++)
782     + {
783     + copy[i] = alloca (len[i] + 1);
784     +
785     + for (j = 0; j < MIN (len[0], len[1]); j++)
786     + copy[i][j] = toupper (beg[i][j]);
787     +
788     + copy[i][j] = '\0';
789     + }
790     + }
791     }
792     else
793     {
794     - if (hard_LC_COLLATE)
795 ovasik 1.39 - return xmemcoll (beg1, len1, beg2, len2);
796 ovasik 1.25 - diff = memcmp (beg1, beg2, MIN (len1, len2));
797     + copy[0] = (unsigned char *) beg[0];
798     + copy[1] = (unsigned char *) beg[1];
799     }
800    
801     + if (hard_LC_COLLATE)
802     + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
803     + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
804     +
805     +
806     if (diff)
807     return diff;
808     - return len1 < len2 ? -1 : len1 != len2;
809     + return len[0] - len[1];
810     }
811 cvsdist 1.1
812 ovasik 1.27 /* Check that successive input lines PREV and CURRENT from input file
813 twaugh 1.21 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
814     +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
815 twaugh 1.20 @@ -23,6 +23,16 @@
816     #include <getopt.h>
817     #include <sys/types.h>
818 cvsdist 1.1
819 twaugh 1.20 +/* Get mbstate_t, mbrtowc(). */
820     +#if HAVE_WCHAR_H
821     +# include <wchar.h>
822     +#endif
823     +
824     +/* Get isw* functions. */
825     +#if HAVE_WCTYPE_H
826     +# include <wctype.h>
827     +#endif
828     +
829     #include "system.h"
830     #include "argmatch.h"
831     #include "linebuffer.h"
832     @@ -32,7 +42,19 @@
833     #include "quote.h"
834     #include "xmemcoll.h"
835     #include "xstrtol.h"
836     -#include "memcasecmp.h"
837     +#include "xmemcoll.h"
838     +
839     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
840     + installation; work around this configuration error. */
841     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
842     +# define MB_LEN_MAX 16
843     +#endif
844     +
845     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
846     +#if HAVE_MBRTOWC && defined mbstate_t
847     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
848     +#endif
849     +
850    
851     /* The official name of this program (e.g., no `g' prefix). */
852     #define PROGRAM_NAME "uniq"
853     @@ -109,6 +131,10 @@
854     /* Select whether/how to delimit groups of duplicate lines. */
855     static enum delimit_method delimit_groups;
856    
857     +/* Function pointers. */
858     +static char *
859     +(*find_field) (struct linebuffer *line);
860     +
861     static struct option const longopts[] =
862     {
863     {"count", no_argument, NULL, 'c'},
864 twaugh 1.21 @@ -198,7 +224,7 @@
865 twaugh 1.20 return a pointer to the beginning of the line's field to be compared. */
866    
867     static char *
868 ovasik 1.28 -find_field (struct linebuffer const *line)
869 twaugh 1.20 +find_field_uni (struct linebuffer *line)
870     {
871     size_t count;
872 ovasik 1.28 char const *lp = line->buffer;
873 twaugh 1.21 @@ -219,6 +245,83 @@
874 ovasik 1.28 return line->buffer + i;
875 twaugh 1.20 }
876    
877     +#if HAVE_MBRTOWC
878     +
879     +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
880 ovasik 1.39 + do \
881     + { \
882     + mbstate_t state_bak; \
883     + \
884     + CONVFAIL = 0; \
885     + state_bak = *STATEP; \
886     + \
887     + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
888     + \
889     + switch (MBLENGTH) \
890     + { \
891     + case (size_t)-2: \
892     + case (size_t)-1: \
893     + *STATEP = state_bak; \
894     + CONVFAIL++; \
895     + /* Fall through */ \
896     + case 0: \
897     + MBLENGTH = 1; \
898     + } \
899     + } \
900 twaugh 1.20 + while (0)
901     +
902     +static char *
903     +find_field_multi (struct linebuffer *line)
904 cvsdist 1.1 +{
905 twaugh 1.20 + size_t count;
906     + char *lp = line->buffer;
907     + size_t size = line->length - 1;
908     + size_t pos;
909     + size_t mblength;
910 cvsdist 1.1 + wchar_t wc;
911 twaugh 1.20 + mbstate_t *statep;
912     + int convfail;
913 cvsdist 1.1 +
914 twaugh 1.20 + pos = 0;
915     + statep = &(line->state);
916 cvsdist 1.1 +
917 twaugh 1.20 + /* skip fields. */
918     + for (count = 0; count < skip_fields && pos < size; count++)
919 cvsdist 1.1 + {
920 twaugh 1.20 + while (pos < size)
921 ovasik 1.39 + {
922     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
923 twaugh 1.20 +
924 ovasik 1.39 + if (convfail || !iswblank (wc))
925     + {
926     + pos += mblength;
927     + break;
928     + }
929     + pos += mblength;
930     + }
931 cvsdist 1.1 +
932 twaugh 1.20 + while (pos < size)
933 ovasik 1.39 + {
934     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
935 cvsdist 1.1 +
936 ovasik 1.39 + if (!convfail && iswblank (wc))
937     + break;
938 cvsdist 1.1 +
939 ovasik 1.39 + pos += mblength;
940     + }
941 twaugh 1.20 + }
942 cvsdist 1.1 +
943 twaugh 1.20 + /* skip chars. */
944     + for (count = 0; count < skip_chars && pos < size; count++)
945     + {
946     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
947     + pos += mblength;
948 cvsdist 1.1 + }
949     +
950 twaugh 1.20 + return lp + pos;
951 cvsdist 1.1 +}
952     +#endif
953     +
954 twaugh 1.20 /* Return false if two strings OLD and NEW match, true if not.
955     OLD and NEW point not to the beginnings of the lines
956     but rather to the beginnings of the fields to compare.
957 twaugh 1.21 @@ -227,6 +330,8 @@
958 twaugh 1.20 static bool
959     different (char *old, char *new, size_t oldlen, size_t newlen)
960     {
961     + char *copy_old, *copy_new;
962     +
963     if (check_chars < oldlen)
964     oldlen = check_chars;
965     if (check_chars < newlen)
966 twaugh 1.21 @@ -234,14 +339,92 @@
967 cvsdist 1.1
968 twaugh 1.20 if (ignore_case)
969     {
970     - /* FIXME: This should invoke strcoll somehow. */
971     - return oldlen != newlen || memcasecmp (old, new, oldlen);
972     +      size_t i;
973     +
974     +      /* OLDLEN and NEWLEN may differ, so size and fold each copy by its own length.  */
975     +      copy_old = alloca (oldlen + 1);
976     +      copy_new = alloca (newlen + 1);
977     +      /* toupper requires an unsigned char value (or EOF), hence the casts.  */
978 ovasik 1.39 +      for (i = 0; i < oldlen; i++)
979     +        copy_old[i] = toupper ((unsigned char) old[i]);
980     +      for (i = 0; i < newlen; i++)
981     +        copy_new[i] = toupper ((unsigned char) new[i]);
982 twaugh 1.20 }
983     - else if (hard_LC_COLLATE)
984     - return xmemcoll (old, oldlen, new, newlen) != 0;
985     else
986     - return oldlen != newlen || memcmp (old, new, oldlen);
987     + {
988     + copy_old = (char *)old;
989     + copy_new = (char *)new;
990     + }
991     +
992     + return xmemcoll (copy_old, oldlen, copy_new, newlen);
993     +}
994     +
995     +#if HAVE_MBRTOWC
996     +/* Multibyte counterpart of `different': compare at most CHECK_CHARS characters
997     +   of OLD and NEW, decoded from shift states OLDSTATE and NEWSTATE and upper-cased
998     +   first if IGNORE_CASE.  Return xmemcoll's result; callers treat nonzero as "differ".  */
999     +static int
1000     +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1001     +{
1002     +  size_t i, j, k, chars;	/* J indexes the input, K the folded copy.  */
1003     +  const char *str[2];
1004     +  char *copy[2];
1005     +  size_t len[2];
1006     +  mbstate_t state[2];
1007     +  size_t mblength, outlen;
1008     +  wchar_t wc, uwc;
1009     +  mbstate_t state_bak, state_wc;
1010     +
1011     +  str[0] = old;
1012     +  str[1] = new;
1013     +  len[0] = oldlen;
1014     +  len[1] = newlen;
1015     +  state[0] = oldstate;
1016     +  state[1] = newstate;
1017     +
1018     +  for (i = 0; i < 2; i++)
1019     +    {
1020 ovasik 1.39 +      /* Case folding can change a character's encoded length, so allow
1021     +         up to MB_CUR_MAX output bytes per input byte when folding.  */
1022     +      copy[i] = alloca (ignore_case ? len[i] * MB_CUR_MAX + 1 : len[i] + 1);
1023 twaugh 1.20 +
1024 ovasik 1.39 +      for (j = 0, k = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1025     +        {
1026     +          state_bak = state[i];
1027     +          mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1028     +          outlen = (size_t) -1;	/* Means "not re-encoded yet".  */
1029     +
1030     +          if (mblength == (size_t) -1 || mblength == (size_t) -2)
1031     +            {
1032     +              /* Invalid or truncated sequence: take one byte as a character.  */
1033     +              state[i] = state_bak;
1034     +              mblength = 1;
1035     +            }
1036     +          else if (mblength == 0)
1037     +            mblength = 1;	/* An embedded NUL occupies one byte.  */
1038     +          else if (ignore_case && (uwc = towupper (wc)) != wc)
1039     +            {
1040     +              /* Re-encode the upper-case form; its length may differ from MBLENGTH.  */
1041     +              memset (&state_wc, '\0', sizeof (mbstate_t));
1042     +              outlen = wcrtomb (copy[i] + k, uwc, &state_wc);
1043     +            }
1044     +
1045     +          /* Whatever was not (or could not be) re-encoded is copied verbatim.  */
1046     +          if (outlen == (size_t) -1)
1047     +            {
1048     +              memcpy (copy[i] + k, str[i] + j, mblength);
1049     +              outlen = mblength;
1050     +            }
1051     +          j += mblength;
1052     +          k += outlen;
1053     +        }
1054 twaugh 1.20 +      copy[i][k] = '\0';
1055     +      len[i] = k;
1056     +    }
1057     +
1058     +  return xmemcoll (copy[0], len[0], copy[1], len[1]);
1059     }
1060     +#endif
1061    
1062     /* Output the line in linebuffer LINE to standard output
1063     provided that the switches say it should be output.
1064 twaugh 1.21 @@ -295,15 +478,43 @@
1065 twaugh 1.20 {
1066     char *prevfield IF_LINT (= NULL);
1067     size_t prevlen IF_LINT (= 0);
1068     +#if HAVE_MBRTOWC
1069     + mbstate_t prevstate;
1070     +
1071     + memset (&prevstate, '\0', sizeof (mbstate_t));
1072     +#endif
1073    
1074     while (!feof (stdin))
1075 ovasik 1.39 {
1076     char *thisfield;
1077     size_t thislen;
1078 twaugh 1.20 +#if HAVE_MBRTOWC
1079 ovasik 1.39 + mbstate_t thisstate;
1080 twaugh 1.20 +#endif
1081     +
1082 ovasik 1.39 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1083     break;
1084     thisfield = find_field (thisline);
1085     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1086 twaugh 1.20 +#if HAVE_MBRTOWC
1087 ovasik 1.39 + if (MB_CUR_MAX > 1)
1088 twaugh 1.20 + {
1089     + thisstate = thisline->state;
1090     +
1091     + if (prevline->length == 0 || different_multi
1092     + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1093     + {
1094     + fwrite (thisline->buffer, sizeof (char),
1095     + thisline->length, stdout);
1096     +
1097     + SWAP_LINES (prevline, thisline);
1098     + prevfield = thisfield;
1099     + prevlen = thislen;
1100     + prevstate = thisstate;
1101     + }
1102     + }
1103 ovasik 1.39 + else
1104 twaugh 1.20 +#endif
1105 ovasik 1.39 if (prevline->length == 0
1106     || different (thisfield, prevfield, thislen, prevlen))
1107     {
1108 twaugh 1.21 @@ -322,17 +533,26 @@
1109 twaugh 1.20 size_t prevlen;
1110     uintmax_t match_count = 0;
1111     bool first_delimiter = true;
1112     +#if HAVE_MBRTOWC
1113     + mbstate_t prevstate;
1114     +#endif
1115    
1116 ovasik 1.23 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1117 ovasik 1.39 goto closefiles;
1118 twaugh 1.20 prevfield = find_field (prevline);
1119     prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1120     +#if HAVE_MBRTOWC
1121     + prevstate = prevline->state;
1122     +#endif
1123    
1124     while (!feof (stdin))
1125 ovasik 1.39 {
1126     bool match;
1127     char *thisfield;
1128     size_t thislen;
1129     +#if HAVE_MBRTOWC
1130     + mbstate_t thisstate;
1131     +#endif
1132     if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1133     {
1134     if (ferror (stdin))
1135 twaugh 1.21 @@ -341,6 +561,15 @@
1136 ovasik 1.39 }
1137     thisfield = find_field (thisline);
1138     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1139 twaugh 1.20 +#if HAVE_MBRTOWC
1140 ovasik 1.39 + if (MB_CUR_MAX > 1)
1141     + {
1142 twaugh 1.20 + thisstate = thisline->state;
1143     + match = !different_multi (thisfield, prevfield,
1144     + thislen, prevlen, thisstate, prevstate);
1145     + }
1146 ovasik 1.39 + else
1147 twaugh 1.20 +#endif
1148 ovasik 1.39 match = !different (thisfield, prevfield, thislen, prevlen);
1149     match_count += match;
1150 twaugh 1.20
1151 twaugh 1.21 @@ -373,6 +602,9 @@
1152 ovasik 1.39 SWAP_LINES (prevline, thisline);
1153     prevfield = thisfield;
1154     prevlen = thislen;
1155     +#if HAVE_MBRTOWC
1156     + prevstate = thisstate;
1157     +#endif
1158     if (!match)
1159     match_count = 0;
1160     }
1161 twaugh 1.21 @@ -417,6 +649,19 @@
1162 twaugh 1.20
1163     atexit (close_stdout);
1164    
1165     +#if HAVE_MBRTOWC
1166     + if (MB_CUR_MAX > 1)
1167     + {
1168     + find_field = find_field_multi;
1169     + }
1170     + else
1171     +#endif
1172     + {
1173     + find_field = find_field_uni;
1174     + }
1175     +
1176     +
1177     +
1178     skip_chars = 0;
1179     skip_fields = 0;
1180     check_chars = SIZE_MAX;
1181 twaugh 1.21 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1182     +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1183 twaugh 1.20 @@ -23,11 +23,33 @@
1184     #include <getopt.h>
1185 twaugh 1.16 #include <sys/types.h>
1186 cvsdist 1.1
1187 twaugh 1.20 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1188 cvsdist 1.1 +#if HAVE_WCHAR_H
1189     +# include <wchar.h>
1190     +#endif
1191     +
1192 twaugh 1.20 +/* Get iswprint(), iswblank(), wcwidth(). */
1193 cvsdist 1.1 +#if HAVE_WCTYPE_H
1194     +# include <wctype.h>
1195     +#endif
1196     +
1197     #include "system.h"
1198 twaugh 1.16 #include "error.h"
1199 twaugh 1.11 #include "quote.h"
1200 cvsdist 1.1 #include "xstrtol.h"
1201 twaugh 1.16
1202 twaugh 1.20 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1203     + installation; work around this configuration error. */
1204     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1205     +# undef MB_LEN_MAX
1206     +# define MB_LEN_MAX 16
1207     +#endif
1208     +
1209 cvsdist 1.1 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1210     +#if HAVE_MBRTOWC && defined mbstate_t
1211     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1212     +#endif
1213     +
1214 twaugh 1.20 #define TAB_WIDTH 8
1215    
1216 twaugh 1.16 /* The official name of this program (e.g., no `g' prefix). */
1217 ovasik 1.28 @@ -35,20 +57,41 @@
1218 cvsdist 1.1
1219 ovasik 1.27 #define AUTHORS proper_name ("David MacKenzie")
1220 cvsdist 1.1
1221 twaugh 1.20 +#define FATAL_ERROR(Message) \
1222     + do /* Report Message through error (), then call usage (2). */ \
1223     + { \
1224     + error (0, 0, (Message)); \
1225     + usage (2); \
1226     + } \
1227     + while (0)
1228 cvsdist 1.1 +
1229 twaugh 1.20 +enum operating_mode
1230     +{
1231     + /* Measure line width in screen columns. */
1232     + column_mode,
1233 twaugh 1.16 +
1234 twaugh 1.20 + /* Measure line width in bytes rather than columns. */
1235     + byte_mode,
1236     +
1237     + /* Measure line width in characters rather than columns. */
1238     + character_mode,
1239     +};
1240     +
1241     +/* The current operating mode. (Default: column_mode, the zero enumerator.) */
1242     +static enum operating_mode operating_mode;
1243     +
1244     /* If nonzero, try to break on whitespace. */
1245     static bool break_spaces;
1246    
1247     -/* If nonzero, count bytes, not column positions. */
1248     -static bool count_bytes;
1249     -
1250     /* If nonzero, at least one of the files we read was standard input. */
1251     static bool have_read_stdin;
1252    
1253     -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1254     +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1255    
1256     static struct option const longopts[] =
1257     {
1258     {"bytes", no_argument, NULL, 'b'},
1259     + {"characters", no_argument, NULL, 'c'},
1260     {"spaces", no_argument, NULL, 's'},
1261     {"width", required_argument, NULL, 'w'},
1262     {GETOPT_HELP_OPTION_DECL},
1263     @@ -81,6 +124,7 @@
1264     "), stdout);
1265     fputs (_("\
1266     -b, --bytes count bytes rather than columns\n\
1267     + -c, --characters count characters rather than columns\n\
1268     -s, --spaces break at spaces\n\
1269     -w, --width=WIDTH use WIDTH columns instead of 80\n\
1270     "), stdout);
1271     @@ -98,7 +142,7 @@
1272     static size_t
1273     adjust_column (size_t column, char c)
1274     {
1275     - if (!count_bytes)
1276     + if (operating_mode != byte_mode)
1277     {
1278     if (c == '\b')
1279 ovasik 1.39 {
1280 twaugh 1.20 @@ -121,30 +165,14 @@
1281     to stdout, with maximum line length WIDTH.
1282     Return true if successful. */
1283    
1284     -static bool
1285     -fold_file (char const *filename, size_t width)
1286     +static void
1287     +fold_text (FILE *istream, size_t width, int *saved_errno)
1288 cvsdist 1.1 {
1289 twaugh 1.20 - FILE *istream;
1290     int c;
1291     size_t column = 0; /* Screen column where next char will go. */
1292     size_t offset_out = 0; /* Index in `line_out' for next char. */
1293     static char *line_out = NULL;
1294     static size_t allocated_out = 0;
1295     - int saved_errno;
1296     -
1297     - if (STREQ (filename, "-"))
1298     - {
1299     - istream = stdin;
1300     - have_read_stdin = true;
1301     - }
1302     - else
1303     - istream = fopen (filename, "r");
1304     -
1305     - if (istream == NULL)
1306     - {
1307     - error (0, errno, "%s", filename);
1308     - return false;
1309     - }
1310 twaugh 1.16
1311 twaugh 1.20 while ((c = getc (istream)) != EOF)
1312 twaugh 1.16 {
1313 twaugh 1.20 @@ -172,6 +200,15 @@
1314 ovasik 1.39 bool found_blank = false;
1315     size_t logical_end = offset_out;
1316 twaugh 1.20
1317 ovasik 1.39 + /* If LINE_OUT has no wide character,
1318     + put a new wide character in LINE_OUT
1319     + if column is bigger than width. */
1320     + if (offset_out == 0)
1321     + {
1322     + line_out[offset_out++] = c;
1323     + continue;
1324     + }
1325     +
1326     /* Look for the last blank. */