/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Diff of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

Revision 1.40 Revision 1.41
1diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut 1diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
2--- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200 2--- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
3+++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200
4@@ -26,7 +26,7 @@
5 my $prog = 'cut';
6 my $try = "Try \`$prog --help' for more information.\n";
7 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
8-my $inval = "$prog: invalid byte or field list\n$try";
9+my $inval = "$prog: invalid byte, character or field list\n$try";
10 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
11
12 my @Tests =
13@@ -140,8 +140,8 @@
14 ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}],
15
16 # None of the following invalid ranges provoked an error up to coreutils-6.9.
17- ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
18- {ERR=>"$prog: invalid decreasing range\n$try"}],
19+ ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
20+ {ERR=>"$prog: invalid byte, character or field list\n$try"}],
21 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
22 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
23 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
24--- /dev/null 2007-03-01 09:16:39.219409909 +0000
25+++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
26@@ -0,0 +1,58 @@
27+#! /bin/sh
28+case $# in
29+ 0) xx='../src/sort';;
30+ *) xx="$1";;
31+esac
32+test "$VERBOSE" && echo=echo || echo=:
33+$echo testing program: $xx
34+errors=0
35+test "$srcdir" || srcdir=.
36+test "$VERBOSE" && $xx --version 2> /dev/null
37+
38+export LC_ALL=en_US.UTF-8
39+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
40+errors=0
41+
42+$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
43+code=$?
44+if test $code != 0; then
45+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
46+ errors=`expr $errors + 1`
47+else
48+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
49+ case $? in
50+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
51+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
52+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
53+ errors=`expr $errors + 1`;;
54+ 2) $echo "Test mb1 may have failed." 1>&2
55+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
56+ errors=`expr $errors + 1`;;
57+ esac
58+fi
59+
60+$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
61+code=$?
62+if test $code != 0; then
63+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
64+ errors=`expr $errors + 1`
65+else
66+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
67+ case $? in
68+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
69+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
70+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
71+ errors=`expr $errors + 1`;;
72+ 2) $echo "Test mb2 may have failed." 1>&2
73+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
74+ errors=`expr $errors + 1`;;
75+ esac
76+fi
77+
78+if test $errors = 0; then
79+ $echo Passed all 113 tests. 1>&2
80+else
81+ $echo Failed $errors tests. 1>&2
82+fi
83+test $errors = 0 || errors=1
84+exit $errors
85--- /dev/null 2007-03-01 09:16:39.219409909 +0000
86+++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000
87@@ -0,0 +1,4 @@
88+Apple@AA10@@20
89+Banana@AA5@@30
90+Citrus@AA20@@5
91+Cherry@AA30@@10
92--- /dev/null 2007-03-01 09:16:39.219409909 +0000
93+++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000
94@@ -0,0 +1,4 @@
95+Citrus@AA20@@5
96+Cherry@AA30@@10
97+Apple@AA10@@20
98+Banana@AA5@@30
99--- /dev/null 2007-03-01 09:16:39.219409909 +0000
100+++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000
101@@ -0,0 +1,4 @@
102+Apple@10
103+Banana@5
104+Citrus@20
105+Cherry@30
106--- /dev/null 2007-03-01 09:16:39.219409909 +0000
107+++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000
108@@ -0,0 +1,4 @@
109+Banana@5
110+Apple@10
111+Citrus@20
112+Cherry@30
113diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am
114--- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200
115+++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200
116@@ -192,6 +192,7 @@
117 misc/sort-compress \
118 misc/sort-continue \
119 misc/sort-files0-from \
120+ misc/sort-mb-tests \
121 misc/sort-merge \
122 misc/sort-merge-fdlimit \
123 misc/sort-rand \
124@@ -391,6 +392,10 @@
125 $(root_tests)
126
127 pr_data = \
128+ misc/mb1.X \
129+ misc/mb1.I \
130+ misc/mb2.X \
131+ misc/mb2.I \
132 pr/0F \
133 pr/0FF \
134 pr/0FFnt \
135--- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
136+++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000 3+++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
137@@ -22,6 +22,11 @@ 4@@ -21,6 +21,11 @@
138 5
139 # include <stdio.h> 6 # include <stdio.h>
140 7
141+/* Get mbstate_t. */ 8+/* Get mbstate_t. */
142+# if HAVE_WCHAR_H 9+# if HAVE_WCHAR_H
144+# endif 11+# endif
145+ 12+
146 /* A `struct linebuffer' holds a line of text. */ 13 /* A `struct linebuffer' holds a line of text. */
147 14
148 struct linebuffer 15 struct linebuffer
149@@ -29,6 +34,9 @@ 16@@ -28,6 +33,9 @@ struct linebuffer
150 size_t size; /* Allocated. */ 17 size_t size; /* Allocated. */
151 size_t length; /* Used. */ 18 size_t length; /* Used. */
152 char *buffer; 19 char *buffer;
153+# if HAVE_WCHAR_H 20+# if HAVE_WCHAR_H
154+ mbstate_t state; 21+ mbstate_t state;
155+# endif 22+# endif
156 }; 23 };
157 24
158 /* Initialize linebuffer LINEBUFFER for use. */ 25 /* Initialize linebuffer LINEBUFFER for use. */
26diff -urNp coreutils-8.0-orig/lib/linebuffer.h.orig coreutils-8.0/lib/linebuffer.h.orig
27--- coreutils-8.0-orig/lib/linebuffer.h.orig 1970-01-01 01:00:00.000000000 +0100
28+++ coreutils-8.0/lib/linebuffer.h.orig 2009-10-06 10:59:48.000000000 +0200
29@@ -0,0 +1,53 @@
30+/* linebuffer.h -- declarations for reading arbitrarily long lines
31+
32+ Copyright (C) 1986, 1991, 1998, 1999, 2002, 2003, 2007 Free Software
33+ Foundation, Inc.
34+
35+ This program is free software: you can redistribute it and/or modify
36+ it under the terms of the GNU General Public License as published by
37+ the Free Software Foundation; either version 3 of the License, or
38+ (at your option) any later version.
39+
40+ This program is distributed in the hope that it will be useful,
41+ but WITHOUT ANY WARRANTY; without even the implied warranty of
42+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43+ GNU General Public License for more details.
44+
45+ You should have received a copy of the GNU General Public License
46+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
47+
48+#if !defined LINEBUFFER_H
49+# define LINEBUFFER_H
50+
51+# include <stdio.h>
52+
53+/* A `struct linebuffer' holds a line of text. */
54+
55+struct linebuffer
56+{
57+ size_t size; /* Allocated. */
58+ size_t length; /* Used. */
59+ char *buffer;
60+};
61+
62+/* Initialize linebuffer LINEBUFFER for use. */
63+void initbuffer (struct linebuffer *linebuffer);
64+
65+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
66+ Consider lines to be terminated by DELIMITER.
67+ Keep the delimiter; append DELIMITER if we reach EOF and it wasn't
68+ the last character in the file. Do not NUL-terminate.
69+ Return LINEBUFFER, except at end of file return NULL. */
70+struct linebuffer *readlinebuffer_delim (struct linebuffer *linebuffer,
71+ FILE *stream, char delimiter);
72+
73+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
74+ Keep the newline; append a newline if it's the last line of a file
75+ that ends in a non-newline character. Do not NUL-terminate.
76+ Return LINEBUFFER, except at end of file return NULL. */
77+struct linebuffer *readlinebuffer (struct linebuffer *linebuffer, FILE *stream);
78+
79+/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */
80+void freebuffer (struct linebuffer *);
81+
82+#endif /* LINEBUFFER_H */
83diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
84--- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
85+++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
86@@ -28,6 +28,11 @@
87 #include <assert.h>
88 #include <getopt.h>
89 #include <sys/types.h>
90+
91+/* Get mbstate_t, mbrtowc(). */
92+#if HAVE_WCHAR_H
93+# include <wchar.h>
94+#endif
95 #include "system.h"
96
97 #include "error.h"
98@@ -36,6 +41,18 @@
99 #include "quote.h"
100 #include "xstrndup.h"
101
102+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
103+ installation; work around this configuration error. */
104+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
105+# undef MB_LEN_MAX
106+# define MB_LEN_MAX 16
107+#endif
108+
109+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
110+#if HAVE_MBRTOWC && defined mbstate_t
111+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
112+#endif
113+
114 /* The official name of this program (e.g., no `g' prefix). */
115 #define PROGRAM_NAME "cut"
116
117@@ -71,6 +88,52 @@
118 } \
119 while (0)
120
121+/* Refill the buffer BUF to get a multibyte character. */
122+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
123+ do \
124+ { \
125+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
126+ { \
127+ memmove (BUF, BUFPOS, BUFLEN); \
128+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
129+ BUFPOS = BUF; \
130+ } \
131+ } \
132+ while (0)
133+
134+/* Get the wide character at BUFPOS; BUFPOS itself is not advanced.
135+ If the byte sequence is not a valid character, CONVFAIL is set to 1; otherwise 0. */
136+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
137+ do \
138+ { \
139+ mbstate_t state_bak; \
140+ \
141+ if (BUFLEN < 1) \
142+ { \
143+ WC = WEOF; \
144+ break; \
145+ } \
146+ \
147+ /* Get a wide character. */ \
148+ CONVFAIL = 0; \
149+ state_bak = STATE; \
150+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
151+ \
152+ switch (MBLENGTH) \
153+ { \
154+ case (size_t)-1: \
155+ case (size_t)-2: \
156+ CONVFAIL++; \
157+ STATE = state_bak; \
158+ /* Fall through. */ \
159+ \
160+ case 0: \
161+ MBLENGTH = 1; \
162+ break; \
163+ } \
164+ } \
165+ while (0)
166+
167 struct range_pair
168 {
169 size_t lo;
170@@ -89,7 +152,7 @@ static char *field_1_buffer;
171 /* The number of bytes allocated for FIELD_1_BUFFER. */
172 static size_t field_1_bufsize;
173
174-/* The largest field or byte index used as an endpoint of a closed
175+/* The largest byte, character or field index used as an endpoint of a closed
176 or degenerate range specification; this doesn't include the starting
177 index of right-open-ended ranges. For example, with either range spec
178 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
179@@ -101,10 +164,11 @@ static size_t eol_range_start;
180
181 /* This is a bit vector.
182 In byte mode, which bytes to output.
183+ In character mode, which characters to output.
184 In field mode, which DELIM-separated fields to output.
185- Both bytes and fields are numbered starting with 1,
186+ Bytes, characters and fields are numbered starting with 1,
187 so the zeroth bit of this array is unused.
188- A field or byte K has been selected if
189+ A byte, character or field K has been selected if
190 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
191 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
192 static unsigned char *printable_field;
193@@ -113,15 +177,25 @@ enum operating_mode
194 {
195 undefined_mode,
196
197- /* Output characters that are in the given bytes. */
198+ /* Output bytes that are at the given positions. */
199 byte_mode,
200
201+ /* Output characters that are at the given positions. */
202+ character_mode,
203+
204 /* Output the given delimeter-separated fields. */
205 field_mode
206 };
207
208 static enum operating_mode operating_mode;
209
210+/* If nonzero, when in byte mode, don't split multibyte characters. */
211+static int byte_mode_character_aware;
212+
213+/* If nonzero, use the single-byte functions even when this
214+ program runs in a multibyte locale. */
215+static int force_singlebyte_mode;
216+
217 /* If true do not output lines containing no delimeter characters.
218 Otherwise, all such lines are printed. This option is valid only
219 with field mode. */
220@@ -133,6 +207,9 @@ static bool complement;
221
222 /* The delimeter character for field mode. */
223 static unsigned char delim;
224+#if HAVE_WCHAR_H
225+static wchar_t wcdelim;
226+#endif
227
228 /* True if the --output-delimiter=STRING option was specified. */
229 static bool output_delimiter_specified;
230@@ -206,7 +283,7 @@ Mandatory arguments to long options are
231 -f, --fields=LIST select only these fields; also print any line\n\
232 that contains no delimiter character, unless\n\
233 the -s option is specified\n\
234- -n (ignored)\n\
235+ -n with -b: don't split multibyte characters\n\
236 "), stdout);
237 fputs (_("\
238 --complement complement the set of selected bytes, characters\n\
239@@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
240 in_digits = false;
241 /* Starting a range. */
242 if (dash_found)
243- FATAL_ERROR (_("invalid byte or field list"));
244+ FATAL_ERROR (_("invalid byte, character or field list"));
245 dash_found = true;
246 fieldstr++;
247
248@@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
249 if (!rhs_specified)
250 {
251 /* `n-'. From `initial' to end of line. */
252- eol_range_start = initial;
253+ if (eol_range_start == 0 ||
254+ (eol_range_start != 0 && eol_range_start > initial))
255+ eol_range_start = initial;
256 field_found = true;
257 }
258 else
259 {
260 /* `m-n' or `-n' (1-n). */
261 if (value < initial)
262- FATAL_ERROR (_("invalid decreasing range"));
263+ FATAL_ERROR (_("invalid byte, character or field list"));
264
265 /* Is there already a range going to end of line? */
266 if (eol_range_start != 0)
267@@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
268 if (operating_mode == byte_mode)
269 error (0, 0,
270 _("byte offset %s is too large"), quote (bad_num));
271+ else if (operating_mode == character_mode)
272+ error (0, 0,
273+ _("character offset %s is too large"), quote (bad_num));
274 else
275 error (0, 0,
276 _("field number %s is too large"), quote (bad_num));
277@@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
278 fieldstr++;
279 }
280 else
281- FATAL_ERROR (_("invalid byte or field list"));
282+ FATAL_ERROR (_("invalid byte, character or field list"));
283 }
284
285 max_range_endpoint = 0;
286@@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
287 }
288 }
289
290+#if HAVE_MBRTOWC
291+/* This function is used in the following cases.
292+
293+ 1. Read from the stream STREAM, printing to standard output any selected
294+ characters.
295+
296+ 2. Read from stream STREAM, printing to standard output any selected bytes,
297+ without splitting multibyte characters. */
298+
299+static void
300+cut_characters_or_cut_bytes_no_split (FILE *stream)
301+{
302+ int idx; /* number of bytes or characters in the line so far. */
303+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
304+ char *bufpos; /* Next read position of BUF. */
305+ size_t buflen; /* The length of the byte sequence in buf. */
306+ wint_t wc; /* A gotten wide character. */
307+ size_t mblength; /* The byte length of the multibyte character that
308+ was converted to WC. */
309+ mbstate_t state; /* State of the stream. */
310+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
311+
312+ idx = 0;
313+ buflen = 0;
314+ bufpos = buf;
315+ memset (&state, '\0', sizeof(mbstate_t));
316+
317+ while (1)
318+ {
319+ REFILL_BUFFER (buf, bufpos, buflen, stream);
320+
321+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
322+
323+ if (wc == WEOF)
324+ {
325+ if (idx > 0)
326+ putchar ('\n');
327+ break;
328+ }
329+ else if (wc == L'\n')
330+ {
331+ putchar ('\n');
332+ idx = 0;
333+ }
334+ else
335+ {
336+ idx += (operating_mode == byte_mode) ? mblength : 1;
337+ if (print_kth (idx, NULL))
338+ fwrite (bufpos, mblength, sizeof(char), stdout);
339+ }
340+
341+ buflen -= mblength;
342+ bufpos += mblength;
343+ }
344+}
345+#endif
346+
347 /* Read from stream STREAM, printing to standard output any selected fields. */
348
349 static void
350@@ -701,13 +840,192 @@ cut_fields (FILE *stream)
351 }
352 }
353
354+#if HAVE_MBRTOWC
355+static void
356+cut_fields_mb (FILE *stream)
357+{
358+ int c;
359+ unsigned int field_idx;
360+ int found_any_selected_field;
361+ int buffer_first_field;
362+ int empty_input;
363+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
364+ char *bufpos; /* Next read position of BUF. */
365+ size_t buflen; /* The length of the byte sequence in buf. */
366+ wint_t wc = 0; /* A gotten wide character. */
367+ size_t mblength; /* The byte length of the multibyte character that
368+ was converted to WC. */
369+ mbstate_t state; /* State of the stream. */
370+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
371+
372+ found_any_selected_field = 0;
373+ field_idx = 1;
374+ bufpos = buf;
375+ buflen = 0;
376+ memset (&state, '\0', sizeof(mbstate_t));
377+
378+ c = getc (stream);
379+ empty_input = (c == EOF);
380+ if (c != EOF)
381+ ungetc (c, stream);
382+ else
383+ wc = WEOF;
384+
385+ /* To support the semantics of the -s flag, we may have to buffer
386+ all of the first field to determine whether it is `delimited.'
387+ But that is unnecessary if all non-delimited lines must be printed
388+ and the first field has been selected, or if non-delimited lines
389+ must be suppressed and the first field has *not* been selected.
390+ That is because a non-delimited line has exactly one field. */
391+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
392+
393+ while (1)
394+ {
395+ if (field_idx == 1 && buffer_first_field)
396+ {
397+ int len = 0;
398+
399+ while (1)
400+ {
401+ REFILL_BUFFER (buf, bufpos, buflen, stream);
402+
403+ GET_NEXT_WC_FROM_BUFFER
404+ (wc, bufpos, buflen, mblength, state, convfail);
405+
406+ if (wc == WEOF)
407+ break;
408+
409+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
410+ memcpy (field_1_buffer + len, bufpos, mblength);
411+ len += mblength;
412+ buflen -= mblength;
413+ bufpos += mblength;
414+
415+ if (!convfail && (wc == L'\n' || wc == wcdelim))
416+ break;
417+ }
418+
419+ if (wc == WEOF)
420+ break;
421+
422+ /* If the first field extends to the end of line (it is not
423+ delimited) and we are printing all non-delimited lines,
424+ print this one. */
425+ if (convfail || (!convfail && wc != wcdelim))
426+ {
427+ if (suppress_non_delimited)
428+ {
429+ /* Empty. */
430+ }
431+ else
432+ {
433+ fwrite (field_1_buffer, sizeof (char), len, stdout);
434+ /* Make sure the output line is newline terminated. */
435+ if (convfail || (!convfail && wc != L'\n'))
436+ putchar ('\n');
437+ }
438+ continue;
439+ }
440+
441+ if (print_kth (1, NULL))
442+ {
443+ /* Print the field, but not the trailing delimiter. */
444+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
445+ found_any_selected_field = 1;
446+ }
447+ ++field_idx;
448+ }
449+
450+ if (wc != WEOF)
451+ {
452+ if (print_kth (field_idx, NULL))
453+ {
454+ if (found_any_selected_field)
455+ {
456+ fwrite (output_delimiter_string, sizeof (char),
457+ output_delimiter_length, stdout);
458+ }
459+ found_any_selected_field = 1;
460+ }
461+
462+ while (1)
463+ {
464+ REFILL_BUFFER (buf, bufpos, buflen, stream);
465+
466+ GET_NEXT_WC_FROM_BUFFER
467+ (wc, bufpos, buflen, mblength, state, convfail);
468+
469+ if (wc == WEOF)
470+ break;
471+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
472+ {
473+ buflen -= mblength;
474+ bufpos += mblength;
475+ break;
476+ }
477+
478+ if (print_kth (field_idx, NULL))
479+ fwrite (bufpos, mblength, sizeof(char), stdout);
480+
481+ buflen -= mblength;
482+ bufpos += mblength;
483+ }
484+ }
485+
486+ if ((!convfail || wc == L'\n') && buflen < 1)
487+ wc = WEOF;
488+
489+ if (!convfail && wc == wcdelim)
490+ ++field_idx;
491+ else if (wc == WEOF || (!convfail && wc == L'\n'))
492+ {
493+ if (found_any_selected_field
494+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
495+ putchar ('\n');
496+ if (wc == WEOF)
497+ break;
498+ field_idx = 1;
499+ found_any_selected_field = 0;
500+ }
501+ }
502+}
503+#endif
504+
505 static void
506 cut_stream (FILE *stream)
507 {
508- if (operating_mode == byte_mode)
509- cut_bytes (stream);
510+#if HAVE_MBRTOWC
511+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
512+ {
513+ switch (operating_mode)
514+ {
515+ case byte_mode:
516+ if (byte_mode_character_aware)
517+ cut_characters_or_cut_bytes_no_split (stream);
518+ else
519+ cut_bytes (stream);
520+ break;
521+
522+ case character_mode:
523+ cut_characters_or_cut_bytes_no_split (stream);
524+ break;
525+
526+ case field_mode:
527+ cut_fields_mb (stream);
528+ break;
529+
530+ default:
531+ abort ();
532+ }
533+ }
534 else
535- cut_fields (stream);
536+#endif
537+ {
538+ if (operating_mode == field_mode)
539+ cut_fields (stream);
540+ else
541+ cut_bytes (stream);
542+ }
543 }
544
545 /* Process file FILE to standard output.
546@@ -757,6 +1075,8 @@ main (int argc, char **argv)
547 bool ok;
548 bool delim_specified = false;
549 char *spec_list_string IF_LINT(= NULL);
550+ char mbdelim[MB_LEN_MAX + 1];
551+ size_t delimlen = 0;
552
553 initialize_main (&argc, &argv);
554 set_program_name (argv[0]);
555@@ -779,7 +1099,6 @@ main (int argc, char **argv)
556 switch (optc)
557 {
558 case 'b':
559- case 'c':
560 /* Build the byte list. */
561 if (operating_mode != undefined_mode)
562 FATAL_ERROR (_("only one type of list may be specified"));
563@@ -787,6 +1106,14 @@ main (int argc, char **argv)
564 spec_list_string = optarg;
565 break;
566
567+ case 'c':
568+ /* Build the character list. */
569+ if (operating_mode != undefined_mode)
570+ FATAL_ERROR (_("only one type of list may be specified"));
571+ operating_mode = character_mode;
572+ spec_list_string = optarg;
573+ break;
574+
575 case 'f':
576 /* Build the field list. */
577 if (operating_mode != undefined_mode)
578@@ -798,10 +1125,35 @@ main (int argc, char **argv)
579 case 'd':
580 /* New delimiter. */
581 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
582- if (optarg[0] != '\0' && optarg[1] != '\0')
583- FATAL_ERROR (_("the delimiter must be a single character"));
584- delim = optarg[0];
585- delim_specified = true;
586+ {
587+#if HAVE_MBRTOWC
588+ if(MB_CUR_MAX > 1)
589+ {
590+ mbstate_t state;
591+
592+ memset (&state, '\0', sizeof(mbstate_t));
593+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
594+
595+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
596+ ++force_singlebyte_mode;
597+ else
598+ {
599+ delimlen = (delimlen < 1) ? 1 : delimlen;
600+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
601+ FATAL_ERROR (_("the delimiter must be a single character"));
602+ memcpy (mbdelim, optarg, delimlen);
603+ }
604+ }
605+
606+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
607+#endif
608+ {
609+ if (optarg[0] != '\0' && optarg[1] != '\0')
610+ FATAL_ERROR (_("the delimiter must be a single character"));
611+ delim = (unsigned char) optarg[0];
612+ }
613+ delim_specified = true;
614+ }
615 break;
616
617 case OUTPUT_DELIMITER_OPTION:
618@@ -814,6 +1166,7 @@ main (int argc, char **argv)
619 break;
620
621 case 'n':
622+ byte_mode_character_aware = 1;
623 break;
624
625 case 's':
626@@ -836,7 +1189,7 @@ main (int argc, char **argv)
627 if (operating_mode == undefined_mode)
628 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
629
630- if (delim != '\0' && operating_mode != field_mode)
631+ if (delim_specified && operating_mode != field_mode)
632 FATAL_ERROR (_("an input delimiter may be specified only\
633 when operating on fields"));
634
635@@ -863,15 +1216,34 @@ main (int argc, char **argv)
636 }
637
638 if (!delim_specified)
639- delim = '\t';
640+ {
641+ delim = '\t';
642+#ifdef HAVE_MBRTOWC
643+ wcdelim = L'\t';
644+ mbdelim[0] = '\t';
645+ mbdelim[1] = '\0';
646+ delimlen = 1;
647+#endif
648+ }
649
650 if (output_delimiter_string == NULL)
651 {
652- static char dummy[2];
653- dummy[0] = delim;
654- dummy[1] = '\0';
655- output_delimiter_string = dummy;
656- output_delimiter_length = 1;
657+#ifdef HAVE_MBRTOWC
658+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
659+ {
660+ output_delimiter_string = xstrdup(mbdelim);
661+ output_delimiter_length = delimlen;
662+ }
663+
664+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
665+#endif
666+ {
667+ static char dummy[2];
668+ dummy[0] = delim;
669+ dummy[1] = '\0';
670+ output_delimiter_string = dummy;
671+ output_delimiter_length = 1;
672+ }
673 }
674
675 if (optind == argc)
676diff -urNp coreutils-8.0-orig/src/cut.c.orig coreutils-8.0/src/cut.c.orig
677--- coreutils-8.0-orig/src/cut.c.orig 1970-01-01 01:00:00.000000000 +0100
678+++ coreutils-8.0/src/cut.c.orig 2009-09-23 10:25:44.000000000 +0200
679@@ -0,0 +1,893 @@
680+/* cut - remove parts of lines of files
681+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
682+ Copyright (C) 1984 David M. Ihnat
683+
684+ This program is free software: you can redistribute it and/or modify
685+ it under the terms of the GNU General Public License as published by
686+ the Free Software Foundation, either version 3 of the License, or
687+ (at your option) any later version.
688+
689+ This program is distributed in the hope that it will be useful,
690+ but WITHOUT ANY WARRANTY; without even the implied warranty of
691+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
692+ GNU General Public License for more details.
693+
694+ You should have received a copy of the GNU General Public License
695+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
696+
697+/* Written by David Ihnat. */
698+
699+/* POSIX changes, bug fixes, long-named options, and cleanup
700+ by David MacKenzie <djm@gnu.ai.mit.edu>.
701+
702+ Rewrite cut_fields and cut_bytes -- Jim Meyering. */
703+
704+#include <config.h>
705+
706+#include <stdio.h>
707+#include <assert.h>
708+#include <getopt.h>
709+#include <sys/types.h>
710+#include "system.h"
711+
712+#include "error.h"
713+#include "getndelim2.h"
714+#include "hash.h"
715+#include "quote.h"
716+#include "xstrndup.h"
717+
718+/* The official name of this program (e.g., no `g' prefix). */
719+#define PROGRAM_NAME "cut"
720+
721+#define AUTHORS \
722+ proper_name ("David M. Ihnat"), \
723+ proper_name ("David MacKenzie"), \
724+ proper_name ("Jim Meyering")
725+
726+#define FATAL_ERROR(Message) \
727+ do \
728+ { \
729+ error (0, 0, (Message)); \
730+ usage (EXIT_FAILURE); \
731+ } \
732+ while (0)
733+
734+/* Append LOW, HIGH to the list RP of range pairs, allocating additional
735+ space if necessary. Update local variable N_RP. When allocating,
736+ update global variable N_RP_ALLOCATED. */
737+
738+#define ADD_RANGE_PAIR(rp, low, high) \
739+ do \
740+ { \
741+ if (low == 0 || high == 0) \
742+ FATAL_ERROR (_("fields and positions are numbered from 1")); \
743+ if (n_rp >= n_rp_allocated) \
744+ { \
745+ (rp) = X2NREALLOC (rp, &n_rp_allocated); \
746+ } \
747+ rp[n_rp].lo = (low); \
748+ rp[n_rp].hi = (high); \
749+ ++n_rp; \
750+ } \
751+ while (0)
752+
753+struct range_pair
754+ {
755+ size_t lo;
756+ size_t hi;
757+ };
758+
759+/* This buffer is used to support the semantics of the -s option
760+ (or lack of same) when the specified field list includes (does
761+ not include) the first field. In both of those cases, the entire
762+ first field must be read into this buffer to determine whether it
763+ is followed by a delimiter or a newline before any of it may be
764+ output. Otherwise, cut_fields can do the job without using this
765+ buffer. */
766+static char *field_1_buffer;
767+
768+/* The number of bytes allocated for FIELD_1_BUFFER. */
769+static size_t field_1_bufsize;
770+
771+/* The largest field or byte index used as an endpoint of a closed
772+ or degenerate range specification; this doesn't include the starting
773+ index of right-open-ended ranges. For example, with either range spec
774+ `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
775+static size_t max_range_endpoint;
776+
777+/* If nonzero, this is the index of the first field in a range that goes
778+ to end of line. */
779+static size_t eol_range_start;
780+
781+/* This is a bit vector.
782+ In byte mode, which bytes to output.
783+ In field mode, which DELIM-separated fields to output.
784+ Both bytes and fields are numbered starting with 1,
785+ so the zeroth bit of this array is unused.
786+ A field or byte K has been selected if
787+ (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
788+ || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
789+static unsigned char *printable_field;
790+
791+enum operating_mode
792+ {
793+ undefined_mode,
794+
795+ /* Output characters that are in the given bytes. */
796+ byte_mode,
797+
798+ /* Output the given delimeter-separated fields. */
799+ field_mode
800+ };
801+
802+static enum operating_mode operating_mode;
803+
804+/* If true do not output lines containing no delimeter characters.
805+ Otherwise, all such lines are printed. This option is valid only
806+ with field mode. */
807+static bool suppress_non_delimited;
808+
809+/* If nonzero, print all bytes, characters, or fields _except_
810+ those that were specified. */
811+static bool complement;
812+
813+/* The delimeter character for field mode. */
814+static unsigned char delim;
815+
816+/* True if the --output-delimiter=STRING option was specified. */
817+static bool output_delimiter_specified;
818+
819+/* The length of output_delimiter_string. */
820+static size_t output_delimiter_length;
821+
822+/* The output field separator string. Defaults to the 1-character
823+ string consisting of the input delimiter. */
824+static char *output_delimiter_string;
825+
826+/* True if we have ever read standard input. */
827+static bool have_read_stdin;
828+
829+#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
830+
831+/* The set of range-start indices. For example, given a range-spec list like
832+ `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
833+ Note that although `4' looks like a range-start index, it is in the middle
834+ of the `3-5' range, so it doesn't count.
835+ This table is created/used IFF output_delimiter_specified is set. */
836+static Hash_table *range_start_ht;
837+
838+/* For long options that have no equivalent short option, use a
839+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
840+enum
841+{
842+ OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
843+ COMPLEMENT_OPTION
844+};
845+
846+static struct option const longopts[] =
847+{
848+ {"bytes", required_argument, NULL, 'b'},
849+ {"characters", required_argument, NULL, 'c'},
850+ {"fields", required_argument, NULL, 'f'},
851+ {"delimiter", required_argument, NULL, 'd'},
852+ {"only-delimited", no_argument, NULL, 's'},
853+ {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
854+ {"complement", no_argument, NULL, COMPLEMENT_OPTION},
855+ {GETOPT_HELP_OPTION_DECL},
856+ {GETOPT_VERSION_OPTION_DECL},
857+ {NULL, 0, NULL, 0}
858+};
859+
860+void
861+usage (int status)
862+{
863+ if (status != EXIT_SUCCESS)
864+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
865+ program_name);
866+ else
867+ {
868+ printf (_("\
869+Usage: %s OPTION... [FILE]...\n\
870+"),
871+ program_name);
872+ fputs (_("\
873+Print selected parts of lines from each FILE to standard output.\n\
874+\n\
875+"), stdout);
876+ fputs (_("\
877+Mandatory arguments to long options are mandatory for short options too.\n\
878+"), stdout);
879+ fputs (_("\
880+ -b, --bytes=LIST select only these bytes\n\
881+ -c, --characters=LIST select only these characters\n\
882+ -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
883+"), stdout);
884+ fputs (_("\
885+ -f, --fields=LIST select only these fields; also print any line\n\
886+ that contains no delimiter character, unless\n\
887+ the -s option is specified\n\
888+ -n (ignored)\n\
889+"), stdout);
890+ fputs (_("\
891+ --complement complement the set of selected bytes, characters\n\
892+ or fields\n\
893+"), stdout);
894+ fputs (_("\
895+ -s, --only-delimited do not print lines not containing delimiters\n\
896+ --output-delimiter=STRING use STRING as the output delimiter\n\
897+ the default is to use the input delimiter\n\
898+"), stdout);
899+ fputs (HELP_OPTION_DESCRIPTION, stdout);
900+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
901+ fputs (_("\
902+\n\
903+Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
904+range, or many ranges separated by commas. Selected input is written\n\
905+in the same order that it is read, and is written exactly once.\n\
906+"), stdout);
907+ fputs (_("\
908+Each range is one of:\n\
909+\n\
910+ N N'th byte, character or field, counted from 1\n\
911+ N- from N'th byte, character or field, to end of line\n\
912+ N-M from N'th to M'th (included) byte, character or field\n\
913+ -M from first to M'th (included) byte, character or field\n\
914+\n\
915+With no FILE, or when FILE is -, read standard input.\n\
916+"), stdout);
917+ emit_ancillary_info ();
918+ }
919+ exit (status);
920+}
921+
922+static inline void
923+mark_range_start (size_t i)
924+{
925+ /* Record the fact that `i' is a range-start index. */
926+ void *ent_from_table = hash_insert (range_start_ht, (void*) i);
927+ if (ent_from_table == NULL)
928+ {
929+ /* Insertion failed due to lack of memory. */
930+ xalloc_die ();
931+ }
932+ assert ((size_t) ent_from_table == i);
933+}
934+
935+static inline void
936+mark_printable_field (size_t i)
937+{
938+ size_t n = i / CHAR_BIT;
939+ printable_field[n] |= (1 << (i % CHAR_BIT));
940+}
941+
942+static inline bool
943+is_printable_field (size_t i)
944+{
945+ size_t n = i / CHAR_BIT;
946+ return (printable_field[n] >> (i % CHAR_BIT)) & 1;
947+}
948+
949+static size_t
950+hash_int (const void *x, size_t tablesize)
951+{
952+#ifdef UINTPTR_MAX
953+ uintptr_t y = (uintptr_t) x;
954+#else
955+ size_t y = (size_t) x;
956+#endif
957+ return y % tablesize;
958+}
959+
960+static bool
961+hash_compare_ints (void const *x, void const *y)
962+{
963+ return (x == y) ? true : false;
964+}
965+
966+static bool
967+is_range_start_index (size_t i)
968+{
969+ return hash_lookup (range_start_ht, (void *) i) ? true : false;
970+}
971+
972+/* Return true if the K'th field or byte is printable.
973+ When returning true, if RANGE_START is non-NULL,
974+ set *RANGE_START to true if K is the beginning of a range, and to
975+ false otherwise. */
976+
977+static bool
978+print_kth (size_t k, bool *range_start)
979+{
980+ bool k_selected
981+ = ((0 < eol_range_start && eol_range_start <= k)
982+ || (k <= max_range_endpoint && is_printable_field (k)));
983+
984+ bool is_selected = k_selected ^ complement;
985+ if (range_start && is_selected)
986+ *range_start = is_range_start_index (k);
987+
988+ return is_selected;
989+}
990+
991+/* Comparison function for qsort to order the list of
992+ struct range_pairs by ascending `lo' (range start). */
993+static int
994+compare_ranges (const void *a, const void *b)
995+{
996+ int a_start = ((const struct range_pair *) a)->lo;
997+ int b_start = ((const struct range_pair *) b)->lo;
998+ return a_start < b_start ? -1 : a_start > b_start;
999+}
1000+
1001+/* Given the list of field or byte range specifications FIELDSTR, set
1002+ MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
1003+ array. If there is a right-open-ended range, set EOL_RANGE_START
1004+ to its starting index. FIELDSTR should be composed of one or more
1005+ numbers or ranges of numbers, separated by blanks or commas.
1006+ Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
1007+ through end of line. Return true if FIELDSTR contains at least
1008+ one field specification, false otherwise. */
1009+
1010+/* FIXME-someday: What if the user wants to cut out the 1,000,000-th
1011+ field of some huge input file? This function shouldn't have to
1012+ allocate a table of a million bits just so we can test every
1013+ field < 10^6 with an array dereference. Instead, consider using
1014+ an adaptive approach: if the range of selected fields is too large,
1015+ but only a few fields/byte-offsets are actually selected, use a
1016+ hash table. If the range of selected fields is too large, and
1017+ too many are selected, then resort to using the range-pairs (the
1018+ `rp' array) directly. */
1019+
1020+static bool
1021+set_fields (const char *fieldstr)
1022+{
1023+ size_t initial = 1; /* Value of first number in a range. */
1024+ size_t value = 0; /* If nonzero, a number being accumulated. */
1025+ bool lhs_specified = false;
1026+ bool rhs_specified = false;
1027+ bool dash_found = false; /* True if a '-' is found in this field. */
1028+ bool field_found = false; /* True if at least one field spec
1029+ has been processed. */
1030+
1031+ struct range_pair *rp = NULL;
1032+ size_t n_rp = 0;
1033+ size_t n_rp_allocated = 0;
1034+ size_t i;
1035+ bool in_digits = false;
1036+
1037+ /* Collect the range end points and store them in RP.
1038+ Also set EOL_RANGE_START if appropriate. */
1039+
1040+ for (;;)
1041+ {
1042+ if (*fieldstr == '-')
1043+ {
1044+ in_digits = false;
1045+ /* Starting a range. */
1046+ if (dash_found)
1047+ FATAL_ERROR (_("invalid byte or field list"));
1048+ dash_found = true;
1049+ fieldstr++;
1050+
1051+ initial = (lhs_specified ? value : 1);
1052+ value = 0;
1053+ }
1054+ else if (*fieldstr == ',' ||
1055+ isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
1056+ {
1057+ in_digits = false;
1058+ /* Ending the string, or this field/byte sublist. */
1059+ if (dash_found)
1060+ {
1061+ dash_found = false;
1062+
1063+ if (!lhs_specified && !rhs_specified)
1064+ FATAL_ERROR (_("invalid range with no endpoint: -"));
1065+
1066+ /* A range. Possibilities: -n, m-n, n-.
1067+ In any case, `initial' contains the start of the range. */
1068+ if (!rhs_specified)
1069+ {
1070+ /* `n-'. From `initial' to end of line. */
1071+ eol_range_start = initial;
1072+ field_found = true;
1073+ }
1074+ else
1075+ {
1076+ /* `m-n' or `-n' (1-n). */
1077+ if (value < initial)
1078+ FATAL_ERROR (_("invalid decreasing range"));
1079+
1080+ /* Is there already a range going to end of line? */
1081+ if (eol_range_start != 0)
1082+ {
1083+ /* Yes. Is the new sequence already contained
1084+ in the old one? If so, no processing is
1085+ necessary. */
1086+ if (initial < eol_range_start)
1087+ {
1088+ /* No, the new sequence starts before the
1089+ old. Does the old range going to end of line
1090+ extend into the new range? */
1091+ if (eol_range_start <= value)
1092+ {
1093+ /* Yes. Simply move the end of line marker. */
1094+ eol_range_start = initial;
1095+ }
1096+ else
1097+ {
1098+ /* No. A simple range, before and disjoint from
1099+ the range going to end of line. Fill it. */
1100+ ADD_RANGE_PAIR (rp, initial, value);
1101+ }
1102+
1103+ /* In any case, some fields were selected. */
1104+ field_found = true;
1105+ }
1106+ }
1107+ else
1108+ {
1109+ /* There is no range going to end of line. */
1110+ ADD_RANGE_PAIR (rp, initial, value);
1111+ field_found = true;
1112+ }
1113+ value = 0;
1114+ }
1115+ }
1116+ else
1117+ {
1118+ /* A simple field number, not a range. */
1119+ ADD_RANGE_PAIR (rp, value, value);
1120+ value = 0;
1121+ field_found = true;
1122+ }
1123+
1124+ if (*fieldstr == '\0')
1125+ {
1126+ break;
1127+ }
1128+
1129+ fieldstr++;
1130+ lhs_specified = false;
1131+ rhs_specified = false;
1132+ }
1133+ else if (ISDIGIT (*fieldstr))
1134+ {
1135+ /* Record beginning of digit string, in case we have to
1136+ complain about it. */
1137+ static char const *num_start;
1138+ if (!in_digits || !num_start)
1139+ num_start = fieldstr;
1140+ in_digits = true;
1141+
1142+ if (dash_found)
1143+ rhs_specified = 1;
1144+ else
1145+ lhs_specified = 1;
1146+
1147+ /* Detect overflow. */
1148+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
1149+ {
1150+ /* In case the user specified -c$(echo 2^64|bc),22,
1151+ complain only about the first number. */
1152+ /* Determine the length of the offending number. */
1153+ size_t len = strspn (num_start, "0123456789");
1154+ char *bad_num = xstrndup (num_start, len);
1155+ if (operating_mode == byte_mode)
1156+ error (0, 0,
1157+ _("byte offset %s is too large"), quote (bad_num));
1158+ else
1159+ error (0, 0,
1160+ _("field number %s is too large"), quote (bad_num));
1161+ free (bad_num);
1162+ exit (EXIT_FAILURE);
1163+ }
1164+
1165+ fieldstr++;
1166+ }
1167+ else
1168+ FATAL_ERROR (_("invalid byte or field list"));
1169+ }
1170+
1171+ max_range_endpoint = 0;
1172+ for (i = 0; i < n_rp; i++)
1173+ {
1174+ if (rp[i].hi > max_range_endpoint)
1175+ max_range_endpoint = rp[i].hi;
1176+ }
1177+
1178+ /* Allocate an array large enough so that it may be indexed by
1179+ the field numbers corresponding to all finite ranges
1180+ (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
1181+
1182+ printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
1183+
1184+ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
1185+
1186+ /* Set the array entries corresponding to integers in the ranges of RP. */
1187+ for (i = 0; i < n_rp; i++)
1188+ {
1189+ size_t j;
1190+ size_t rsi_candidate;
1191+
1192+ /* Record the range-start indices, i.e., record each start
1193+ index that is not part of any other (lo..hi] range. */
1194+ rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
1195+ if (output_delimiter_specified
1196+ && !is_printable_field (rsi_candidate))
1197+ mark_range_start (rsi_candidate);
1198+
1199+ for (j = rp[i].lo; j <= rp[i].hi; j++)
1200+ mark_printable_field (j);
1201+ }
1202+
1203+ if (output_delimiter_specified
1204+ && !complement
1205+ && eol_range_start && !is_printable_field (eol_range_start))
1206+ mark_range_start (eol_range_start);
1207+
1208+ free (rp);
1209+
1210+ return field_found;
1211+}
1212+
1213+/* Read from stream STREAM, printing to standard output any selected bytes. */
1214+
1215+static void
1216+cut_bytes (FILE *stream)
1217+{
1218+ size_t byte_idx; /* Number of bytes in the line so far. */
1219+ /* Whether to begin printing delimiters between ranges for the current line.
1220+ Set after we've begun printing data corresponding to the first range. */
1221+ bool print_delimiter;
1222+
1223+ byte_idx = 0;
1224+ print_delimiter = false;
1225+ while (1)
1226+ {
1227+ int c; /* Each character from the file. */
1228+
1229+ c = getc (stream);
1230+
1231+ if (c == '\n')
1232+ {
1233+ putchar ('\n');
1234+ byte_idx = 0;
1235+ print_delimiter = false;
1236+ }
1237+ else if (c == EOF)
1238+ {
1239+ if (byte_idx > 0)
1240+ putchar ('\n');
1241+ break;
1242+ }
1243+ else
1244+ {
1245+ bool range_start;
1246+ bool *rs = output_delimiter_specified ? &range_start : NULL;
1247+ if (print_kth (++byte_idx, rs))
1248+ {
1249+ if (rs && *rs && print_delimiter)
1250+ {
1251+ fwrite (output_delimiter_string, sizeof (char),
1252+ output_delimiter_length, stdout);
1253+ }
1254+ print_delimiter = true;
1255+ putchar (c);
1256+ }
1257+ }
1258+ }
1259+}
1260+
1261+/* Read from stream STREAM, printing to standard output any selected fields. */
1262+
1263+static void
1264+cut_fields (FILE *stream)
1265+{
1266+ int c;
1267+ size_t field_idx = 1;
1268+ bool found_any_selected_field = false;
1269+ bool buffer_first_field;
1270+
1271+ c = getc (stream);
1272+ if (c == EOF)
1273+ return;
1274+
1275+ ungetc (c, stream);
1276+
1277+ /* To support the semantics of the -s flag, we may have to buffer
1278+ all of the first field to determine whether it is `delimited.'
1279+ But that is unnecessary if all non-delimited lines must be printed
1280+ and the first field has been selected, or if non-delimited lines
1281+ must be suppressed and the first field has *not* been selected.
1282+ That is because a non-delimited line has exactly one field. */
1283+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
1284+
1285+ while (1)
1286+ {
1287+ if (field_idx == 1 && buffer_first_field)
1288+ {
1289+ ssize_t len;
1290+ size_t n_bytes;
1291+
1292+ len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
1293+ GETNLINE_NO_LIMIT, delim, '\n', stream);
1294+ if (len < 0)
1295+ {
1296+ free (field_1_buffer);
1297+ field_1_buffer = NULL;
1298+ if (ferror (stream) || feof (stream))
1299+ break;
1300+ xalloc_die ();
1301+ }
1302+
1303+ n_bytes = len;
1304+ assert (n_bytes != 0);
1305+
1306+ /* If the first field extends to the end of line (it is not
1307+ delimited) and we are printing all non-delimited lines,
1308+ print this one. */
1309+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
1310+ {
1311+ if (suppress_non_delimited)
1312+ {
1313+ /* Empty. */
1314+ }
1315+ else
1316+ {
1317+ fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
1318+ /* Make sure the output line is newline terminated. */
1319+ if (field_1_buffer[n_bytes - 1] != '\n')
1320+ putchar ('\n');
1321+ }
1322+ continue;
1323+ }
1324+ if (print_kth (1, NULL))
1325+ {
1326+ /* Print the field, but not the trailing delimiter. */
1327+ fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
1328+ found_any_selected_field = true;
1329+ }
1330+ ++field_idx;
1331+ }
1332+
1333+ if (c != EOF)
1334+ {
1335+ if (print_kth (field_idx, NULL))
1336+ {
1337+ if (found_any_selected_field)
1338+ {
1339+ fwrite (output_delimiter_string, sizeof (char),
1340+ output_delimiter_length, stdout);
1341+ }
1342+ found_any_selected_field = true;
1343+
1344+ while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
1345+ {
1346+ putchar (c);
1347+ }
1348+ }
1349+ else
1350+ {
1351+ while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
1352+ {
1353+ /* Empty. */
1354+ }
1355+ }
1356+ }
1357+
1358+ if (c == '\n')
1359+ {
1360+ c = getc (stream);
1361+ if (c != EOF)
1362+ {
1363+ ungetc (c, stream);
1364+ c = '\n';
1365+ }
1366+ }
1367+
1368+ if (c == delim)
1369+ ++field_idx;
1370+ else if (c == '\n' || c == EOF)
1371+ {
1372+ if (found_any_selected_field
1373+ || !(suppress_non_delimited && field_idx == 1))
1374+ putchar ('\n');
1375+ if (c == EOF)
1376+ break;
1377+ field_idx = 1;
1378+ found_any_selected_field = false;
1379+ }
1380+ }
1381+}
1382+
1383+static void
1384+cut_stream (FILE *stream)
1385+{
1386+ if (operating_mode == byte_mode)
1387+ cut_bytes (stream);
1388+ else
1389+ cut_fields (stream);
1390+}
1391+
1392+/* Process file FILE to standard output.
1393+ Return true if successful. */
1394+
1395+static bool
1396+cut_file (char const *file)
1397+{
1398+ FILE *stream;
1399+
1400+ if (STREQ (file, "-"))
1401+ {
1402+ have_read_stdin = true;
1403+ stream = stdin;
1404+ }
1405+ else
1406+ {
1407+ stream = fopen (file, "r");
1408+ if (stream == NULL)
1409+ {
1410+ error (0, errno, "%s", file);
1411+ return false;
1412+ }
1413+ }
1414+
1415+ cut_stream (stream);
1416+
1417+ if (ferror (stream))
1418+ {
1419+ error (0, errno, "%s", file);
1420+ return false;
1421+ }
1422+ if (STREQ (file, "-"))
1423+ clearerr (stream); /* Also clear EOF. */
1424+ else if (fclose (stream) == EOF)
1425+ {
1426+ error (0, errno, "%s", file);
1427+ return false;
1428+ }
1429+ return true;
1430+}
1431+
1432+int
1433+main (int argc, char **argv)
1434+{
1435+ int optc;
1436+ bool ok;
1437+ bool delim_specified = false;
1438+ char *spec_list_string IF_LINT(= NULL);
1439+
1440+ initialize_main (&argc, &argv);
1441+ set_program_name (argv[0]);
1442+ setlocale (LC_ALL, "");
1443+ bindtextdomain (PACKAGE, LOCALEDIR);
1444+ textdomain (PACKAGE);
1445+
1446+ atexit (close_stdout);
1447+
1448+ operating_mode = undefined_mode;
1449+
1450+ /* By default, all non-delimited lines are printed. */
1451+ suppress_non_delimited = false;
1452+
1453+ delim = '\0';
1454+ have_read_stdin = false;
1455+
1456+ while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
1457+ {
1458+ switch (optc)
1459+ {
1460+ case 'b':
1461+ case 'c':
1462+ /* Build the byte list. */
1463+ if (operating_mode != undefined_mode)
1464+ FATAL_ERROR (_("only one type of list may be specified"));
1465+ operating_mode = byte_mode;
1466+ spec_list_string = optarg;
1467+ break;
1468+
1469+ case 'f':
1470+ /* Build the field list. */
1471+ if (operating_mode != undefined_mode)
1472+ FATAL_ERROR (_("only one type of list may be specified"));
1473+ operating_mode = field_mode;
1474+ spec_list_string = optarg;
1475+ break;
1476+
1477+ case 'd':
1478+ /* New delimiter. */
1479+ /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
1480+ if (optarg[0] != '\0' && optarg[1] != '\0')
1481+ FATAL_ERROR (_("the delimiter must be a single character"));
1482+ delim = optarg[0];
1483+ delim_specified = true;
1484+ break;
1485+
1486+ case OUTPUT_DELIMITER_OPTION:
1487+ output_delimiter_specified = true;
1488+ /* Interpret --output-delimiter='' to mean
1489+ `use the NUL byte as the delimiter.' */
1490+ output_delimiter_length = (optarg[0] == '\0'
1491+ ? 1 : strlen (optarg));
1492+ output_delimiter_string = xstrdup (optarg);
1493+ break;
1494+
1495+ case 'n':
1496+ break;
1497+
1498+ case 's':
1499+ suppress_non_delimited = true;
1500+ break;
1501+
1502+ case COMPLEMENT_OPTION:
1503+ complement = true;
1504+ break;
1505+
1506+ case_GETOPT_HELP_CHAR;
1507+
1508+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1509+
1510+ default:
1511+ usage (EXIT_FAILURE);
1512+ }
1513+ }
1514+
1515+ if (operating_mode == undefined_mode)
1516+ FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
1517+
1518+ if (delim != '\0' && operating_mode != field_mode)
1519+ FATAL_ERROR (_("an input delimiter may be specified only\
1520+ when operating on fields"));
1521+
1522+ if (suppress_non_delimited && operating_mode != field_mode)
1523+ FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
1524+\tonly when operating on fields"));
1525+
1526+ if (output_delimiter_specified)
1527+ {
1528+ range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
1529+ NULL, hash_int,
1530+ hash_compare_ints, NULL);
1531+ if (range_start_ht == NULL)
1532+ xalloc_die ();
1533+
1534+ }
1535+
1536+ if (! set_fields (spec_list_string))
1537+ {
1538+ if (operating_mode == field_mode)
1539+ FATAL_ERROR (_("missing list of fields"));
1540+ else
1541+ FATAL_ERROR (_("missing list of positions"));
1542+ }
1543+
1544+ if (!delim_specified)
1545+ delim = '\t';
1546+
1547+ if (output_delimiter_string == NULL)
1548+ {
1549+ static char dummy[2];
1550+ dummy[0] = delim;
1551+ dummy[1] = '\0';
1552+ output_delimiter_string = dummy;
1553+ output_delimiter_length = 1;
1554+ }
1555+
1556+ if (optind == argc)
1557+ ok = cut_file ("-");
1558+ else
1559+ for (ok = true; optind < argc; optind++)
1560+ ok &= cut_file (argv[optind]);
1561+
1562+ if (range_start_ht)
1563+ hash_free (range_start_ht);
1564+
1565+ if (have_read_stdin && fclose (stdin) == EOF)
1566+ {
1567+ error (0, errno, "-");
1568+ ok = false;
1569+ }
1570+
1571+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
1572+}
1573diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
159--- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000 1574--- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
160+++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000 1575+++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
161@@ -38,11 +38,28 @@ 1576@@ -37,11 +37,28 @@
162 #include <stdio.h> 1577 #include <stdio.h>
163 #include <getopt.h> 1578 #include <getopt.h>
164 #include <sys/types.h> 1579 #include <sys/types.h>
165+ 1580+
166+/* Get mbstate_t, mbrtowc(), wcwidth(). */ 1581+/* Get mbstate_t, mbrtowc(), wcwidth(). */
185+#endif 1600+#endif
186+ 1601+
187 /* The official name of this program (e.g., no `g' prefix). */ 1602 /* The official name of this program (e.g., no `g' prefix). */
188 #define PROGRAM_NAME "expand" 1603 #define PROGRAM_NAME "expand"
189 1604
190@@ -365,6 +383,142 @@ 1605@@ -357,6 +374,142 @@ expand (void)
191 } 1606 }
192 } 1607 }
193 1608
194+#if HAVE_MBRTOWC 1609+#if HAVE_MBRTOWC
195+static void 1610+static void
328+#endif 1743+#endif
329+ 1744+
330 int 1745 int
331 main (int argc, char **argv) 1746 main (int argc, char **argv)
332 { 1747 {
333@@ -429,7 +583,12 @@ 1748@@ -421,7 +574,12 @@ main (int argc, char **argv)
334 1749
335 file_list = (optind < argc ? &argv[optind] : stdin_argv); 1750 file_list = (optind < argc ? &argv[optind] : stdin_argv);
336 1751
337- expand (); 1752- expand ();
338+#if HAVE_MBRTOWC 1753+#if HAVE_MBRTOWC
342+#endif 1757+#endif
343+ expand (); 1758+ expand ();
344 1759
345 if (have_read_stdin && fclose (stdin) != 0) 1760 if (have_read_stdin && fclose (stdin) != 0)
346 error (EXIT_FAILURE, errno, "-"); 1761 error (EXIT_FAILURE, errno, "-");
347--- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000 1762diff -urNp coreutils-8.0-orig/src/expand.c.orig coreutils-8.0/src/expand.c.orig
1763--- coreutils-8.0-orig/src/expand.c.orig 1970-01-01 01:00:00.000000000 +0100
1764+++ coreutils-8.0/src/expand.c.orig 2009-09-29 15:27:54.000000000 +0200
1765@@ -0,0 +1,430 @@
1766+/* expand - convert tabs to spaces
1767+ Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
1768+
1769+ This program is free software: you can redistribute it and/or modify
1770+ it under the terms of the GNU General Public License as published by
1771+ the Free Software Foundation, either version 3 of the License, or
1772+ (at your option) any later version.
1773+
1774+ This program is distributed in the hope that it will be useful,
1775+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1776+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1777+ GNU General Public License for more details.
1778+
1779+ You should have received a copy of the GNU General Public License
1780+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
1781+
1782+/* By default, convert all tabs to spaces.
1783+ Preserves backspace characters in the output; they decrement the
1784+ column count for tab calculations.
1785+ The default action is equivalent to -8.
1786+
1787+ Options:
1788+ --tabs=tab1[,tab2[,...]]
1789+ -t tab1[,tab2[,...]]
1790+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
1791+ columns apart instead of the default 8. Otherwise,
1792+ set the tabs at columns tab1, tab2, etc. (numbered from
1793+ 0); replace any tabs beyond the tab stops given with
1794+ single spaces.
1795+ --initial
1796+ -i Only convert initial tabs on each line to spaces.
1797+
1798+ David MacKenzie <djm@gnu.ai.mit.edu> */
1799+
1800+#include <config.h>
1801+
1802+#include <stdio.h>
1803+#include <getopt.h>
1804+#include <sys/types.h>
1805+#include "system.h"
1806+#include "error.h"
1807+#include "quote.h"
1808+#include "xstrndup.h"
1809+
1810+/* The official name of this program (e.g., no `g' prefix). */
1811+#define PROGRAM_NAME "expand"
1812+
1813+#define AUTHORS proper_name ("David MacKenzie")
1814+
1815+/* If true, convert blanks even after nonblank characters have been
1816+ read on the line. */
1817+static bool convert_entire_line;
1818+
1819+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
1820+static uintmax_t tab_size;
1821+
1822+/* Array of the explicit column numbers of the tab stops;
1823+ after `tab_list' is exhausted, each additional tab is replaced
1824+ by a space. The first column is column 0. */
1825+static uintmax_t *tab_list;
1826+
1827+/* The number of allocated entries in `tab_list'. */
1828+static size_t n_tabs_allocated;
1829+
1830+/* The index of the first invalid element of `tab_list',
1831+ where the next element can be added. */
1832+static size_t first_free_tab;
1833+
1834+/* Null-terminated array of input filenames. */
1835+static char **file_list;
1836+
1837+/* Default for `file_list' if no files are given on the command line. */
1838+static char *stdin_argv[] =
1839+{
1840+ (char *) "-", NULL
1841+};
1842+
1843+/* True if we have ever read standard input. */
1844+static bool have_read_stdin;
1845+
1846+/* The desired exit status. */
1847+static int exit_status;
1848+
1849+static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";
1850+
1851+static struct option const longopts[] =
1852+{
1853+ {"tabs", required_argument, NULL, 't'},
1854+ {"initial", no_argument, NULL, 'i'},
1855+ {GETOPT_HELP_OPTION_DECL},
1856+ {GETOPT_VERSION_OPTION_DECL},
1857+ {NULL, 0, NULL, 0}
1858+};
1859+
1860+void
1861+usage (int status)
1862+{
1863+ if (status != EXIT_SUCCESS)
1864+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
1865+ program_name);
1866+ else
1867+ {
1868+ printf (_("\
1869+Usage: %s [OPTION]... [FILE]...\n\
1870+"),
1871+ program_name);
1872+ fputs (_("\
1873+Convert tabs in each FILE to spaces, writing to standard output.\n\
1874+With no FILE, or when FILE is -, read standard input.\n\
1875+\n\
1876+"), stdout);
1877+ fputs (_("\
1878+Mandatory arguments to long options are mandatory for short options too.\n\
1879+"), stdout);
1880+ fputs (_("\
1881+ -i, --initial do not convert tabs after non blanks\n\
1882+ -t, --tabs=NUMBER have tabs NUMBER characters apart, not 8\n\
1883+"), stdout);
1884+ fputs (_("\
1885+ -t, --tabs=LIST use comma separated list of explicit tab positions\n\
1886+"), stdout);
1887+ fputs (HELP_OPTION_DESCRIPTION, stdout);
1888+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
1889+ emit_ancillary_info ();
1890+ }
1891+ exit (status);
1892+}
1893+
1894+/* Add tab stop TABVAL to the end of `tab_list'. */
1895+
1896+static void
1897+add_tab_stop (uintmax_t tabval)
1898+{
1899+ if (first_free_tab == n_tabs_allocated)
1900+ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
1901+ tab_list[first_free_tab++] = tabval;
1902+}
1903+
1904+/* Add the comma or blank separated list of tab stops STOPS
1905+ to the list of tab stops. */
1906+
1907+static void
1908+parse_tab_stops (char const *stops)
1909+{
1910+ bool have_tabval = false;
1911+ uintmax_t tabval IF_LINT (= 0);
1912+ char const *num_start IF_LINT (= NULL);
1913+ bool ok = true;
1914+
1915+ for (; *stops; stops++)
1916+ {
1917+ if (*stops == ',' || isblank (to_uchar (*stops)))
1918+ {
1919+ if (have_tabval)
1920+ add_tab_stop (tabval);
1921+ have_tabval = false;
1922+ }
1923+ else if (ISDIGIT (*stops))
1924+ {
1925+ if (!have_tabval)
1926+ {
1927+ tabval = 0;
1928+ have_tabval = true;
1929+ num_start = stops;
1930+ }
1931+
1932+ /* Detect overflow. */
1933+ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
1934+ {
1935+ size_t len = strspn (num_start, "0123456789");
1936+ char *bad_num = xstrndup (num_start, len);
1937+ error (0, 0, _("tab stop is too large %s"), quote (bad_num));
1938+ free (bad_num);
1939+ ok = false;
1940+ stops = num_start + len - 1;
1941+ }
1942+ }
1943+ else
1944+ {
1945+ error (0, 0, _("tab size contains invalid character(s): %s"),
1946+ quote (stops));
1947+ ok = false;
1948+ break;
1949+ }
1950+ }
1951+
1952+ if (!ok)
1953+ exit (EXIT_FAILURE);
1954+
1955+ if (have_tabval)
1956+ add_tab_stop (tabval);
1957+}
1958+
1959+/* Check that the list of tab stops TABS, with ENTRIES entries,
1960+ contains only nonzero, ascending values. */
1961+
1962+static void
1963+validate_tab_stops (uintmax_t const *tabs, size_t entries)
1964+{
1965+ uintmax_t prev_tab = 0;
1966+ size_t i;
1967+
1968+ for (i = 0; i < entries; i++)
1969+ {
1970+ if (tabs[i] == 0)
1971+ error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
1972+ if (tabs[i] <= prev_tab)
1973+ error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
1974+ prev_tab = tabs[i];
1975+ }
1976+}
1977+
1978+/* Close the old stream pointer FP if it is non-NULL,
1979+ and return a new one opened to read the next input file.
1980+ Open a filename of `-' as the standard input.
1981+ Return NULL if there are no more input files. */
1982+
1983+static FILE *
1984+next_file (FILE *fp)
1985+{
1986+ static char *prev_file;
1987+ char *file;
1988+
1989+ if (fp)
1990+ {
1991+ if (ferror (fp))
1992+ {
1993+ error (0, errno, "%s", prev_file);
1994+ exit_status = EXIT_FAILURE;
1995+ }
1996+ if (STREQ (prev_file, "-"))
1997+ clearerr (fp); /* Also clear EOF. */
1998+ else if (fclose (fp) != 0)
1999+ {
2000+ error (0, errno, "%s", prev_file);
2001+ exit_status = EXIT_FAILURE;
2002+ }
2003+ }
2004+
2005+ while ((file = *file_list++) != NULL)
2006+ {
2007+ if (STREQ (file, "-"))
2008+ {
2009+ have_read_stdin = true;
2010+ prev_file = file;
2011+ return stdin;
2012+ }
2013+ fp = fopen (file, "r");
2014+ if (fp)
2015+ {
2016+ prev_file = file;
2017+ return fp;
2018+ }
2019+ error (0, errno, "%s", file);
2020+ exit_status = EXIT_FAILURE;
2021+ }
2022+ return NULL;
2023+}
2024+
2025+/* Change tabs to spaces, writing to stdout.
2026+ Read each file in `file_list', in order. */
2027+
2028+static void
2029+expand (void)
2030+{
2031+ /* Input stream. */
2032+ FILE *fp = next_file (NULL);
2033+
2034+ if (!fp)
2035+ return;
2036+
2037+ for (;;)
2038+ {
2039+ /* Input character, or EOF. */
2040+ int c;
2041+
2042+ /* If true, perform translations. */
2043+ bool convert = true;
2044+
2045+
2046+ /* The following variables have valid values only when CONVERT
2047+ is true: */
2048+
2049+ /* Column of next input character. */
2050+ uintmax_t column = 0;
2051+
2052+ /* Index in TAB_LIST of next tab stop to examine. */
2053+ size_t tab_index = 0;
2054+
2055+
2056+ /* Convert a line of text. */
2057+
2058+ do
2059+ {
2060+ while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
2061+ continue;
2062+
2063+ if (convert)
2064+ {
2065+ if (c == '\t')
2066+ {
2067+ /* Column the next input tab stop is on. */
2068+ uintmax_t next_tab_column;
2069+
2070+ if (tab_size)
2071+ next_tab_column = column + (tab_size - column % tab_size);
2072+ else
2073+ for (;;)
2074+ if (tab_index == first_free_tab)
2075+ {
2076+ next_tab_column = column + 1;
2077+ break;
2078+ }
2079+ else
2080+ {
2081+ uintmax_t tab = tab_list[tab_index++];
2082+ if (column < tab)
2083+ {
2084+ next_tab_column = tab;
2085+ break;
2086+ }
2087+ }
2088+
2089+ if (next_tab_column < column)
2090+ error (EXIT_FAILURE, 0, _("input line is too long"));
2091+
2092+ while (++column < next_tab_column)
2093+ if (putchar (' ') < 0)
2094+ error (EXIT_FAILURE, errno, _("write error"));
2095+
2096+ c = ' ';
2097+ }
2098+ else if (c == '\b')
2099+ {
2100+ /* Go back one column, and force recalculation of the
2101+ next tab stop. */
2102+ column -= !!column;
2103+ tab_index -= !!tab_index;
2104+ }
2105+ else
2106+ {
2107+ column++;
2108+ if (!column)
2109+ error (EXIT_FAILURE, 0, _("input line is too long"));
2110+ }
2111+
2112+ convert &= convert_entire_line || !! isblank (c);
2113+ }
2114+
2115+ if (c < 0)
2116+ return;
2117+
2118+ if (putchar (c) < 0)
2119+ error (EXIT_FAILURE, errno, _("write error"));
2120+ }
2121+ while (c != '\n');
2122+ }
2123+}
2124+
2125+int
2126+main (int argc, char **argv)
2127+{
2128+ int c;
2129+
2130+ initialize_main (&argc, &argv);
2131+ set_program_name (argv[0]);
2132+ setlocale (LC_ALL, "");
2133+ bindtextdomain (PACKAGE, LOCALEDIR);
2134+ textdomain (PACKAGE);
2135+
2136+ atexit (close_stdout);
2137+
2138+ have_read_stdin = false;
2139+ exit_status = EXIT_SUCCESS;
2140+ convert_entire_line = true;
2141+ tab_list = NULL;
2142+ first_free_tab = 0;
2143+
2144+ while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
2145+ {
2146+ switch (c)
2147+ {