/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.41 - (show annotations) (download) (as text)
Wed Oct 7 08:11:44 2009 UTC (6 weeks, 4 days ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-8_0-1_fc13, coreutils-8_0-2_fc13
Changes since 1.40: +14643 -3261 lines
File MIME type: text/x-patch
defuzz patches
1 diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
2 --- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
3 +++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
4 @@ -21,6 +21,11 @@
5
6 # include <stdio.h>
7
8 +/* Get mbstate_t. */
9 +# if HAVE_WCHAR_H
10 +# include <wchar.h>
11 +# endif
12 +
13 /* A `struct linebuffer' holds a line of text. */
14
15 struct linebuffer
16 @@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
19 char *buffer;
20 +# if HAVE_WCHAR_H
21 + mbstate_t state;
22 +# endif
23 };
24
25 /* Initialize linebuffer LINEBUFFER for use. */
26 diff -urNp coreutils-8.0-orig/lib/linebuffer.h.orig coreutils-8.0/lib/linebuffer.h.orig
27 --- coreutils-8.0-orig/lib/linebuffer.h.orig 1970-01-01 01:00:00.000000000 +0100
28 +++ coreutils-8.0/lib/linebuffer.h.orig 2009-10-06 10:59:48.000000000 +0200
29 @@ -0,0 +1,53 @@
30 +/* linebuffer.h -- declarations for reading arbitrarily long lines
31 +
32 + Copyright (C) 1986, 1991, 1998, 1999, 2002, 2003, 2007 Free Software
33 + Foundation, Inc.
34 +
35 + This program is free software: you can redistribute it and/or modify
36 + it under the terms of the GNU General Public License as published by
37 + the Free Software Foundation; either version 3 of the License, or
38 + (at your option) any later version.
39 +
40 + This program is distributed in the hope that it will be useful,
41 + but WITHOUT ANY WARRANTY; without even the implied warranty of
42 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43 + GNU General Public License for more details.
44 +
45 + You should have received a copy of the GNU General Public License
46 + along with this program. If not, see <http://www.gnu.org/licenses/>. */
47 +
48 +#if !defined LINEBUFFER_H
49 +# define LINEBUFFER_H
50 +
51 +# include <stdio.h>
52 +
53 +/* A `struct linebuffer' holds a line of text. */
54 +
55 +struct linebuffer
56 +{
57 + size_t size; /* Allocated. */
58 + size_t length; /* Used. */
59 + char *buffer;
60 +};
61 +
62 +/* Initialize linebuffer LINEBUFFER for use. */
63 +void initbuffer (struct linebuffer *linebuffer);
64 +
65 +/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
66 + Consider lines to be terminated by DELIMITER.
67 + Keep the delimiter; append DELIMITER if we reach EOF and it wasn't
68 + the last character in the file. Do not NUL-terminate.
69 + Return LINEBUFFER, except at end of file return NULL. */
70 +struct linebuffer *readlinebuffer_delim (struct linebuffer *linebuffer,
71 + FILE *stream, char delimiter);
72 +
73 +/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
74 + Keep the newline; append a newline if it's the last line of a file
75 + that ends in a non-newline character. Do not NUL-terminate.
76 + Return LINEBUFFER, except at end of file return NULL. */
77 +struct linebuffer *readlinebuffer (struct linebuffer *linebuffer, FILE *stream);
78 +
79 +/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */
80 +void freebuffer (struct linebuffer *);
81 +
82 +#endif /* LINEBUFFER_H */
83 diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
84 --- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
85 +++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
86 @@ -28,6 +28,11 @@
87 #include <assert.h>
88 #include <getopt.h>
89 #include <sys/types.h>
90 +
91 +/* Get mbstate_t, mbrtowc(). */
92 +#if HAVE_WCHAR_H
93 +# include <wchar.h>
94 +#endif
95 #include "system.h"
96
97 #include "error.h"
98 @@ -36,6 +41,18 @@
99 #include "quote.h"
100 #include "xstrndup.h"
101
102 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
103 + installation; work around this configuration error. */
104 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
105 +# undef MB_LEN_MAX
106 +# define MB_LEN_MAX 16
107 +#endif
108 +
109 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
110 +#if HAVE_MBRTOWC && defined mbstate_t
111 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
112 +#endif
113 +
114 /* The official name of this program (e.g., no `g' prefix). */
115 #define PROGRAM_NAME "cut"
116
117 @@ -71,6 +88,52 @@
118 } \
119 while (0)
120
121 +/* Refill the buffer BUF to get a multibyte character. */
122 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
123 + do \
124 + { \
125 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
126 + { \
127 + memmove (BUF, BUFPOS, BUFLEN); \
128 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
129 + BUFPOS = BUF; \
130 + } \
131 + } \
132 + while (0)
133 +
134 +/* Get the wide character at BUFPOS into WC (WEOF if BUFLEN is 0); BUFPOS is not advanced.
135 + CONVFAIL is set to 1 if the byte sequence is not a valid character, otherwise to 0. */
136 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
137 + do \
138 + { \
139 + mbstate_t state_bak; \
140 + \
141 + if (BUFLEN < 1) \
142 + { \
143 + WC = WEOF; \
144 + break; \
145 + } \
146 + \
147 + /* Get a wide character. */ \
148 + CONVFAIL = 0; \
149 + state_bak = STATE; \
150 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
151 + \
152 + switch (MBLENGTH) \
153 + { \
154 + case (size_t)-1: \
155 + case (size_t)-2: \
156 + CONVFAIL++; \
157 + STATE = state_bak; \
158 + /* Fall through. */ \
159 + \
160 + case 0: \
161 + MBLENGTH = 1; \
162 + break; \
163 + } \
164 + } \
165 + while (0)
166 +
167 struct range_pair
168 {
169 size_t lo;
170 @@ -89,7 +152,7 @@ static char *field_1_buffer;
171 /* The number of bytes allocated for FIELD_1_BUFFER. */
172 static size_t field_1_bufsize;
173
174 -/* The largest field or byte index used as an endpoint of a closed
175 +/* The largest byte, character or field index used as an endpoint of a closed
176 or degenerate range specification; this doesn't include the starting
177 index of right-open-ended ranges. For example, with either range spec
178 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
179 @@ -101,10 +164,11 @@ static size_t eol_range_start;
180
181 /* This is a bit vector.
182 In byte mode, which bytes to output.
183 + In character mode, which characters to output.
184 In field mode, which DELIM-separated fields to output.
185 - Both bytes and fields are numbered starting with 1,
186 + Bytes, characters and fields are numbered starting with 1,
187 so the zeroth bit of this array is unused.
188 - A field or byte K has been selected if
189 + A byte, character or field K has been selected if
190 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
191 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
192 static unsigned char *printable_field;
193 @@ -113,15 +177,25 @@ enum operating_mode
194 {
195 undefined_mode,
196
197 - /* Output characters that are in the given bytes. */
198 + /* Output bytes that are at the given positions. */
199 byte_mode,
200
201 + /* Output characters that are at the given positions. */
202 + character_mode,
203 +
204 /* Output the given delimeter-separated fields. */
205 field_mode
206 };
207
208 static enum operating_mode operating_mode;
209
210 +/* If nonzero, when in byte mode, don't split multibyte characters. */
211 +static int byte_mode_character_aware;
212 +
213 +/* If nonzero, use the single-byte-locale functions even
214 + when this program runs in a multibyte locale. */
215 +static int force_singlebyte_mode;
216 +
217 /* If true do not output lines containing no delimeter characters.
218 Otherwise, all such lines are printed. This option is valid only
219 with field mode. */
220 @@ -133,6 +207,9 @@ static bool complement;
221
222 /* The delimeter character for field mode. */
223 static unsigned char delim;
224 +#if HAVE_WCHAR_H
225 +static wchar_t wcdelim;
226 +#endif
227
228 /* True if the --output-delimiter=STRING option was specified. */
229 static bool output_delimiter_specified;
230 @@ -206,7 +283,7 @@ Mandatory arguments to long options are
231 -f, --fields=LIST select only these fields; also print any line\n\
232 that contains no delimiter character, unless\n\
233 the -s option is specified\n\
234 - -n (ignored)\n\
235 + -n with -b: don't split multibyte characters\n\
236 "), stdout);
237 fputs (_("\
238 --complement complement the set of selected bytes, characters\n\
239 @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
240 in_digits = false;
241 /* Starting a range. */
242 if (dash_found)
243 - FATAL_ERROR (_("invalid byte or field list"));
244 + FATAL_ERROR (_("invalid byte, character or field list"));
245 dash_found = true;
246 fieldstr++;
247
248 @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
249 if (!rhs_specified)
250 {
251 /* `n-'. From `initial' to end of line. */
252 - eol_range_start = initial;
253 + if (eol_range_start == 0 ||
254 + (eol_range_start != 0 && eol_range_start > initial))
255 + eol_range_start = initial;
256 field_found = true;
257 }
258 else
259 {
260 /* `m-n' or `-n' (1-n). */
261 if (value < initial)
262 - FATAL_ERROR (_("invalid decreasing range"));
263 + FATAL_ERROR (_("invalid byte, character or field list"));
264
265 /* Is there already a range going to end of line? */
266 if (eol_range_start != 0)
267 @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
268 if (operating_mode == byte_mode)
269 error (0, 0,
270 _("byte offset %s is too large"), quote (bad_num));
271 + else if (operating_mode == character_mode)
272 + error (0, 0,
273 + _("character offset %s is too large"), quote (bad_num));
274 else
275 error (0, 0,
276 _("field number %s is too large"), quote (bad_num));
277 @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
278 fieldstr++;
279 }
280 else
281 - FATAL_ERROR (_("invalid byte or field list"));
282 + FATAL_ERROR (_("invalid byte, character or field list"));
283 }
284
285 max_range_endpoint = 0;
286 @@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
287 }
288 }
289
290 +#if HAVE_MBRTOWC
291 +/* This function is used in the following cases:
292 +
293 + 1. Read from the stream STREAM, printing to standard output any selected
294 + characters.
295 +
296 + 2. Read from stream STREAM, printing to standard output any selected bytes,
297 + without splitting multibyte characters. */
298 +
299 +static void
300 +cut_characters_or_cut_bytes_no_split (FILE *stream)
301 +{
302 + size_t idx; /* Number of bytes or characters in the line so far (size_t, as print_kth expects). */
303 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
304 + char *bufpos; /* Next read position of BUF. */
305 + size_t buflen; /* The length of the byte sequence in buf. */
306 + wint_t wc; /* The wide character just read, or WEOF when BUF is exhausted. */
307 + size_t mblength; /* The number of bytes at BUFPOS that were decoded
308 + into WC (1 when the conversion failed). */
309 + mbstate_t state; /* State of the stream. */
310 + int convfail; /* 1 when the conversion failed, otherwise 0. */
311 +
312 + idx = 0;
313 + buflen = 0;
314 + bufpos = buf;
315 + memset (&state, '\0', sizeof(mbstate_t));
316 +
317 + while (1)
318 + {
319 + REFILL_BUFFER (buf, bufpos, buflen, stream);
320 +
321 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
322 +
323 + if (wc == WEOF)
324 + {
325 + if (idx > 0)
326 + putchar ('\n');
327 + break;
328 + }
329 + else if (wc == L'\n')
330 + {
331 + putchar ('\n');
332 + idx = 0;
333 + }
334 + else
335 + {
336 + idx += (operating_mode == byte_mode) ? mblength : 1;
337 + if (print_kth (idx, NULL))
338 + fwrite (bufpos, mblength, sizeof(char), stdout);
339 + }
340 +
341 + buflen -= mblength;
342 + bufpos += mblength;
343 + }
344 +}
345 +#endif
346 +
347 /* Read from stream STREAM, printing to standard output any selected fields. */
348
349 static void
350 @@ -701,13 +840,192 @@ cut_fields (FILE *stream)
351 }
352 }
353
354 +#if HAVE_MBRTOWC /* cut_fields_mb: field-mode reader used by cut_stream in multibyte locales. */
355 +static void
356 +cut_fields_mb (FILE *stream)
357 +{
358 + int c;
359 + unsigned int field_idx;
360 + int found_any_selected_field;
361 + int buffer_first_field;
362 + int empty_input;
363 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
364 + char *bufpos; /* Next read position of BUF. */
365 + size_t buflen; /* The length of the byte sequence in buf. */
366 + wint_t wc = 0; /* The wide character just read, or WEOF when BUF is exhausted. */
367 + size_t mblength; /* The number of bytes at BUFPOS that were decoded
368 + into WC (1 when the conversion failed). */
369 + mbstate_t state; /* State of the stream. */
370 + int convfail = 0; /* 1 when the conversion failed, otherwise 0; tested below even if nothing was read. */
371 +
372 + found_any_selected_field = 0;
373 + field_idx = 1;
374 + bufpos = buf;
375 + buflen = 0;
376 + memset (&state, '\0', sizeof(mbstate_t));
377 +
378 + c = getc (stream);
379 + empty_input = (c == EOF);
380 + if (c != EOF)
381 + ungetc (c, stream);
382 + else
383 + wc = WEOF;
384 +
385 + /* To support the semantics of the -s flag, we may have to buffer
386 + all of the first field to determine whether it is `delimited.'
387 + But that is unnecessary if all non-delimited lines must be printed
388 + and the first field has been selected, or if non-delimited lines
389 + must be suppressed and the first field has *not* been selected.
390 + That is because a non-delimited line has exactly one field. */
391 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
392 +
393 + while (1)
394 + {
395 + if (field_idx == 1 && buffer_first_field)
396 + {
397 + int len = 0;
398 +
399 + while (1)
400 + {
401 + REFILL_BUFFER (buf, bufpos, buflen, stream);
402 +
403 + GET_NEXT_WC_FROM_BUFFER
404 + (wc, bufpos, buflen, mblength, state, convfail);
405 +
406 + if (wc == WEOF)
407 + break;
408 +
409 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
410 + memcpy (field_1_buffer + len, bufpos, mblength);
411 + len += mblength;
412 + buflen -= mblength;
413 + bufpos += mblength;
414 +
415 + if (!convfail && (wc == L'\n' || wc == wcdelim))
416 + break;
417 + }
418 +
419 + if (wc == WEOF)
420 + break;
421 +
422 + /* If the first field extends to the end of line (it is not
423 + delimited) and we are printing all non-delimited lines,
424 + print this one. */
425 + if (convfail || (!convfail && wc != wcdelim))
426 + {
427 + if (suppress_non_delimited)
428 + {
429 + /* Empty. */
430 + }
431 + else
432 + {
433 + fwrite (field_1_buffer, sizeof (char), len, stdout);
434 + /* Make sure the output line is newline terminated. */
435 + if (convfail || (!convfail && wc != L'\n'))
436 + putchar ('\n');
437 + }
438 + continue;
439 + }
440 +
441 + if (print_kth (1, NULL))
442 + {
443 + /* Print the field, but not the trailing delimiter (MBLENGTH bytes, possibly more than one). */
444 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
445 + found_any_selected_field = 1;
446 + }
447 + ++field_idx;
448 + }
449 +
450 + if (wc != WEOF)
451 + {
452 + if (print_kth (field_idx, NULL))
453 + {
454 + if (found_any_selected_field)
455 + {
456 + fwrite (output_delimiter_string, sizeof (char),
457 + output_delimiter_length, stdout);
458 + }
459 + found_any_selected_field = 1;
460 + }
461 +
462 + while (1)
463 + {
464 + REFILL_BUFFER (buf, bufpos, buflen, stream);
465 +
466 + GET_NEXT_WC_FROM_BUFFER
467 + (wc, bufpos, buflen, mblength, state, convfail);
468 +
469 + if (wc == WEOF)
470 + break;
471 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
472 + {
473 + buflen -= mblength;
474 + bufpos += mblength;
475 + break;
476 + }
477 +
478 + if (print_kth (field_idx, NULL))
479 + fwrite (bufpos, mblength, sizeof(char), stdout);
480 +
481 + buflen -= mblength;
482 + bufpos += mblength;
483 + }
484 + }
485 +
486 + if ((!convfail || wc == L'\n') && buflen < 1)
487 + wc = WEOF;
488 +
489 + if (!convfail && wc == wcdelim)
490 + ++field_idx;
491 + else if (wc == WEOF || (!convfail && wc == L'\n'))
492 + {
493 + if (found_any_selected_field
494 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
495 + putchar ('\n');
496 + if (wc == WEOF)
497 + break;
498 + field_idx = 1;
499 + found_any_selected_field = 0;
500 + }
501 + }
502 +}
503 +#endif
504 +
505 static void
506 cut_stream (FILE *stream)
507 {
508 - if (operating_mode == byte_mode)
509 - cut_bytes (stream);
510 +#if HAVE_MBRTOWC
511 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
512 + {
513 + switch (operating_mode)
514 + {
515 + case byte_mode:
516 + if (byte_mode_character_aware)
517 + cut_characters_or_cut_bytes_no_split (stream);
518 + else
519 + cut_bytes (stream);
520 + break;
521 +
522 + case character_mode:
523 + cut_characters_or_cut_bytes_no_split (stream);
524 + break;
525 +
526 + case field_mode:
527 + cut_fields_mb (stream);
528 + break;
529 +
530 + default:
531 + abort ();
532 + }
533 + }
534 else
535 - cut_fields (stream);
536 +#endif
537 + {
538 + if (operating_mode == field_mode)
539 + cut_fields (stream);
540 + else
541 + cut_bytes (stream);
542 + }
543 }
544
545 /* Process file FILE to standard output.
546 @@ -757,6 +1075,8 @@ main (int argc, char **argv)
547 bool ok;
548 bool delim_specified = false;
549 char *spec_list_string IF_LINT(= NULL);
550 + char mbdelim[MB_LEN_MAX + 1];
551 + size_t delimlen = 0;
552
553 initialize_main (&argc, &argv);
554 set_program_name (argv[0]);
555 @@ -779,7 +1099,6 @@ main (int argc, char **argv)
556 switch (optc)
557 {
558 case 'b':
559 - case 'c':
560 /* Build the byte list. */
561 if (operating_mode != undefined_mode)
562 FATAL_ERROR (_("only one type of list may be specified"));
563 @@ -787,6 +1106,14 @@ main (int argc, char **argv)
564 spec_list_string = optarg;
565 break;
566
567 + case 'c':
568 + /* Build the character list. */
569 + if (operating_mode != undefined_mode)
570 + FATAL_ERROR (_("only one type of list may be specified"));
571 + operating_mode = character_mode;
572 + spec_list_string = optarg;
573 + break;
574 +
575 case 'f':
576 /* Build the field list. */
577 if (operating_mode != undefined_mode)
578 @@ -798,10 +1125,35 @@ main (int argc, char **argv)
579 case 'd':
580 /* New delimiter. */
581 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
582 - if (optarg[0] != '\0' && optarg[1] != '\0')
583 - FATAL_ERROR (_("the delimiter must be a single character"));
584 - delim = optarg[0];
585 - delim_specified = true;
586 + {
587 +#if HAVE_MBRTOWC
588 + if(MB_CUR_MAX > 1)
589 + {
590 + mbstate_t state;
591 +
592 + memset (&state, '\0', sizeof(mbstate_t));
593 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
594 +
595 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
596 + ++force_singlebyte_mode;
597 + else
598 + {
599 + delimlen = (delimlen < 1) ? 1 : delimlen;
600 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
601 + FATAL_ERROR (_("the delimiter must be a single character"));
602 + memcpy (mbdelim, optarg, delimlen); mbdelim[delimlen] = '\0'; /* xstrdup (mbdelim) later needs the terminator. */
603 + }
604 + }
605 +
606 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
607 +#endif
608 + {
609 + if (optarg[0] != '\0' && optarg[1] != '\0')
610 + FATAL_ERROR (_("the delimiter must be a single character"));
611 + delim = (unsigned char) optarg[0];
612 + }
613 + delim_specified = true;
614 + }
615 break;
616
617 case OUTPUT_DELIMITER_OPTION:
618 @@ -814,6 +1166,7 @@ main (int argc, char **argv)
619 break;
620
621 case 'n':
622 + byte_mode_character_aware = 1;
623 break;
624
625 case 's':
626 @@ -836,7 +1189,7 @@ main (int argc, char **argv)
627 if (operating_mode == undefined_mode)
628 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
629
630 - if (delim != '\0' && operating_mode != field_mode)
631 + if (delim_specified && operating_mode != field_mode)
632 FATAL_ERROR (_("an input delimiter may be specified only\
633 when operating on fields"));
634
635 @@ -863,15 +1216,34 @@ main (int argc, char **argv)
636 }
637
638 if (!delim_specified)
639 - delim = '\t';
640 + {
641 + delim = '\t';
642 +#ifdef HAVE_MBRTOWC
643 + wcdelim = L'\t';
644 + mbdelim[0] = '\t';
645 + mbdelim[1] = '\0';
646 + delimlen = 1;
647 +#endif
648 + }
649
650 if (output_delimiter_string == NULL)
651 {
652 - static char dummy[2];
653 - dummy[0] = delim;
654 - dummy[1] = '\0';
655 - output_delimiter_string = dummy;
656 - output_delimiter_length = 1;
657 +#ifdef HAVE_MBRTOWC
658 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
659 + {
660 + output_delimiter_string = xstrdup(mbdelim);
661 + output_delimiter_length = delimlen;
662 + }
663 +
664 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
665 +#endif
666 + {
667 + static char dummy[2];
668 + dummy[0] = delim;
669 + dummy[1] = '\0';
670 + output_delimiter_string = dummy;
671 + output_delimiter_length = 1;
672 + }
673 }
674
675 if (optind == argc)
676 diff -urNp coreutils-8.0-orig/src/cut.c.orig coreutils-8.0/src/cut.c.orig
677 --- coreutils-8.0-orig/src/cut.c.orig 1970-01-01 01:00:00.000000000 +0100
678 +++ coreutils-8.0/src/cut.c.orig 2009-09-23 10:25:44.000000000 +0200
679 @@ -0,0 +1,893 @@
680 +/* cut - remove parts of lines of files
681 + Copyright (C) 1997-2009 Free Software Foundation, Inc.
682 + Copyright (C) 1984 David M. Ihnat
683 +
684 + This program is free software: you can redistribute it and/or modify
685 + it under the terms of the GNU General Public License as published by
686 + the Free Software Foundation, either version 3 of the License, or
687 + (at your option) any later version.
688 +
689 + This program is distributed in the hope that it will be useful,
690 + but WITHOUT ANY WARRANTY; without even the implied warranty of
691 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
692 + GNU General Public License for more details.
693 +
694 + You should have received a copy of the GNU General Public License
695 + along with this program. If not, see <http://www.gnu.org/licenses/>. */
696 +
697 +/* Written by David Ihnat. */
698 +
699 +/* POSIX changes, bug fixes, long-named options, and cleanup
700 + by David MacKenzie <djm@gnu.ai.mit.edu>.
701 +
702 + Rewrite cut_fields and cut_bytes -- Jim Meyering. */
703 +
704 +#include <config.h>
705 +
706 +#include <stdio.h>
707 +#include <assert.h>
708 +#include <getopt.h>
709 +#include <sys/types.h>
710 +#include "system.h"
711 +
712 +#include "error.h"
713 +#include "getndelim2.h"
714 +#include "hash.h"
715 +#include "quote.h"
716 +#include "xstrndup.h"
717 +
718 +/* The official name of this program (e.g., no `g' prefix). */
719 +#define PROGRAM_NAME "cut"
720 +
721 +#define AUTHORS \
722 + proper_name ("David M. Ihnat"), \
723 + proper_name ("David MacKenzie"), \
724 + proper_name ("Jim Meyering")
725 +
726 +#define FATAL_ERROR(Message) \
727 + do \
728 + { \
729 + error (0, 0, (Message)); \
730 + usage (EXIT_FAILURE); \
731 + } \
732 + while (0)
733 +
734 +/* Append LOW, HIGH to the list RP of range pairs, allocating additional
735 + space if necessary. Update local variable N_RP. When allocating,
736 + update global variable N_RP_ALLOCATED. */
737 +
738 +#define ADD_RANGE_PAIR(rp, low, high) \
739 + do \
740 + { \
741 + if (low == 0 || high == 0) \
742 + FATAL_ERROR (_("fields and positions are numbered from 1")); \
743 + if (n_rp >= n_rp_allocated) \
744 + { \
745 + (rp) = X2NREALLOC (rp, &n_rp_allocated); \
746 + } \
747 + rp[n_rp].lo = (low); \
748 + rp[n_rp].hi = (high); \
749 + ++n_rp; \
750 + } \
751 + while (0)
752 +
753 +struct range_pair
754 + {
755 + size_t lo;
756 + size_t hi;
757 + };
758 +
759 +/* This buffer is used to support the semantics of the -s option
760 + (or lack of same) when the specified field list includes (does
761 + not include) the first field. In both of those cases, the entire
762 + first field must be read into this buffer to determine whether it
763 + is followed by a delimiter or a newline before any of it may be
764 + output. Otherwise, cut_fields can do the job without using this
765 + buffer. */
766 +static char *field_1_buffer;
767 +
768 +/* The number of bytes allocated for FIELD_1_BUFFER. */
769 +static size_t field_1_bufsize;
770 +
771 +/* The largest field or byte index used as an endpoint of a closed
772 + or degenerate range specification; this doesn't include the starting
773 + index of right-open-ended ranges. For example, with either range spec
774 + `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
775 +static size_t max_range_endpoint;
776 +
777 +/* If nonzero, this is the index of the first field in a range that goes
778 + to end of line. */
779 +static size_t eol_range_start;
780 +
781 +/* This is a bit vector.
782 + In byte mode, which bytes to output.
783 + In field mode, which DELIM-separated fields to output.
784 + Both bytes and fields are numbered starting with 1,
785 + so the zeroth bit of this array is unused.
786 + A field or byte K has been selected if
787 + (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
788 + || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
789 +static unsigned char *printable_field;
790 +
791 +enum operating_mode
792 + {
793 + undefined_mode,
794 +
795 + /* Output characters that are in the given bytes. */
796 + byte_mode,
797 +
798 + /* Output the given delimeter-separated fields. */
799 + field_mode
800 + };
801 +
802 +static enum operating_mode operating_mode;
803 +
804 +/* If true do not output lines containing no delimeter characters.
805 + Otherwise, all such lines are printed. This option is valid only
806 + with field mode. */
807 +static bool suppress_non_delimited;
808 +
809 +/* If nonzero, print all bytes, characters, or fields _except_
810 + those that were specified. */
811 +static bool complement;
812 +
813 +/* The delimeter character for field mode. */
814 +static unsigned char delim;
815 +
816 +/* True if the --output-delimiter=STRING option was specified. */
817 +static bool output_delimiter_specified;
818 +
819 +/* The length of output_delimiter_string. */
820 +static size_t output_delimiter_length;
821 +
822 +/* The output field separator string. Defaults to the 1-character
823 + string consisting of the input delimiter. */
824 +static char *output_delimiter_string;
825 +
826 +/* True if we have ever read standard input. */
827 +static bool have_read_stdin;
828 +
829 +#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
830 +
831 +/* The set of range-start indices. For example, given a range-spec list like
832 + `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
833 + Note that although `4' looks like a range-start index, it is in the middle
834 + of the `3-5' range, so it doesn't count.
835 + This table is created/used IFF output_delimiter_specified is set. */
836 +static Hash_table *range_start_ht;
837 +
838 +/* For long options that have no equivalent short option, use a
839 + non-character as a pseudo short option, starting with CHAR_MAX + 1. */
840 +enum
841 +{
842 + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
843 + COMPLEMENT_OPTION
844 +};
845 +
846 +static struct option const longopts[] =
847 +{
848 + {"bytes", required_argument, NULL, 'b'},
849 + {"characters", required_argument, NULL, 'c'},
850 + {"fields", required_argument, NULL, 'f'},
851 + {"delimiter", required_argument, NULL, 'd'},
852 + {"only-delimited", no_argument, NULL, 's'},
853 + {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
854 + {"complement", no_argument, NULL, COMPLEMENT_OPTION},
855 + {GETOPT_HELP_OPTION_DECL},
856 + {GETOPT_VERSION_OPTION_DECL},
857 + {NULL, 0, NULL, 0}
858 +};
859 +
860 +void
861 +usage (int status)
862 +{
863 + if (status != EXIT_SUCCESS)
864 + fprintf (stderr, _("Try `%s --help' for more information.\n"),
865 + program_name);
866 + else
867 + {
868 + printf (_("\
869 +Usage: %s OPTION... [FILE]...\n\
870 +"),
871 + program_name);
872 + fputs (_("\
873 +Print selected parts of lines from each FILE to standard output.\n\
874 +\n\
875 +"), stdout);
876 + fputs (_("\
877 +Mandatory arguments to long options are mandatory for short options too.\n\
878 +"), stdout);
879 + fputs (_("\
880 + -b, --bytes=LIST select only these bytes\n\
881 + -c, --characters=LIST select only these characters\n\
882 + -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
883 +"), stdout);
884 + fputs (_("\
885 + -f, --fields=LIST select only these fields; also print any line\n\
886 + that contains no delimiter character, unless\n\
887 + the -s option is specified\n\
888 + -n (ignored)\n\
889 +"), stdout);
890 + fputs (_("\
891 + --complement complement the set of selected bytes, characters\n\
892 + or fields\n\
893 +"), stdout);
894 + fputs (_("\
895 + -s, --only-delimited do not print lines not containing delimiters\n\
896 + --output-delimiter=STRING use STRING as the output delimiter\n\
897 + the default is to use the input delimiter\n\
898 +"), stdout);
899 + fputs (HELP_OPTION_DESCRIPTION, stdout);
900 + fputs (VERSION_OPTION_DESCRIPTION, stdout);
901 + fputs (_("\
902 +\n\
903 +Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
904 +range, or many ranges separated by commas. Selected input is written\n\
905 +in the same order that it is read, and is written exactly once.\n\
906 +"), stdout);
907 + fputs (_("\
908 +Each range is one of:\n\
909 +\n\
910 + N N'th byte, character or field, counted from 1\n\
911 + N- from N'th byte, character or field, to end of line\n\
912 + N-M from N'th to M'th (included) byte, character or field\n\
913 + -M from first to M'th (included) byte, character or field\n\
914 +\n\
915 +With no FILE, or when FILE is -, read standard input.\n\
916 +"), stdout);
917 + emit_ancillary_info ();
918 + }
919 + exit (status);
920 +}
921 +
922 +static inline void
923 +mark_range_start (size_t i)
924 +{
925 + /* Record the fact that `i' is a range-start index. */
926 + void *ent_from_table = hash_insert (range_start_ht, (void*) i);
927 + if (ent_from_table == NULL)
928 + {
929 + /* Insertion failed due to lack of memory. */
930 + xalloc_die ();
931 + }
932 + assert ((size_t) ent_from_table == i);
933 +}
934 +
935 +static inline void
936 +mark_printable_field (size_t i)
937 +{
938 + size_t n = i / CHAR_BIT;
939 + printable_field[n] |= (1 << (i % CHAR_BIT));
940 +}
941 +
942 +static inline bool
943 +is_printable_field (size_t i)
944 +{
945 + size_t n = i / CHAR_BIT;
946 + return (printable_field[n] >> (i % CHAR_BIT)) & 1;
947 +}
948 +
949 +static size_t
950 +hash_int (const void *x, size_t tablesize)
951 +{
952 +#ifdef UINTPTR_MAX
953 + uintptr_t y = (uintptr_t) x;
954 +#else
955 + size_t y = (size_t) x;
956 +#endif
957 + return y % tablesize;
958 +}
959 +
960 +static bool
961 +hash_compare_ints (void const *x, void const *y)
962 +{
963 + return (x == y) ? true : false;
964 +}
965 +
966 +static bool
967 +is_range_start_index (size_t i)
968 +{
969 + return hash_lookup (range_start_ht, (void *) i) ? true : false;
970 +}
971 +
972 +/* Return nonzero if the K'th field or byte is printable.
973 + When returning nonzero, if RANGE_START is non-NULL,
974 + set *RANGE_START to true if K is the beginning of a range, and to
975 + false otherwise. */
976 +
977 +static bool
978 +print_kth (size_t k, bool *range_start)
979 +{
980 + bool k_selected
981 + = ((0 < eol_range_start && eol_range_start <= k)
982 + || (k <= max_range_endpoint && is_printable_field (k)));
983 +
984 + bool is_selected = k_selected ^ complement;
985 + if (range_start && is_selected)
986 + *range_start = is_range_start_index (k);
987 +
988 + return is_selected;
989 +}
990 +
991 +/* Comparison function for qsort to order the list of
992 + struct range_pairs. */
993 +static int
994 +compare_ranges (const void *a, const void *b)
995 +{
996 + int a_start = ((const struct range_pair *) a)->lo;
997 + int b_start = ((const struct range_pair *) b)->lo;
998 + return a_start < b_start ? -1 : a_start > b_start;
999 +}
1000 +
1001 +/* Given the list of field or byte range specifications FIELDSTR, set
1002 + MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
1003 + array. If there is a right-open-ended range, set EOL_RANGE_START
1004 + to its starting index. FIELDSTR should be composed of one or more
1005 + numbers or ranges of numbers, separated by blanks or commas.
1006 + Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
1007 + through end of line. Return true if FIELDSTR contains at least
1008 + one field specification, false otherwise. */
1009 +
1010 +/* FIXME-someday: What if the user wants to cut out the 1,000,000-th
1011 + field of some huge input file? This function shouldn't have to
1012 + allocate a table of a million bits just so we can test every
1013 + field < 10^6 with an array dereference. Instead, consider using
1014 + an adaptive approach: if the range of selected fields is too large,
1015 + but only a few fields/byte-offsets are actually selected, use a
1016 + hash table. If the range of selected fields is too large, and
1017 + too many are selected, then resort to using the range-pairs (the
1018 + `rp' array) directly. */
1019 +
1020 +static bool
1021 +set_fields (const char *fieldstr)
1022 +{
1023 + size_t initial = 1; /* Value of first number in a range. */
1024 + size_t value = 0; /* If nonzero, a number being accumulated. */
1025 + bool lhs_specified = false;
1026 + bool rhs_specified = false;
1027 + bool dash_found = false; /* True if a '-' is found in this field. */
1028 + bool field_found = false; /* True if at least one field spec
1029 + has been processed. */
1030 +
1031 + struct range_pair *rp = NULL;
1032 + size_t n_rp = 0;
1033 + size_t n_rp_allocated = 0;
1034 + size_t i;
1035 + bool in_digits = false;
1036 +
1037 + /* Collect and store in RP the range end points.
1038 + It also sets EOL_RANGE_START if appropriate. */
1039 +
1040 + for (;;)
1041 + {
1042 + if (*fieldstr == '-')
1043 + {
1044 + in_digits = false;
1045 + /* Starting a range. */
1046 + if (dash_found)
1047 + FATAL_ERROR (_("invalid byte or field list"));
1048 + dash_found = true;
1049 + fieldstr++;
1050 +
1051 + initial = (lhs_specified ? value : 1);
1052 + value = 0;
1053 + }
1054 + else if (*fieldstr == ',' ||
1055 + isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
1056 + {
1057 + in_digits = false;
1058 + /* Ending the string, or this field/byte sublist. */
1059 + if (dash_found)
1060 + {
1061 + dash_found = false;
1062 +
1063 + if (!lhs_specified && !rhs_specified)
1064 + FATAL_ERROR (_("invalid range with no endpoint: -"));
1065 +
1066 + /* A range. Possibilities: -n, m-n, n-.
1067 + In any case, `initial' contains the start of the range. */
1068 + if (!rhs_specified)
1069 + {
1070 + /* `n-'. From `initial' to end of line. */
1071 + eol_range_start = initial;
1072 + field_found = true;
1073 + }
1074 + else
1075 + {
1076 + /* `m-n' or `-n' (1-n). */
1077 + if (value < initial)
1078 + FATAL_ERROR (_("invalid decreasing range"));
1079 +
1080 + /* Is there already a range going to end of line? */
1081 + if (eol_range_start != 0)
1082 + {
1083 + /* Yes. Is the new sequence already contained
1084 + in the old one? If so, no processing is
1085 + necessary. */
1086 + if (initial < eol_range_start)
1087 + {
1088 + /* No, the new sequence starts before the
1089 + old. Does the old range going to end of line
1090 + extend into the new range? */
1091 + if (eol_range_start <= value)
1092 + {
1093 + /* Yes. Simply move the end of line marker. */
1094 + eol_range_start = initial;
1095 + }
1096 + else
1097 + {
1098 + /* No. A simple range, before and disjoint from
1099 + the range going to end of line. Fill it. */
1100 + ADD_RANGE_PAIR (rp, initial, value);
1101 + }
1102 +
1103 + /* In any case, some fields were selected. */
1104 + field_found = true;
1105 + }
1106 + }
1107 + else
1108 + {
1109 + /* There is no range going to end of line. */
1110 + ADD_RANGE_PAIR (rp, initial, value);
1111 + field_found = true;
1112 + }
1113 + value = 0;
1114 + }
1115 + }
1116 + else
1117 + {
1118 + /* A simple field number, not a range. */
1119 + ADD_RANGE_PAIR (rp, value, value);
1120 + value = 0;
1121 + field_found = true;
1122 + }
1123 +
1124 + if (*fieldstr == '\0')
1125 + {
1126 + break;
1127 + }
1128 +
1129 + fieldstr++;
1130 + lhs_specified = false;
1131 + rhs_specified = false;
1132 + }
1133 + else if (ISDIGIT (*fieldstr))
1134 + {
1135 + /* Record beginning of digit string, in case we have to
1136 + complain about it. */
1137 + static char const *num_start;
1138 + if (!in_digits || !num_start)
1139 + num_start = fieldstr;
1140 + in_digits = true;
1141 +
1142 + if (dash_found)
1143 + rhs_specified = 1;
1144 + else
1145 + lhs_specified = 1;
1146 +
1147 + /* Detect overflow. */
1148 + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
1149 + {
1150 + /* In case the user specified -c$(echo 2^64|bc),22,
1151 + complain only about the first number. */
1152 + /* Determine the length of the offending number. */
1153 + size_t len = strspn (num_start, "0123456789");
1154 + char *bad_num = xstrndup (num_start, len);
1155 + if (operating_mode == byte_mode)
1156 + error (0, 0,
1157 + _("byte offset %s is too large"), quote (bad_num));
1158 + else
1159 + error (0, 0,
1160 + _("field number %s is too large"), quote (bad_num));
1161 + free (bad_num);
1162 + exit (EXIT_FAILURE);
1163 + }
1164 +
1165 + fieldstr++;
1166 + }
1167 + else
1168 + FATAL_ERROR (_("invalid byte or field list"));
1169 + }
1170 +
1171 + max_range_endpoint = 0;
1172 + for (i = 0; i < n_rp; i++)
1173 + {
1174 + if (rp[i].hi > max_range_endpoint)
1175 + max_range_endpoint = rp[i].hi;
1176 + }
1177 +
1178 + /* Allocate an array large enough so that it may be indexed by
1179 + the field numbers corresponding to all finite ranges
1180 + (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
1181 +
1182 + printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
1183 +
1184 + qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
1185 +
1186 + /* Set the array entries corresponding to integers in the ranges of RP. */
1187 + for (i = 0; i < n_rp; i++)
1188 + {
1189 + size_t j;
1190 + size_t rsi_candidate;
1191 +
1192 + /* Record the range-start indices, i.e., record each start
1193 + index that is not part of any other (lo..hi] range. */
1194 + rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
1195 + if (output_delimiter_specified
1196 + && !is_printable_field (rsi_candidate))
1197 + mark_range_start (rsi_candidate);
1198 +
1199 + for (j = rp[i].lo; j <= rp[i].hi; j++)
1200 + mark_printable_field (j);
1201 + }
1202 +
1203 + if (output_delimiter_specified
1204 + && !complement
1205 + && eol_range_start && !is_printable_field (eol_range_start))
1206 + mark_range_start (eol_range_start);
1207 +
1208 + free (rp);
1209 +
1210 + return field_found;
1211 +}
1212 +
1213 +/* Read from stream STREAM, printing to standard output any selected bytes. */
1214 +
1215 +static void
1216 +cut_bytes (FILE *stream)
1217 +{
1218 + size_t byte_idx; /* Number of bytes in the line so far. */
1219 + /* Whether to begin printing delimiters between ranges for the current line.
1220 + Set after we've begun printing data corresponding to the first range. */
1221 + bool print_delimiter;
1222 +
1223 + byte_idx = 0;
1224 + print_delimiter = false;
1225 + while (1)
1226 + {
1227 + int c; /* Each character from the file. */
1228 +
1229 + c = getc (stream);
1230 +
1231 + if (c == '\n')
1232 + {
1233 + putchar ('\n');
1234 + byte_idx = 0;
1235 + print_delimiter = false;
1236 + }
1237 + else if (c == EOF)
1238 + {
1239 + if (byte_idx > 0)
1240 + putchar ('\n');
1241 + break;
1242 + }
1243 + else
1244 + {
1245 + bool range_start;
1246 + bool *rs = output_delimiter_specified ? &range_start : NULL;
1247 + if (print_kth (++byte_idx, rs))
1248 + {
1249 + if (rs && *rs && print_delimiter)
1250 + {
1251 + fwrite (output_delimiter_string, sizeof (char),
1252 + output_delimiter_length, stdout);
1253 + }
1254 + print_delimiter = true;
1255 + putchar (c);
1256 + }
1257 + }
1258 + }
1259 +}
1260 +
1261 +/* Read from stream STREAM, printing to standard output any selected fields. */
1262 +
1263 +static void
1264 +cut_fields (FILE *stream)
1265 +{
1266 + int c;
1267 + size_t field_idx = 1;
1268 + bool found_any_selected_field = false;
1269 + bool buffer_first_field;
1270 +
1271 + c = getc (stream);
1272 + if (c == EOF)
1273 + return;
1274 +
1275 + ungetc (c, stream);
1276 +
1277 + /* To support the semantics of the -s flag, we may have to buffer
1278 + all of the first field to determine whether it is `delimited.'
1279 + But that is unnecessary if all non-delimited lines must be printed
1280 + and the first field has been selected, or if non-delimited lines
1281 + must be suppressed and the first field has *not* been selected.
1282 + That is because a non-delimited line has exactly one field. */
1283 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
1284 +
1285 + while (1)
1286 + {
1287 + if (field_idx == 1 && buffer_first_field)
1288 + {
1289 + ssize_t len;
1290 + size_t n_bytes;
1291 +
1292 + len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
1293 + GETNLINE_NO_LIMIT, delim, '\n', stream);
1294 + if (len < 0)
1295 + {
1296 + free (field_1_buffer);
1297 + field_1_buffer = NULL;
1298 + if (ferror (stream) || feof (stream))
1299 + break;
1300 + xalloc_die ();
1301 + }
1302 +
1303 + n_bytes = len;
1304 + assert (n_bytes != 0);
1305 +
1306 + /* If the first field extends to the end of line (it is not
1307 + delimited) and we are printing all non-delimited lines,
1308 + print this one. */
1309 + if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
1310 + {
1311 + if (suppress_non_delimited)
1312 + {
1313 + /* Empty. */
1314 + }
1315 + else
1316 + {
1317 + fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
1318 + /* Make sure the output line is newline terminated. */
1319 + if (field_1_buffer[n_bytes - 1] != '\n')
1320 + putchar ('\n');
1321 + }
1322 + continue;
1323 + }
1324 + if (print_kth (1, NULL))
1325 + {
1326 + /* Print the field, but not the trailing delimiter. */
1327 + fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
1328 + found_any_selected_field = true;
1329 + }
1330 + ++field_idx;
1331 + }
1332 +
1333 + if (c != EOF)
1334 + {
1335 + if (print_kth (field_idx, NULL))
1336 + {
1337 + if (found_any_selected_field)
1338 + {
1339 + fwrite (output_delimiter_string, sizeof (char),
1340 + output_delimiter_length, stdout);
1341 + }
1342 + found_any_selected_field = true;
1343 +
1344 + while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
1345 + {
1346 + putchar (c);
1347 + }
1348 + }
1349 + else
1350 + {
1351 + while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
1352 + {
1353 + /* Empty. */
1354 + }
1355 + }
1356 + }
1357 +
1358 + if (c == '\n')
1359 + {
1360 + c = getc (stream);
1361 + if (c != EOF)
1362 + {
1363 + ungetc (c, stream);
1364 + c = '\n';
1365 + }
1366 + }
1367 +
1368 + if (c == delim)
1369 + ++field_idx;
1370 + else if (c == '\n' || c == EOF)
1371 + {
1372 + if (found_any_selected_field
1373 + || !(suppress_non_delimited && field_idx == 1))
1374 + putchar ('\n');
1375 + if (c == EOF)
1376 + break;
1377 + field_idx = 1;
1378 + found_any_selected_field = false;
1379 + }
1380 + }
1381 +}
1382 +
1383 +static void
1384 +cut_stream (FILE *stream)
1385 +{
1386 + if (operating_mode == byte_mode)
1387 + cut_bytes (stream);
1388 + else
1389 + cut_fields (stream);
1390 +}
1391 +
1392 +/* Process file FILE to standard output.
1393 + Return true if successful. */
1394 +
1395 +static bool
1396 +cut_file (char const *file)
1397 +{
1398 + FILE *stream;
1399 +
1400 + if (STREQ (file, "-"))
1401 + {
1402 + have_read_stdin = true;
1403 + stream = stdin;
1404 + }
1405 + else
1406 + {
1407 + stream = fopen (file, "r");
1408 + if (stream == NULL)
1409 + {
1410 + error (0, errno, "%s", file);
1411 + return false;
1412 + }
1413 + }
1414 +
1415 + cut_stream (stream);
1416 +
1417 + if (ferror (stream))
1418 + {
1419 + error (0, errno, "%s", file);
1420 + return false;
1421 + }
1422 + if (STREQ (file, "-"))
1423 + clearerr (stream); /* Also clear EOF. */
1424 + else if (fclose (stream) == EOF)
1425 + {
1426 + error (0, errno, "%s", file);
1427 + return false;
1428 + }
1429 + return true;
1430 +}
1431 +
1432 +int
1433 +main (int argc, char **argv)
1434 +{
1435 + int optc;
1436 + bool ok;
1437 + bool delim_specified = false;
1438 + char *spec_list_string IF_LINT(= NULL);
1439 +
1440 + initialize_main (&argc, &argv);
1441 + set_program_name (argv[0]);
1442 + setlocale (LC_ALL, "");
1443 + bindtextdomain (PACKAGE, LOCALEDIR);
1444 + textdomain (PACKAGE);
1445 +
1446 + atexit (close_stdout);
1447 +
1448 + operating_mode = undefined_mode;
1449 +
1450 + /* By default, all non-delimited lines are printed. */
1451 + suppress_non_delimited = false;
1452 +
1453 + delim = '\0';
1454 + have_read_stdin = false;
1455 +
1456 + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
1457 + {
1458 + switch (optc)
1459 + {
1460 + case 'b':
1461 + case 'c':
1462 + /* Build the byte list. */
1463 + if (operating_mode != undefined_mode)
1464 + FATAL_ERROR (_("only one type of list may be specified"));
1465 + operating_mode = byte_mode;
1466 + spec_list_string = optarg;
1467 + break;
1468 +
1469 + case 'f':
1470 + /* Build the field list. */
1471 + if (operating_mode != undefined_mode)
1472 + FATAL_ERROR (_("only one type of list may be specified"));
1473 + operating_mode = field_mode;
1474 + spec_list_string = optarg;
1475 + break;
1476 +
1477 + case 'd':
1478 + /* New delimiter. */
1479 + /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
1480 + if (optarg[0] != '\0' && optarg[1] != '\0')
1481 + FATAL_ERROR (_("the delimiter must be a single character"));
1482 + delim = optarg[0];
1483 + delim_specified = true;
1484 + break;
1485 +
1486 + case OUTPUT_DELIMITER_OPTION:
1487 + output_delimiter_specified = true;
1488 + /* Interpret --output-delimiter='' to mean
1489 + `use the NUL byte as the delimiter.' */
1490 + output_delimiter_length = (optarg[0] == '\0'
1491 + ? 1 : strlen (optarg));
1492 + output_delimiter_string = xstrdup (optarg);
1493 + break;
1494 +
1495 + case 'n':
1496 + break;
1497 +
1498 + case 's':
1499 + suppress_non_delimited = true;
1500 + break;
1501 +
1502 + case COMPLEMENT_OPTION:
1503 + complement = true;
1504 + break;
1505 +
1506 + case_GETOPT_HELP_CHAR;
1507 +
1508 + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1509 +
1510 + default:
1511 + usage (EXIT_FAILURE);
1512 + }
1513 + }
1514 +
1515 + if (operating_mode == undefined_mode)
1516 + FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
1517 +
1518 + if (delim != '\0' && operating_mode != field_mode)
1519 + FATAL_ERROR (_("an input delimiter may be specified only\
1520 + when operating on fields"));
1521 +
1522 + if (suppress_non_delimited && operating_mode != field_mode)
1523 + FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
1524 +\tonly when operating on fields"));
1525 +
1526 + if (output_delimiter_specified)
1527 + {
1528 + range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
1529 + NULL, hash_int,
1530 + hash_compare_ints, NULL);
1531 + if (range_start_ht == NULL)
1532 + xalloc_die ();
1533 +
1534 + }
1535 +
1536 + if (! set_fields (spec_list_string))
1537 + {
1538 + if (operating_mode == field_mode)
1539 + FATAL_ERROR (_("missing list of fields"));
1540 + else
1541 + FATAL_ERROR (_("missing list of positions"));
1542 + }
1543 +
1544 + if (!delim_specified)
1545 + delim = '\t';
1546 +
1547 + if (output_delimiter_string == NULL)
1548 + {
1549 + static char dummy[2];
1550 + dummy[0] = delim;
1551 + dummy[1] = '\0';
1552 + output_delimiter_string = dummy;
1553 + output_delimiter_length = 1;
1554 + }
1555 +
1556 + if (optind == argc)
1557 + ok = cut_file ("-");
1558 + else
1559 + for (ok = true; optind < argc; optind++)
1560 + ok &= cut_file (argv[optind]);
1561 +
1562 + if (range_start_ht)
1563 + hash_free (range_start_ht);
1564 +
1565 + if (have_read_stdin && fclose (stdin) == EOF)
1566 + {
1567 + error (0, errno, "-");
1568 + ok = false;
1569 + }
1570 +
1571 + exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
1572 +}
1573 diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
1574 --- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
1575 +++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
1576 @@ -37,11 +37,28 @@
1577 #include <stdio.h>
1578 #include <getopt.h>
1579 #include <sys/types.h>
1580 +
1581 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1582 +#if HAVE_WCHAR_H
1583 +# include <wchar.h>
1584 +#endif
1585 +
1586 #include "system.h"
1587 #include "error.h"
1588 #include "quote.h"
1589 #include "xstrndup.h"
1590
1591 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1592 + installation; work around this configuration error. */
1593 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1594 +# define MB_LEN_MAX 16
1595 +#endif
1596 +
1597 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1598 +#if HAVE_MBRTOWC && defined mbstate_t
1599 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1600 +#endif
1601 +
1602 /* The official name of this program (e.g., no `g' prefix). */
1603 #define PROGRAM_NAME "expand"
1604
1605 @@ -357,6 +374,142 @@ expand (void)
1606 }
1607 }
1608
1609 +#if HAVE_MBRTOWC
1610 +/* Multibyte-locale variant of expand (): expand tabs to spaces, advancing
1611 + the column by the wcwidth () of each decoded character. */
1612 +static void
1613 +expand_multibyte (void)
1614 +{
1615 + FILE *fp; /* Input stream. */
1616 + mbstate_t i_state; /* Current shift state of the input stream. */
1617 + mbstate_t i_state_bak; /* Copy of I_STATE, restored if decoding fails. */
1618 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1619 + char *bufpos = buf; /* Next read position of BUF. */
1620 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1621 + wchar_t wc; /* The most recently decoded wide character. */
1622 + size_t mblength; /* The byte length of the multibyte sequence
1623 + that was decoded into WC. */
1624 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
1625 + int column = 0; /* Column on screen of the next char. */
1626 + int next_tab_column; /* Column the next tab stop is on. */
1627 + int convert = 1; /* If nonzero, perform translations. */
1628 +
1629 + fp = next_file ((FILE *) NULL);
1630 + if (fp == NULL)
1631 + return;
1632 +
1633 + memset (&i_state, '\0', sizeof(mbstate_t));
1634 +
1635 + for (;;)
1636 + {
1637 + /* Refill BUF: move the unread tail to the front, then append input. */
1638 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
1639 + {
1640 + memmove (buf, bufpos, buflen);
1641 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
1642 + bufpos = buf;
1643 + }
1644 +
1645 + /* No character is left in BUF. */
1646 + if (buflen < 1)
1647 + {
1648 + fp = next_file (fp);
1649 +
1650 + if (fp == NULL)
1651 + break; /* No more files. */
1652 + else
1653 + {
1654 + memset (&i_state, '\0', sizeof(mbstate_t));
1655 + continue;
1656 + }
1657 + }
1658 +
1659 + /* Get a wide character. */
1660 + i_state_bak = i_state;
1661 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
1662 +
1663 + switch (mblength)
1664 + {
1665 + case (size_t)-1: /* illegal byte sequence. */
1666 + case (size_t)-2: /* incomplete byte sequence. */
1667 + mblength = 1; /* Pass one raw byte through unchanged. */
1668 + i_state = i_state_bak;
1669 + if (convert)
1670 + {
1671 + ++column;
1672 + if (convert_entire_line == 0)
1673 + convert = 0;
1674 + }
1675 + putchar (*bufpos);
1676 + break;
1677 +
1678 + case 0: /* null. */
1679 + mblength = 1;
1680 + if (convert && convert_entire_line == 0)
1681 + convert = 0;
1682 + putchar ('\0');
1683 + break;
1684 +
1685 + default:
1686 + if (wc == L'\n') /* LF. */
1687 + {
1688 + tab_index = 0;
1689 + column = 0;
1690 + convert = 1;
1691 + putchar ('\n');
1692 + }
1693 + else if (wc == L'\t' && convert) /* Tab. */
1694 + {
1695 + if (tab_size == 0)
1696 + {
1697 + /* Do not let tab_index == first_free_tab;
1698 + stop when it is 1 less. */
1699 + while (tab_index < first_free_tab - 1
1700 + && column >= tab_list[tab_index])
1701 + tab_index++;
1702 + next_tab_column = tab_list[tab_index];
1703 + if (tab_index < first_free_tab - 1)
1704 + tab_index++;
1705 + if (column >= next_tab_column)
1706 + next_tab_column = column + 1;
1707 + }
1708 + else
1709 + next_tab_column = column + tab_size - column % tab_size;
1710 +
1711 + while (column < next_tab_column)
1712 + {
1713 + putchar (' ');
1714 + ++column;
1715 + }
1716 + }
1717 + else /* Others. */
1718 + {
1719 + if (convert)
1720 + {
1721 + if (wc == L'\b')
1722 + {
1723 + if (column > 0)
1724 + --column;
1725 + }
1726 + else
1727 + {
1728 + int width; /* The width of WC. */
1729 +
1730 + width = wcwidth (wc);
1731 + column += (width > 0) ? width : 0;
1732 + if (convert_entire_line == 0)
1733 + convert = 0;
1734 + }
1735 + }
1736 + fwrite (bufpos, sizeof(char), mblength, stdout);
1737 + }
1738 + }
1739 + buflen -= mblength;
1740 + bufpos += mblength;
1741 + }
1742 +}
1743 +#endif
1744 +
1745 int
1746 main (int argc, char **argv)
1747 {
1748 @@ -421,7 +574,12 @@ main (int argc, char **argv)
1749
1750 file_list = (optind < argc ? &argv[optind] : stdin_argv);
1751
1752 - expand ();
1753 +#if HAVE_MBRTOWC
1754 + if (MB_CUR_MAX > 1)
1755 + expand_multibyte ();
1756 + else
1757 +#endif
1758 + expand ();
1759
1760 if (have_read_stdin && fclose (stdin) != 0)
1761 error (EXIT_FAILURE, errno, "-");
1762 diff -urNp coreutils-8.0-orig/src/expand.c.orig coreutils-8.0/src/expand.c.orig
1763 --- coreutils-8.0-orig/src/expand.c.orig 1970-01-01 01:00:00.000000000 +0100
1764 +++ coreutils-8.0/src/expand.c.orig 2009-09-29 15:27:54.000000000 +0200
1765 @@ -0,0 +1,430 @@
1766 +/* expand - convert tabs to spaces
1767 + Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
1768 +
1769 + This program is free software: you can redistribute it and/or modify
1770 + it under the terms of the GNU General Public License as published by
1771 + the Free Software Foundation, either version 3 of the License, or
1772 + (at your option) any later version.
1773 +
1774 + This program is distributed in the hope that it will be useful,
1775 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1776 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1777 + GNU General Public License for more details.
1778 +
1779 + You should have received a copy of the GNU General Public License
1780 + along with this program. If not, see <http://www.gnu.org/licenses/>. */
1781 +
1782 +/* By default, convert all tabs to spaces.
1783 + Preserves backspace characters in the output; they decrement the
1784 + column count for tab calculations.
1785 + The default action is equivalent to -8.
1786 +
1787 + Options:
1788 + --tabs=tab1[,tab2[,...]]
1789 + -t tab1[,tab2[,...]]
1790 + -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
1791 + columns apart instead of the default 8. Otherwise,
1792 + set the tabs at columns tab1, tab2, etc. (numbered from
1793 + 0); replace any tabs beyond the tab stops given with
1794 + single spaces.
1795 + --initial
1796 + -i Only convert initial tabs on each line to spaces.
1797 +
1798 + David MacKenzie <djm@gnu.ai.mit.edu> */
1799 +
1800 +#include <config.h>
1801 +
1802 +#include <stdio.h>
1803 +#include <getopt.h>
1804 +#include <sys/types.h>
1805 +#include "system.h"
1806 +#include "error.h"
1807 +#include "quote.h"
1808 +#include "xstrndup.h"
1809 +
1810 +/* The official name of this program (e.g., no `g' prefix). */
1811 +#define PROGRAM_NAME "expand"
1812 +
1813 +#define AUTHORS proper_name ("David MacKenzie")
1814 +
1815 +/* If true, convert blanks even after nonblank characters have been
1816 + read on the line. */
1817 +static bool convert_entire_line;
1818 +
1819 +/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
1820 +static uintmax_t tab_size;
1821 +
1822 +/* Array of the explicit column numbers of the tab stops;
1823 + after `tab_list' is exhausted, each additional tab is replaced
1824 + by a space. The first column is column 0. */
1825 +static uintmax_t *tab_list;
1826 +
1827 +/* The number of allocated entries in `tab_list'. */
1828 +static size_t n_tabs_allocated;
1829 +
1830 +/* The index of the first invalid element of `tab_list',
1831 + where the next element can be added. */
1832 +static size_t first_free_tab;
1833 +
1834 +/* Null-terminated array of input filenames. */
1835 +static char **file_list;
1836 +
1837 +/* Default for `file_list' if no files are given on the command line. */
1838 +static char *stdin_argv[] =
1839 +{
1840 + (char *) "-", NULL
1841 +};
1842 +
1843 +/* True if we have ever read standard input. */
1844 +static bool have_read_stdin;
1845 +
1846 +/* The desired exit status. */
1847 +static int exit_status;
1848 +
1849 +static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";
1850 +
1851 +static struct option const longopts[] =
1852 +{
1853 + {"tabs", required_argument, NULL, 't'},
1854 + {"initial", no_argument, NULL, 'i'},
1855 + {GETOPT_HELP_OPTION_DECL},
1856 + {GETOPT_VERSION_OPTION_DECL},
1857 + {NULL, 0, NULL, 0}
1858 +};
1859 +
1860 +void
1861 +usage (int status)
1862 +{
1863 + if (status != EXIT_SUCCESS)
1864 + fprintf (stderr, _("Try `%s --help' for more information.\n"),
1865 + program_name);
1866 + else
1867 + {
1868 + printf (_("\
1869 +Usage: %s [OPTION]... [FILE]...\n\
1870 +"),
1871 + program_name);
1872 + fputs (_("\
1873 +Convert tabs in each FILE to spaces, writing to standard output.\n\
1874 +With no FILE, or when FILE is -, read standard input.\n\
1875 +\n\
1876 +"), stdout);
1877 + fputs (_("\
1878 +Mandatory arguments to long options are mandatory for short options too.\n\
1879 +"), stdout);
1880 + fputs (_("\
1881 + -i, --initial do not convert tabs after non blanks\n\
1882 + -t, --tabs=NUMBER have tabs NUMBER characters apart, not 8\n\
1883 +"), stdout);
1884 + fputs (_("\
1885 + -t, --tabs=LIST use comma separated list of explicit tab positions\n\
1886 +"), stdout);
1887 + fputs (HELP_OPTION_DESCRIPTION, stdout);
1888 + fputs (VERSION_OPTION_DESCRIPTION, stdout);
1889 + emit_ancillary_info ();
1890 + }
1891 + exit (status);
1892 +}
1893 +
1894 +/* Add tab stop TABVAL to the end of `tab_list'. */
1895 +
1896 +static void
1897 +add_tab_stop (uintmax_t tabval)
1898 +{
1899 + if (first_free_tab == n_tabs_allocated)
1900 + tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
1901 + tab_list[first_free_tab++] = tabval;
1902 +}
1903 +
1904 +/* Add the comma or blank separated list of tab stops STOPS
1905 + to the list of tab stops. */
1906 +
1907 +static void
1908 +parse_tab_stops (char const *stops)
1909 +{
1910 + bool have_tabval = false;
1911 + uintmax_t tabval IF_LINT (= 0);
1912 + char const *num_start IF_LINT (= NULL);
1913 + bool ok = true;
1914 +
1915 + for (; *stops; stops++)
1916 + {
1917 + if (*stops == ',' || isblank (to_uchar (*stops)))
1918 + {
1919 + if (have_tabval)
1920 + add_tab_stop (tabval);
1921 + have_tabval = false;
1922 + }
1923 + else if (ISDIGIT (*stops))
1924 + {
1925 + if (!have_tabval)
1926 + {
1927 + tabval = 0;
1928 + have_tabval = true;
1929 + num_start = stops;
1930 + }
1931 +
1932 + /* Detect overflow. */
1933 + if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
1934 + {
1935 + size_t len = strspn (num_start, "0123456789");
1936 + char *bad_num = xstrndup (num_start, len);
1937 + error (0, 0, _("tab stop is too large %s"), quote (bad_num));
1938 + free (bad_num);
1939 + ok = false;
1940 + stops = num_start + len - 1;
1941 + }
1942 + }
1943 + else
1944 + {
1945 + error (0, 0, _("tab size contains invalid character(s): %s"),
1946 + quote (stops));
1947 + ok = false;
1948 + break;
1949 + }
1950 + }
1951 +
1952 + if (!ok)
1953 + exit (EXIT_FAILURE);
1954 +
1955 + if (have_tabval)
1956 + add_tab_stop (tabval);
1957 +}
1958 +
1959 +/* Check that the list of tab stops TABS, with ENTRIES entries,
1960 + contains only nonzero, ascending values. */
1961 +
1962 +static void
1963 +validate_tab_stops (uintmax_t const *tabs, size_t entries)
1964 +{
1965 + uintmax_t prev_tab = 0;
1966 + size_t i;
1967 +
1968 + for (i = 0; i < entries; i++)
1969 + {
1970 + if (tabs[i] == 0)
1971 + error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
1972 + if (tabs[i] <= prev_tab)
1973 + error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
1974 + prev_tab = tabs[i];
1975 + }
1976 +}
1977 +
1978 +/* Close the old stream pointer FP if it is non-NULL,
1979 + and return a new one opened to read the next input file.
1980 + Open a filename of `-' as the standard input.
1981 + Return NULL if there are no more input files. */
1982 +
1983 +static FILE *
1984 +next_file (FILE *fp)
1985 +{
1986 + static char *prev_file;
1987 + char *file;
1988 +
1989 + if (fp)
1990 + {
1991 + if (ferror (fp))
1992 + {
1993 + error (0, errno, "%s", prev_file);
1994 + exit_status = EXIT_FAILURE;
1995 + }
1996 + if (STREQ (prev_file, "-"))
1997 + clearerr (fp); /* Also clear EOF. */
1998 + else if (fclose (fp) != 0)
1999 + {
2000 + error (0, errno, "%s", prev_file);
2001 + exit_status = EXIT_FAILURE;
2002 + }
2003 + }
2004 +
2005 + while ((file = *file_list++) != NULL)
2006 + {
2007 + if (STREQ (file, "-"))
2008 + {
2009 + have_read_stdin = true;
2010 + prev_file = file;
2011 + return stdin;
2012 + }
2013 + fp = fopen (file, "r");
2014 + if (fp)
2015 + {
2016 + prev_file = file;
2017 + return fp;
2018 + }
2019 + error (0, errno, "%s", file);
2020 + exit_status = EXIT_FAILURE;
2021 + }
2022 + return NULL;
2023 +}
2024 +
2025 +/* Change tabs to spaces, writing to stdout.
2026 + Read each file in `file_list', in order. */
2027 +
2028 +static void