/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Contents of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.41 - (hide annotations) (download) (as text)
Wed Oct 7 08:11:44 2009 UTC (6 weeks, 4 days ago) by ovasik
Branch: MAIN
CVS Tags: coreutils-8_0-1_fc13, coreutils-8_0-2_fc13
Changes since 1.40: +14643 -3261 lines
File MIME type: text/x-patch
defuzz patches
1 ovasik 1.41 diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
2     --- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
3     +++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
4     @@ -21,6 +21,11 @@
5 twaugh 1.20
6     # include <stdio.h>
7    
8     +/* Get mbstate_t. */
9     +# if HAVE_WCHAR_H
10     +# include <wchar.h>
11     +# endif
12     +
13     /* A `struct linebuffer' holds a line of text. */
14    
15     struct linebuffer
16 ovasik 1.41 @@ -28,6 +33,9 @@ struct linebuffer
17 twaugh 1.20 size_t size; /* Allocated. */
18     size_t length; /* Used. */
19     char *buffer;
20     +# if HAVE_WCHAR_H
21     + mbstate_t state;
22     +# endif
23     };
24    
25     /* Initialize linebuffer LINEBUFFER for use. */
26 ovasik 1.41 diff -urNp coreutils-8.0-orig/lib/linebuffer.h.orig coreutils-8.0/lib/linebuffer.h.orig
27     --- coreutils-8.0-orig/lib/linebuffer.h.orig 1970-01-01 01:00:00.000000000 +0100
28     +++ coreutils-8.0/lib/linebuffer.h.orig 2009-10-06 10:59:48.000000000 +0200
29     @@ -0,0 +1,53 @@
30     +/* linebuffer.h -- declarations for reading arbitrarily long lines
31     +
32     + Copyright (C) 1986, 1991, 1998, 1999, 2002, 2003, 2007 Free Software
33     + Foundation, Inc.
34     +
35     + This program is free software: you can redistribute it and/or modify
36     + it under the terms of the GNU General Public License as published by
37     + the Free Software Foundation; either version 3 of the License, or
38     + (at your option) any later version.
39     +
40     + This program is distributed in the hope that it will be useful,
41     + but WITHOUT ANY WARRANTY; without even the implied warranty of
42     + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43     + GNU General Public License for more details.
44     +
45     + You should have received a copy of the GNU General Public License
46     + along with this program. If not, see <http://www.gnu.org/licenses/>. */
47     +
48     +#if !defined LINEBUFFER_H
49     +# define LINEBUFFER_H
50     +
51     +# include <stdio.h>
52     +
53     +/* A `struct linebuffer' holds a line of text. */
54     +
55     +struct linebuffer
56     +{
57     + size_t size; /* Allocated. */
58     + size_t length; /* Used. */
59     + char *buffer;
60     +};
61     +
62     +/* Initialize linebuffer LINEBUFFER for use. */
63     +void initbuffer (struct linebuffer *linebuffer);
64     +
65     +/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
66     + Consider lines to be terminated by DELIMITER.
67     + Keep the delimiter; append DELIMITER if we reach EOF and it wasn't
68     + the last character in the file. Do not NUL-terminate.
69     + Return LINEBUFFER, except at end of file return NULL. */
70     +struct linebuffer *readlinebuffer_delim (struct linebuffer *linebuffer,
71     + FILE *stream, char delimiter);
72     +
73     +/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
74     + Keep the newline; append a newline if it's the last line of a file
75     + that ends in a non-newline character. Do not NUL-terminate.
76     + Return LINEBUFFER, except at end of file return NULL. */
77     +struct linebuffer *readlinebuffer (struct linebuffer *linebuffer, FILE *stream);
78     +
79     +/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */
80     +void freebuffer (struct linebuffer *);
81     +
82     +#endif /* LINEBUFFER_H */
83     diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
84     --- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
85     +++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
86     @@ -28,6 +28,11 @@
87     #include <assert.h>
88 cvsdist 1.1 #include <getopt.h>
89     #include <sys/types.h>
90     +
91 ovasik 1.41 +/* Get mbstate_t, mbrtowc(). */
92 cvsdist 1.1 +#if HAVE_WCHAR_H
93     +# include <wchar.h>
94     +#endif
95     #include "system.h"
96 ovasik 1.41
97 cvsdist 1.1 #include "error.h"
98 ovasik 1.41 @@ -36,6 +41,18 @@
99 cvsdist 1.1 #include "quote.h"
100     #include "xstrndup.h"
101    
102     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
103 ovasik 1.41 + installation; work around this configuration error. */
104 cvsdist 1.1 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
105 ovasik 1.41 +# undef MB_LEN_MAX
106 cvsdist 1.1 +# define MB_LEN_MAX 16
107     +#endif
108     +
109     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
110     +#if HAVE_MBRTOWC && defined mbstate_t
111     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
112     +#endif
113     +
114     /* The official name of this program (e.g., no `g' prefix). */
115 ovasik 1.41 #define PROGRAM_NAME "cut"
116 cvsdist 1.1
117 ovasik 1.41 @@ -71,6 +88,52 @@
118     } \
119     while (0)
120 cvsdist 1.1
121 ovasik 1.41 +/* Refill the buffer BUF to get a multibyte character. */
122     +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
123     + do \
124     + { \
125     + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
126     + { \
127     + memmove (BUF, BUFPOS, BUFLEN); \
128     + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
129     + BUFPOS = BUF; \
130     + } \
131     + } \
132     + while (0)
133 twaugh 1.16 +
134 ovasik 1.41 +/* Get the wide character at BUFPOS; BUFPOS itself is not advanced.
135     + If the byte sequence is not a valid character, CONVFAIL is 1; otherwise 0. */
136     +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
137     + do \
138     + { \
139     + mbstate_t state_bak; \
140     + \
141     + if (BUFLEN < 1) \
142     + { \
143     + WC = WEOF; \
144     + break; \
145     + } \
146     + \
147     + /* Get a wide character. */ \
148     + CONVFAIL = 0; \
149     + state_bak = STATE; \
150     + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
151     + \
152     + switch (MBLENGTH) \
153     + { \
154     + case (size_t)-1: \
155     + case (size_t)-2: \
156     + CONVFAIL++; \
157     + STATE = state_bak; \
158     + /* Fall through. */ \
159     + \
160     + case 0: \
161     + MBLENGTH = 1; \
162     + break; \
163     + } \
164     + } \
165     + while (0)
166 cvsdist 1.1 +
167 ovasik 1.41 struct range_pair
168     {
169     size_t lo;
170     @@ -89,7 +152,7 @@ static char *field_1_buffer;
171     /* The number of bytes allocated for FIELD_1_BUFFER. */
172     static size_t field_1_bufsize;
173    
174     -/* The largest field or byte index used as an endpoint of a closed
175     +/* The largest byte, character or field index used as an endpoint of a closed
176     or degenerate range specification; this doesn't include the starting
177     index of right-open-ended ranges. For example, with either range spec
178     `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
179     @@ -101,10 +164,11 @@ static size_t eol_range_start;
180    
181     /* This is a bit vector.
182     In byte mode, which bytes to output.
183     + In character mode, which characters to output.
184     In field mode, which DELIM-separated fields to output.
185     - Both bytes and fields are numbered starting with 1,
186     + Bytes, characters and fields are numbered starting with 1,
187     so the zeroth bit of this array is unused.
188     - A field or byte K has been selected if
189     + A byte, character or field K has been selected if
190     (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
191     || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
192     static unsigned char *printable_field;
193     @@ -113,15 +177,25 @@ enum operating_mode
194     {
195     undefined_mode,
196    
197     - /* Output characters that are in the given bytes. */
198     + /* Output bytes that are at the given positions. */
199     byte_mode,
200    
201     + /* Output characters that are at the given positions. */
202     + character_mode,
203 cvsdist 1.1 +
204 ovasik 1.41 /* Output the given delimeter-separated fields. */
205     field_mode
206     };
207    
208     static enum operating_mode operating_mode;
209    
210     +/* If nonzero, when in byte mode, don't split multibyte characters. */
211     +static int byte_mode_character_aware;
212 twaugh 1.16 +
213 ovasik 1.41 +/* If nonzero, use the single-byte functions even when
214     + this program runs in a multibyte locale. */
215     +static int force_singlebyte_mode;
216     +
217     /* If true do not output lines containing no delimeter characters.
218     Otherwise, all such lines are printed. This option is valid only
219     with field mode. */
220     @@ -133,6 +207,9 @@ static bool complement;
221    
222     /* The delimeter character for field mode. */
223     static unsigned char delim;
224     +#if HAVE_WCHAR_H
225     +static wchar_t wcdelim;
226     +#endif
227    
228     /* True if the --output-delimiter=STRING option was specified. */
229     static bool output_delimiter_specified;
230     @@ -206,7 +283,7 @@ Mandatory arguments to long options are
231     -f, --fields=LIST select only these fields; also print any line\n\
232     that contains no delimiter character, unless\n\
233     the -s option is specified\n\
234     - -n (ignored)\n\
235     + -n with -b: don't split multibyte characters\n\
236     "), stdout);
237     fputs (_("\
238     --complement complement the set of selected bytes, characters\n\
239     @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
240     in_digits = false;
241     /* Starting a range. */
242     if (dash_found)
243     - FATAL_ERROR (_("invalid byte or field list"));
244     + FATAL_ERROR (_("invalid byte, character or field list"));
245     dash_found = true;
246     fieldstr++;
247    
248     @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
249     if (!rhs_specified)
250     {
251     /* `n-'. From `initial' to end of line. */
252     - eol_range_start = initial;
253     + if (eol_range_start == 0 ||
254     + (eol_range_start != 0 && eol_range_start > initial))
255     + eol_range_start = initial;
256     field_found = true;
257     }
258     else
259     {
260     /* `m-n' or `-n' (1-n). */
261     if (value < initial)
262     - FATAL_ERROR (_("invalid decreasing range"));
263     + FATAL_ERROR (_("invalid byte, character or field list"));
264    
265     /* Is there already a range going to end of line? */
266     if (eol_range_start != 0)
267     @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
268     if (operating_mode == byte_mode)
269     error (0, 0,
270     _("byte offset %s is too large"), quote (bad_num));
271     + else if (operating_mode == character_mode)
272     + error (0, 0,
273     + _("character offset %s is too large"), quote (bad_num));
274     else
275     error (0, 0,
276     _("field number %s is too large"), quote (bad_num));
277     @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
278     fieldstr++;
279     }
280     else
281     - FATAL_ERROR (_("invalid byte or field list"));
282     + FATAL_ERROR (_("invalid byte, character or field list"));
283     }
284    
285     max_range_endpoint = 0;
286     @@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
287     }
288     }
289    
290     +#if HAVE_MBRTOWC
291     +/* This function is used in the following two cases.
292 twaugh 1.16 +
293 ovasik 1.41 + 1. Read from the stream STREAM, printing to standard output any selected
294     + characters.
295 cvsdist 1.1 +
296 ovasik 1.41 + 2. Read from stream STREAM, printing to standard output any selected bytes,
297     + without splitting multibyte characters. */
298     +
299     +static void
300     +cut_characters_or_cut_bytes_no_split (FILE *stream)
301     +{
302     + int idx; /* number of bytes or characters in the line so far. */
303     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
304     + char *bufpos; /* Next read position of BUF. */
305     + size_t buflen; /* The length of the byte sequence in buf. */
306     + wint_t wc; /* The wide character just read. */
307     + size_t mblength; /* The byte length of the multibyte character
308     + that was converted to WC. */
309     + mbstate_t state; /* State of the stream. */
310     + int convfail; /* 1 when conversion failed; otherwise 0. */
311 cvsdist 1.1 +
312 ovasik 1.41 + idx = 0;
313     + buflen = 0;
314     + bufpos = buf;
315     + memset (&state, '\0', sizeof(mbstate_t));
316 cvsdist 1.1 +
317 ovasik 1.41 + while (1)
318     + {
319     + REFILL_BUFFER (buf, bufpos, buflen, stream);
320 cvsdist 1.1 +
321 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
322 twaugh 1.16 +
323 ovasik 1.41 + if (wc == WEOF)
324     + {
325     + if (idx > 0)
326     + putchar ('\n');
327     + break;
328     + }
329     + else if (wc == L'\n')
330     + {
331     + putchar ('\n');
332     + idx = 0;
333     + }
334     + else
335     + {
336     + idx += (operating_mode == byte_mode) ? mblength : 1;
337     + if (print_kth (idx, NULL))
338     + fwrite (bufpos, mblength, sizeof(char), stdout);
339     + }
340 cvsdist 1.1 +
341 twaugh 1.16 + buflen -= mblength;
342     + bufpos += mblength;
343 cvsdist 1.1 + }
344     +}
345     +#endif
346 ovasik 1.41 +
347     /* Read from stream STREAM, printing to standard output any selected fields. */
348 twaugh 1.16
349 ovasik 1.41 static void
350     @@ -701,13 +840,192 @@ cut_fields (FILE *stream)
351     }
352     }
353 twaugh 1.16
354 cvsdist 1.1 +#if HAVE_MBRTOWC
355 ovasik 1.41 +static void
356     +cut_fields_mb (FILE *stream)
357     +{
358     + int c;
359     + unsigned int field_idx;
360     + int found_any_selected_field;
361     + int buffer_first_field;
362     + int empty_input;
363     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
364     + char *bufpos; /* Next read position of BUF. */
365     + size_t buflen; /* The length of the byte sequence in buf. */
366     + wint_t wc = 0; /* The wide character just read. */
367     + size_t mblength; /* The byte length of the multibyte character
368     + that was converted to WC. */
369     + mbstate_t state; /* State of the stream. */
370     + int convfail; /* 1 when conversion failed; otherwise 0. */
371     +
372     + found_any_selected_field = 0;
373     + field_idx = 1;
374     + bufpos = buf;
375     + buflen = 0;
376     + memset (&state, '\0', sizeof(mbstate_t));
377     +
378     + c = getc (stream);
379     + empty_input = (c == EOF);
380     + if (c != EOF)
381     + ungetc (c, stream);
382 twaugh 1.16 + else
383 ovasik 1.41 + wc = WEOF;
384 cvsdist 1.1 +
385 ovasik 1.41 + /* To support the semantics of the -s flag, we may have to buffer
386     + all of the first field to determine whether it is `delimited.'
387     + But that is unnecessary if all non-delimited lines must be printed
388     + and the first field has been selected, or if non-delimited lines
389     + must be suppressed and the first field has *not* been selected.
390     + That is because a non-delimited line has exactly one field. */
391     + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
392 cvsdist 1.1 +
393 ovasik 1.41 + while (1)
394     + {
395     + if (field_idx == 1 && buffer_first_field)
396     + {
397     + int len = 0;
398 cvsdist 1.1 +
399 ovasik 1.41 + while (1)
400     + {
401     + REFILL_BUFFER (buf, bufpos, buflen, stream);
402 cvsdist 1.1 +
403 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER
404     + (wc, bufpos, buflen, mblength, state, convfail);
405 cvsdist 1.1 +
406 ovasik 1.41 + if (wc == WEOF)
407     + break;
408 cvsdist 1.1 +
409 ovasik 1.41 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
410     + memcpy (field_1_buffer + len, bufpos, mblength);
411     + len += mblength;
412     + buflen -= mblength;
413     + bufpos += mblength;
414 cvsdist 1.1 +
415 ovasik 1.41 + if (!convfail && (wc == L'\n' || wc == wcdelim))
416     + break;
417     + }
418 twaugh 1.20 +
419 ovasik 1.41 + if (wc == WEOF)
420     + break;
421 twaugh 1.20 +
422 ovasik 1.41 + /* If the first field extends to the end of line (it is not
423     + delimited) and we are printing all non-delimited lines,
424     + print this one. */
425     + if (convfail || (!convfail && wc != wcdelim))
426     + {
427     + if (suppress_non_delimited)
428     + {
429     + /* Empty. */
430     + }
431     + else
432     + {
433     + fwrite (field_1_buffer, sizeof (char), len, stdout);
434     + /* Make sure the output line is newline terminated. */
435     + if (convfail || (!convfail && wc != L'\n'))
436     + putchar ('\n');
437     + }
438     + continue;
439     + }
440 twaugh 1.20 +
441 ovasik 1.41 + if (print_kth (1, NULL))
442     + {
443     + /* Print the field, but not the trailing delimiter. */
444     + fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
445     + found_any_selected_field = 1;
446     + }
447     + ++field_idx;
448     + }
449 twaugh 1.20 +
450 ovasik 1.41 + if (wc != WEOF)
451     + {
452     + if (print_kth (field_idx, NULL))
453     + {
454     + if (found_any_selected_field)
455     + {
456     + fwrite (output_delimiter_string, sizeof (char),
457     + output_delimiter_length, stdout);
458     + }
459     + found_any_selected_field = 1;
460     + }
461 twaugh 1.20 +
462 ovasik 1.41 + while (1)
463     + {
464     + REFILL_BUFFER (buf, bufpos, buflen, stream);
465 twaugh 1.20 +
466 ovasik 1.41 + GET_NEXT_WC_FROM_BUFFER
467     + (wc, bufpos, buflen, mblength, state, convfail);
468 cvsdist 1.1 +
469 ovasik 1.41 + if (wc == WEOF)
470     + break;
471     + else if (!convfail && (wc == wcdelim || wc == L'\n'))
472     + {
473     + buflen -= mblength;
474     + bufpos += mblength;
475     + break;
476     + }
477 cvsdist 1.1 +
478 ovasik 1.41 + if (print_kth (field_idx, NULL))
479     + fwrite (bufpos, mblength, sizeof(char), stdout);
480 twaugh 1.20 +
481 ovasik 1.41 + buflen -= mblength;
482     + bufpos += mblength;
483     + }
484     + }
485 cvsdist 1.1 +
486 ovasik 1.41 + if ((!convfail || wc == L'\n') && buflen < 1)
487     + wc = WEOF;
488 cvsdist 1.1 +
489 ovasik 1.41 + if (!convfail && wc == wcdelim)
490     + ++field_idx;
491     + else if (wc == WEOF || (!convfail && wc == L'\n'))
492     + {
493     + if (found_any_selected_field
494     + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
495     + putchar ('\n');
496     + if (wc == WEOF)
497     + break;
498     + field_idx = 1;
499     + found_any_selected_field = 0;
500     + }
501     + }
502     +}
503     +#endif
504 cvsdist 1.1 +
505 ovasik 1.41 static void
506     cut_stream (FILE *stream)
507     {
508     - if (operating_mode == byte_mode)
509     - cut_bytes (stream);
510     +#if HAVE_MBRTOWC
511     + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
512     + {
513     + switch (operating_mode)
514     + {
515     + case byte_mode:
516     + if (byte_mode_character_aware)
517     + cut_characters_or_cut_bytes_no_split (stream);
518     + else
519     + cut_bytes (stream);
520     + break;
521 twaugh 1.20 +
522 ovasik 1.41 + case character_mode:
523     + cut_characters_or_cut_bytes_no_split (stream);
524     + break;
525 twaugh 1.20 +
526 ovasik 1.41 + case field_mode:
527     + cut_fields_mb (stream);
528     + break;
529 twaugh 1.20 +
530 ovasik 1.41 + default:
531     + abort ();
532     + }
533 twaugh 1.20 + }
534 ovasik 1.41 else
535     - cut_fields (stream);
536 cvsdist 1.1 +#endif
537 ovasik 1.41 + {
538     + if (operating_mode == field_mode)
539     + cut_fields (stream);
540     + else
541     + cut_bytes (stream);
542     + }
543     }
544 cvsdist 1.1
545 ovasik 1.41 /* Process file FILE to standard output.
546     @@ -757,6 +1075,8 @@ main (int argc, char **argv)
547     bool ok;
548     bool delim_specified = false;
549     char *spec_list_string IF_LINT(= NULL);
550     + char mbdelim[MB_LEN_MAX + 1];
551     + size_t delimlen = 0;
552 cvsdist 1.1
553 ovasik 1.41 initialize_main (&argc, &argv);
554     set_program_name (argv[0]);
555     @@ -779,7 +1099,6 @@ main (int argc, char **argv)
556     switch (optc)
557 ovasik 1.39 {
558 ovasik 1.41 case 'b':
559     - case 'c':
560     /* Build the byte list. */
561     if (operating_mode != undefined_mode)
562     FATAL_ERROR (_("only one type of list may be specified"));
563     @@ -787,6 +1106,14 @@ main (int argc, char **argv)
564     spec_list_string = optarg;
565     break;
566 cvsdist 1.1
567 ovasik 1.41 + case 'c':
568     + /* Build the character list. */
569     + if (operating_mode != undefined_mode)
570     + FATAL_ERROR (_("only one type of list may be specified"));
571     + operating_mode = character_mode;
572     + spec_list_string = optarg;
573     + break;
574     +
575     case 'f':
576     /* Build the field list. */
577     if (operating_mode != undefined_mode)
578     @@ -798,10 +1125,35 @@ main (int argc, char **argv)
579     case 'd':
580     /* New delimiter. */
581     /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
582     - if (optarg[0] != '\0' && optarg[1] != '\0')
583     - FATAL_ERROR (_("the delimiter must be a single character"));
584     - delim = optarg[0];
585     - delim_specified = true;
586     + {
587 ovasik 1.39 +#if HAVE_MBRTOWC
588 ovasik 1.41 + if(MB_CUR_MAX > 1)
589     + {
590     + mbstate_t state;
591     +
592     + memset (&state, '\0', sizeof(mbstate_t));
593     + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
594     +
595     + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
596     + ++force_singlebyte_mode;
597     + else
598     + {
599     + delimlen = (delimlen < 1) ? 1 : delimlen;
600     + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
601     + FATAL_ERROR (_("the delimiter must be a single character"));
602     + memcpy (mbdelim, optarg, delimlen);
603     + }
604     + }
605 ovasik 1.39 +
606 ovasik 1.41 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
607 ovasik 1.39 +#endif
608 ovasik 1.41 + {
609     + if (optarg[0] != '\0' && optarg[1] != '\0')
610     + FATAL_ERROR (_("the delimiter must be a single character"));
611     + delim = (unsigned char) optarg[0];
612     + }
613     + delim_specified = true;
614     + }
615     break;
616    
617     case OUTPUT_DELIMITER_OPTION:
618     @@ -814,6 +1166,7 @@ main (int argc, char **argv)
619     break;
620    
621     case 'n':
622     + byte_mode_character_aware = 1;
623 ovasik 1.39 break;
624    
625 ovasik 1.41 case 's':
626     @@ -836,7 +1189,7 @@ main (int argc, char **argv)
627     if (operating_mode == undefined_mode)
628     FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
629    
630     - if (delim != '\0' && operating_mode != field_mode)
631     + if (delim_specified && operating_mode != field_mode)
632     FATAL_ERROR (_("an input delimiter may be specified only\
633     when operating on fields"));
634 ovasik 1.25
635 ovasik 1.41 @@ -863,15 +1216,34 @@ main (int argc, char **argv)
636 ovasik 1.25 }
637    
638 ovasik 1.41 if (!delim_specified)
639     - delim = '\t';
640     + {
641     + delim = '\t';
642     +#ifdef HAVE_MBRTOWC
643     + wcdelim = L'\t';
644     + mbdelim[0] = '\t';
645     + mbdelim[1] = '\0';
646     + delimlen = 1;
647     +#endif
648     + }
649 ovasik 1.25
650 ovasik 1.41 if (output_delimiter_string == NULL)
651 ovasik 1.25 {
652 ovasik 1.41 - static char dummy[2];
653     - dummy[0] = delim;
654     - dummy[1] = '\0';
655     - output_delimiter_string = dummy;
656     - output_delimiter_length = 1;
657 ovasik 1.25 +#ifdef HAVE_MBRTOWC
658 ovasik 1.41 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
659     + {
660     + output_delimiter_string = xstrdup(mbdelim);
661     + output_delimiter_length = delimlen;
662     + }
663 ovasik 1.25 +
664 ovasik 1.41 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
665     +#endif
666     + {
667     + static char dummy[2];
668     + dummy[0] = delim;
669     + dummy[1] = '\0';
670     + output_delimiter_string = dummy;
671     + output_delimiter_length = 1;
672     + }
673     }
674    
675     if (optind == argc)
676     diff -urNp coreutils-8.0-orig/src/cut.c.orig coreutils-8.0/src/cut.c.orig
677     --- coreutils-8.0-orig/src/cut.c.orig 1970-01-01 01:00:00.000000000 +0100
678     +++ coreutils-8.0/src/cut.c.orig 2009-09-23 10:25:44.000000000 +0200
679     @@ -0,0 +1,893 @@
680     +/* cut - remove parts of lines of files
681     + Copyright (C) 1997-2009 Free Software Foundation, Inc.
682     + Copyright (C) 1984 David M. Ihnat
683 ovasik 1.25 +
684 ovasik 1.41 + This program is free software: you can redistribute it and/or modify
685     + it under the terms of the GNU General Public License as published by
686     + the Free Software Foundation, either version 3 of the License, or
687     + (at your option) any later version.
688 ovasik 1.25 +
689 ovasik 1.41 + This program is distributed in the hope that it will be useful,
690     + but WITHOUT ANY WARRANTY; without even the implied warranty of
691     + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
692     + GNU General Public License for more details.
693 ovasik 1.25 +
694 ovasik 1.41 + You should have received a copy of the GNU General Public License
695     + along with this program. If not, see <http://www.gnu.org/licenses/>. */
696 ovasik 1.25 +
697 ovasik 1.41 +/* Written by David Ihnat. */
698 ovasik 1.25 +
699 ovasik 1.41 +/* POSIX changes, bug fixes, long-named options, and cleanup
700     + by David MacKenzie <djm@gnu.ai.mit.edu>.
701 ovasik 1.25 +
702 ovasik 1.41 + Rewrite cut_fields and cut_bytes -- Jim Meyering. */
703 ovasik 1.25 +
704 ovasik 1.41 +#include <config.h>
705 ovasik 1.25 +
706 ovasik 1.41 +#include <stdio.h>
707     +#include <assert.h>
708     +#include <getopt.h>
709     +#include <sys/types.h>
710     +#include "system.h"
711     +
712     +#include "error.h"
713     +#include "getndelim2.h"
714     +#include "hash.h"
715     +#include "quote.h"
716     +#include "xstrndup.h"
717     +
718     +/* The official name of this program (e.g., no `g' prefix). */
719     +#define PROGRAM_NAME "cut"
720     +
721     +#define AUTHORS \
722     + proper_name ("David M. Ihnat"), \
723     + proper_name ("David MacKenzie"), \
724     + proper_name ("Jim Meyering")
725 ovasik 1.25 +
726 ovasik 1.41 +#define FATAL_ERROR(Message) \
727     + do \
728     + { \
729     + error (0, 0, (Message)); \
730     + usage (EXIT_FAILURE); \
731     + } \
732     + while (0)
733 ovasik 1.25 +
734 ovasik 1.41 +/* Append LOW, HIGH to the list RP of range pairs, allocating additional
735     + space if necessary. Update local variable N_RP. When allocating,
736     + update global variable N_RP_ALLOCATED. */
737     +
738     +#define ADD_RANGE_PAIR(rp, low, high) \
739     + do \
740     + { \
741     + if (low == 0 || high == 0) \
742     + FATAL_ERROR (_("fields and positions are numbered from 1")); \
743     + if (n_rp >= n_rp_allocated) \
744     + { \
745     + (rp) = X2NREALLOC (rp, &n_rp_allocated); \
746     + } \
747     + rp[n_rp].lo = (low); \
748     + rp[n_rp].hi = (high); \
749     + ++n_rp; \
750     + } \
751     + while (0)
752 twaugh 1.20 +
753 ovasik 1.41 +struct range_pair
754     + {
755     + size_t lo;
756     + size_t hi;
757     + };
758     +
759     +/* This buffer is used to support the semantics of the -s option
760     + (or lack of same) when the specified field list includes (does
761     + not include) the first field. In both of those cases, the entire
762     + first field must be read into this buffer to determine whether it
763     + is followed by a delimiter or a newline before any of it may be
764     + output. Otherwise, cut_fields can do the job without using this
765     + buffer. */
766     +static char *field_1_buffer;
767     +
768     +/* The number of bytes allocated for FIELD_1_BUFFER. */
769     +static size_t field_1_bufsize;
770     +
771     +/* The largest field or byte index used as an endpoint of a closed
772     + or degenerate range specification; this doesn't include the starting
773     + index of right-open-ended ranges. For example, with either range spec
774     + `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
775     +static size_t max_range_endpoint;
776     +
777     +/* If nonzero, this is the index of the first field in a range that goes
778     + to end of line. */
779     +static size_t eol_range_start;
780     +
781     +/* This is a bit vector.
782     + In byte mode, which bytes to output.
783     + In field mode, which DELIM-separated fields to output.
784     + Both bytes and fields are numbered starting with 1,
785     + so the zeroth bit of this array is unused.
786     + A field or byte K has been selected if
787     + (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
788     + || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
789     +static unsigned char *printable_field;
790 twaugh 1.20 +
791 ovasik 1.41 +enum operating_mode
792     + {
793     + undefined_mode,
794 twaugh 1.20 +
795 ovasik 1.41 + /* Output characters that are in the given bytes. */
796     + byte_mode,
797 twaugh 1.20 +
798 ovasik 1.41 + /* Output the given delimeter-separated fields. */
799     + field_mode
800     + };
801 twaugh 1.20 +
802 ovasik 1.41 +static enum operating_mode operating_mode;
803 twaugh 1.20 +
804 ovasik 1.41 +/* If true do not output lines containing no delimeter characters.
805     + Otherwise, all such lines are printed. This option is valid only
806     + with field mode. */
807     +static bool suppress_non_delimited;
808     +
809     +/* If nonzero, print all bytes, characters, or fields _except_
810     + those that were specified. */
811     +static bool complement;
812     +
813     +/* The delimeter character for field mode. */
814     +static unsigned char delim;
815     +
816     +/* True if the --output-delimiter=STRING option was specified. */
817     +static bool output_delimiter_specified;
818     +
819     +/* The length of output_delimiter_string. */
820     +static size_t output_delimiter_length;
821     +
822     +/* The output field separator string. Defaults to the 1-character
823     + string consisting of the input delimiter. */
824     +static char *output_delimiter_string;
825     +
826     +/* True if we have ever read standard input. */
827     +static bool have_read_stdin;
828     +
829     +#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
830     +
831     +/* The set of range-start indices. For example, given a range-spec list like
832     + `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
833     + Note that although `4' looks like a range-start index, it is in the middle
834     + of the `3-5' range, so it doesn't count.
835     + This table is created/used IFF output_delimiter_specified is set. */
836     +static Hash_table *range_start_ht;
837     +
838     +/* For long options that have no equivalent short option, use a
839     + non-character as a pseudo short option, starting with CHAR_MAX + 1. */
840     +enum
841     +{
842     + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
843     + COMPLEMENT_OPTION
844     +};
845 twaugh 1.20 +
846 ovasik 1.41 +static struct option const longopts[] =
847     +{
848     + {"bytes", required_argument, NULL, 'b'},
849     + {"characters", required_argument, NULL, 'c'},
850     + {"fields", required_argument, NULL, 'f'},
851     + {"delimiter", required_argument, NULL, 'd'},
852     + {"only-delimited", no_argument, NULL, 's'},
853     + {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
854     + {"complement", no_argument, NULL, COMPLEMENT_OPTION},
855     + {GETOPT_HELP_OPTION_DECL},
856     + {GETOPT_VERSION_OPTION_DECL},
857     + {NULL, 0, NULL, 0}
858     +};
859 twaugh 1.20 +
860 ovasik 1.41 +void
861     +usage (int status)
862 cvsdist 1.1 +{
863 ovasik 1.41 + if (status != EXIT_SUCCESS)
864     + fprintf (stderr, _("Try `%s --help' for more information.\n"),
865     + program_name);
866     + else
867     + {
868     + printf (_("\
869     +Usage: %s OPTION... [FILE]...\n\
870     +"),
871     + program_name);
872     + fputs (_("\
873     +Print selected parts of lines from each FILE to standard output.\n\
874     +\n\
875     +"), stdout);
876     + fputs (_("\
877     +Mandatory arguments to long options are mandatory for short options too.\n\
878     +"), stdout);
879     + fputs (_("\
880     + -b, --bytes=LIST select only these bytes\n\
881     + -c, --characters=LIST select only these characters\n\
882     + -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
883     +"), stdout);
884     + fputs (_("\
885     + -f, --fields=LIST select only these fields; also print any line\n\
886     + that contains no delimiter character, unless\n\
887     + the -s option is specified\n\
888     + -n (ignored)\n\
889     +"), stdout);
890     + fputs (_("\
891     + --complement complement the set of selected bytes, characters\n\
892     + or fields\n\
893     +"), stdout);
894     + fputs (_("\
895     + -s, --only-delimited do not print lines not containing delimiters\n\
896     + --output-delimiter=STRING use STRING as the output delimiter\n\
897     + the default is to use the input delimiter\n\
898     +"), stdout);
899     + fputs (HELP_OPTION_DESCRIPTION, stdout);
900     + fputs (VERSION_OPTION_DESCRIPTION, stdout);
901     + fputs (_("\
902     +\n\
903     +Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
904     +range, or many ranges separated by commas. Selected input is written\n\
905     +in the same order that it is read, and is written exactly once.\n\
906     +"), stdout);
907     + fputs (_("\
908     +Each range is one of:\n\
909     +\n\
910     + N N'th byte, character or field, counted from 1\n\
911     + N- from N'th byte, character or field, to end of line\n\
912     + N-M from N'th to M'th (included) byte, character or field\n\
913     + -M from first to M'th (included) byte, character or field\n\
914     +\n\
915     +With no FILE, or when FILE is -, read standard input.\n\
916     +"), stdout);
917     + emit_ancillary_info ();
918     + }
919     + exit (status);
920     +}
921 cvsdist 1.1 +
922 ovasik 1.41 +static inline void
923     +mark_range_start (size_t i)
924     +{
925     + /* Record the fact that `i' is a range-start index. */
926     + void *ent_from_table = hash_insert (range_start_ht, (void*) i);
927     + if (ent_from_table == NULL)
928 cvsdist 1.1 + {
929 ovasik 1.41 + /* Insertion failed due to lack of memory. */
930     + xalloc_die ();
931     + }
932     + assert ((size_t) ent_from_table == i);
933     +}
934 cvsdist 1.1 +
935 ovasik 1.41 +static inline void
936     +mark_printable_field (size_t i)
937     +{
938     + size_t n = i / CHAR_BIT;
939     + printable_field[n] |= (1 << (i % CHAR_BIT));
940     +}
941 cvsdist 1.1 +
942 ovasik 1.41 +static inline bool
943     +is_printable_field (size_t i)
944     +{
945     + size_t n = i / CHAR_BIT;
946     + return (printable_field[n] >> (i % CHAR_BIT)) & 1;
947     +}
948 cvsdist 1.1 +
949 ovasik 1.41 +static size_t
950     +hash_int (const void *x, size_t tablesize)
951     +{
952     +#ifdef UINTPTR_MAX
953     + uintptr_t y = (uintptr_t) x;
954     +#else
955     + size_t y = (size_t) x;
956     +#endif
957     + return y % tablesize;
958     +}
959 cvsdist 1.1 +
960 ovasik 1.41 +static bool
961     +hash_compare_ints (void const *x, void const *y)
962     +{
963     + return (x == y) ? true : false;
964     +}
965 cvsdist 1.1 +
966 ovasik 1.41 +static bool
967     +is_range_start_index (size_t i)
968     +{
969     + return hash_lookup (range_start_ht, (void *) i) ? true : false;
970 cvsdist 1.1 +}
971     +
972 ovasik 1.41 +/* Return nonzero if the K'th field or byte is printable.
973     + When returning nonzero, if RANGE_START is non-NULL,
974     + set *RANGE_START to true if K is the beginning of a range, and to
975     + false otherwise. */
976 twaugh 1.20 +
977 ovasik 1.41 +static bool
978     +print_kth (size_t k, bool *range_start)
979     +{
980     + bool k_selected
981     + = ((0 < eol_range_start && eol_range_start <= k)
982     + || (k <= max_range_endpoint && is_printable_field (k)));
983     +
984     + bool is_selected = k_selected ^ complement;
985     + if (range_start && is_selected)
986     + *range_start = is_range_start_index (k);
987 twaugh 1.20 +
988 ovasik 1.41 + return is_selected;
989     +}
990 twaugh 1.20 +
991 ovasik 1.41 +/* Comparison function for qsort to order the list of
992     + struct range_pairs. */
993     +static int
994     +compare_ranges (const void *a, const void *b)
995     +{
996     + int a_start = ((const struct range_pair *) a)->lo;
997     + int b_start = ((const struct range_pair *) b)->lo;
998     + return a_start < b_start ? -1 : a_start > b_start;
999     +}
1000 twaugh 1.20 +
1001 ovasik 1.41 +/* Given the list of field or byte range specifications FIELDSTR, set
1002     + MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
1003     + array. If there is a right-open-ended range, set EOL_RANGE_START
1004     + to its starting index. FIELDSTR should be composed of one or more
1005     + numbers or ranges of numbers, separated by blanks or commas.
1006     + Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
1007     + through end of line. Return true if FIELDSTR contains at least
1008     + one field specification, false otherwise. */
1009     +
1010     +/* FIXME-someday: What if the user wants to cut out the 1,000,000-th
1011     + field of some huge input file? This function shouldn't have to
1012     + allocate a table of a million bits just so we can test every
1013     + field < 10^6 with an array dereference. Instead, consider using
1014     + an adaptive approach: if the range of selected fields is too large,
1015     + but only a few fields/byte-offsets are actually selected, use a
1016     + hash table. If the range of selected fields is too large, and
1017     + too many are selected, then resort to using the range-pairs (the
1018     + `rp' array) directly. */
1019 twaugh 1.20 +
1020 ovasik 1.41 +static bool
1021     +set_fields (const char *fieldstr)
1022 twaugh 1.20 +{
1023 ovasik 1.41 + size_t initial = 1; /* Value of first number in a range. */
1024     + size_t value = 0; /* If nonzero, a number being accumulated. */
1025     + bool lhs_specified = false;
1026     + bool rhs_specified = false;
1027     + bool dash_found = false; /* True if a '-' is found in this field. */
1028     + bool field_found = false; /* True if at least one field spec
1029     + has been processed. */
1030     +
1031     + struct range_pair *rp = NULL;
1032     + size_t n_rp = 0;
1033     + size_t n_rp_allocated = 0;
1034     + size_t i;
1035     + bool in_digits = false;
1036 twaugh 1.20 +
1037 ovasik 1.41 + /* Collect and store in RP the range end points.
1038     + It also sets EOL_RANGE_START if appropriate. */
1039 twaugh 1.20 +
1040 ovasik 1.41 + for (;;)
1041 twaugh 1.20 + {
1042 ovasik 1.41 + if (*fieldstr == '-')
1043     + {
1044     + in_digits = false;
1045     + /* Starting a range. */
1046     + if (dash_found)
1047     + FATAL_ERROR (_("invalid byte or field list"));
1048     + dash_found = true;
1049     + fieldstr++;
1050 twaugh 1.20 +
1051 ovasik 1.41 + initial = (lhs_specified ? value : 1);
1052     + value = 0;
1053     + }
1054     + else if (*fieldstr == ',' ||
1055     + isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
1056 ovasik 1.39 + {
1057 ovasik 1.41 + in_digits = false;
1058     + /* Ending the string, or this field/byte sublist. */
1059     + if (dash_found)
1060     + {
1061     + dash_found = false;
1062 twaugh 1.20 +
1063 ovasik 1.41 + if (!lhs_specified && !rhs_specified)
1064     + FATAL_ERROR (_("invalid range with no endpoint: -"));
1065 ovasik 1.39 +
1066 ovasik 1.41 + /* A range. Possibilities: -n, m-n, n-.
1067     + In any case, `initial' contains the start of the range. */
1068     + if (!rhs_specified)
1069     + {
1070     + /* `n-'. From `initial' to end of line. */
1071     + eol_range_start = initial;
1072     + field_found = true;
1073     + }
1074     + else
1075 ovasik 1.39 + {
1076 ovasik 1.41 + /* `m-n' or `-n' (1-n). */
1077     + if (value < initial)
1078     + FATAL_ERROR (_("invalid decreasing range"));
1079 ovasik 1.39 +
1080 ovasik 1.41 + /* Is there already a range going to end of line? */
1081     + if (eol_range_start != 0)
1082 ovasik 1.39 + {
1083 ovasik 1.41 + /* Yes. Is the new sequence already contained
1084     + in the old one? If so, no processing is
1085     + necessary. */
1086     + if (initial < eol_range_start)
1087     + {
1088     + /* No, the new sequence starts before the
1089     + old. Does the old range going to end of line
1090     + extend into the new range? */
1091     + if (eol_range_start <= value)
1092     + {
1093     + /* Yes. Simply move the end of line marker. */
1094     + eol_range_start = initial;
1095     + }
1096     + else
1097     + {
1098     + /* No. A simple range, before and disjoint from
1099     + the range going to end of line. Fill it. */
1100     + ADD_RANGE_PAIR (rp, initial, value);
1101     + }
1102 ovasik 1.39 +
1103 ovasik 1.41 + /* In any case, some fields were selected. */
1104     + field_found = true;
1105     + }
1106 ovasik 1.39 + }
1107     + else
1108 ovasik 1.41 + {
1109     + /* There is no range going to end of line. */
1110     + ADD_RANGE_PAIR (rp, initial, value);
1111     + field_found = true;
1112     + }
1113     + value = 0;
1114 ovasik 1.39 + }
1115 ovasik 1.41 + }
1116     + else
1117     + {
1118     + /* A simple field number, not a range. */
1119     + ADD_RANGE_PAIR (rp, value, value);
1120     + value = 0;
1121     + field_found = true;
1122     + }
1123     +
1124     + if (*fieldstr == '\0')
1125     + {
1126     + break;
1127     + }
1128     +
1129     + fieldstr++;
1130     + lhs_specified = false;
1131     + rhs_specified = false;
1132     + }
1133     + else if (ISDIGIT (*fieldstr))
1134     + {
1135     + /* Record beginning of digit string, in case we have to
1136     + complain about it. */
1137     + static char const *num_start;
1138     + if (!in_digits || !num_start)
1139     + num_start = fieldstr;
1140     + in_digits = true;
1141     +
1142     + if (dash_found)
1143     + rhs_specified = 1;
1144     + else
1145     + lhs_specified = 1;
1146     +
1147     + /* Detect overflow. */
1148     + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
1149     + {
1150     + /* In case the user specified -c$(echo 2^64|bc),22,
1151     + complain only about the first number. */
1152     + /* Determine the length of the offending number. */
1153     + size_t len = strspn (num_start, "0123456789");
1154     + char *bad_num = xstrndup (num_start, len);
1155     + if (operating_mode == byte_mode)
1156     + error (0, 0,
1157     + _("byte offset %s is too large"), quote (bad_num));
1158 ovasik 1.39 + else
1159 ovasik 1.41 + error (0, 0,
1160     + _("field number %s is too large"), quote (bad_num));
1161     + free (bad_num);
1162     + exit (EXIT_FAILURE);
1163 ovasik 1.39 + }
1164 ovasik 1.41 +
1165     + fieldstr++;
1166 ovasik 1.39 + }
1167 ovasik 1.41 + else
1168     + FATAL_ERROR (_("invalid byte or field list"));
1169 twaugh 1.20 + }
1170     +
1171 ovasik 1.41 + max_range_endpoint = 0;
1172     + for (i = 0; i < n_rp; i++)
1173     + {
1174     + if (rp[i].hi > max_range_endpoint)
1175     + max_range_endpoint = rp[i].hi;
1176     + }
1177 twaugh 1.20 +
1178 ovasik 1.41 + /* Allocate an array large enough so that it may be indexed by
1179     + the field numbers corresponding to all finite ranges
1180     + (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
1181 twaugh 1.20 +
1182 ovasik 1.41 + printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
1183 twaugh 1.20 +
1184 ovasik 1.41 + qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
1185 twaugh 1.20 +
1186 ovasik 1.41 + /* Set the array entries corresponding to integers in the ranges of RP. */
1187     + for (i = 0; i < n_rp; i++)
1188 twaugh 1.20 + {
1189 ovasik 1.41 + size_t j;
1190     + size_t rsi_candidate;
1191     +
1192     + /* Record the range-start indices, i.e., record each start
1193     + index that is not part of any other (lo..hi] range. */
1194     + rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
1195     + if (output_delimiter_specified
1196     + && !is_printable_field (rsi_candidate))
1197     + mark_range_start (rsi_candidate);
1198     +
1199     + for (j = rp[i].lo; j <= rp[i].hi; j++)
1200     + mark_printable_field (j);
1201 twaugh 1.20 + }
1202     +
1203 ovasik 1.41 + if (output_delimiter_specified
1204     + && !complement
1205     + && eol_range_start && !is_printable_field (eol_range_start))
1206     + mark_range_start (eol_range_start);
1207     +
1208     + free (rp);
1209     +
1210     + return field_found;
1211     +}
1212     +
1213     +/* Read from stream STREAM, printing to standard output any selected bytes. */
1214 twaugh 1.20 +
1215 ovasik 1.41 +static void
1216     +cut_bytes (FILE *stream)
1217     +{
1218     + size_t byte_idx; /* Number of bytes in the line so far. */
1219     + /* Whether to begin printing delimiters between ranges for the current line.
1220     + Set after we've begun printing data corresponding to the first range. */
1221     + bool print_delimiter;
1222 twaugh 1.20 +
1223 ovasik 1.41 + byte_idx = 0;
1224     + print_delimiter = false;
1225     + while (1)
1226     + {
1227     + int c; /* Each character from the file. */
1228     +
1229     + c = getc (stream);
1230     +
1231     + if (c == '\n')
1232     + {
1233     + putchar ('\n');
1234     + byte_idx = 0;
1235     + print_delimiter = false;
1236     + }
1237     + else if (c == EOF)
1238     + {
1239     + if (byte_idx > 0)
1240     + putchar ('\n');
1241     + break;
1242     + }
1243     + else
1244     + {
1245     + bool range_start;
1246     + bool *rs = output_delimiter_specified ? &range_start : NULL;
1247     + if (print_kth (++byte_idx, rs))
1248     + {
1249     + if (rs && *rs && print_delimiter)
1250     + {
1251     + fwrite (output_delimiter_string, sizeof (char),
1252     + output_delimiter_length, stdout);
1253     + }
1254     + print_delimiter = true;
1255     + putchar (c);
1256     + }
1257     + }
1258     + }
1259     +}
1260     +
1261     +/* Read from stream STREAM, printing to standard output any selected fields. */
1262     +
1263     +static void
1264     +cut_fields (FILE *stream)
1265     +{
1266     + int c;
1267     + size_t field_idx = 1;
1268     + bool found_any_selected_field = false;
1269     + bool buffer_first_field;
1270     +
1271     + c = getc (stream);
1272     + if (c == EOF)
1273     + return;
1274     +
1275     + ungetc (c, stream);
1276     +
1277     + /* To support the semantics of the -s flag, we may have to buffer
1278     + all of the first field to determine whether it is `delimited.'
1279     + But that is unnecessary if all non-delimited lines must be printed
1280     + and the first field has been selected, or if non-delimited lines
1281     + must be suppressed and the first field has *not* been selected.
1282     + That is because a non-delimited line has exactly one field. */
1283     + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
1284     +
1285     + while (1)
1286     + {
1287     + if (field_idx == 1 && buffer_first_field)
1288     + {
1289     + ssize_t len;
1290     + size_t n_bytes;
1291     +
1292     + len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
1293     + GETNLINE_NO_LIMIT, delim, '\n', stream);
1294     + if (len < 0)
1295     + {
1296     + free (field_1_buffer);
1297     + field_1_buffer = NULL;
1298     + if (ferror (stream) || feof (stream))
1299     + break;
1300     + xalloc_die ();
1301     + }
1302     +
1303     + n_bytes = len;
1304     + assert (n_bytes != 0);
1305     +
1306     + /* If the first field extends to the end of line (it is not
1307     + delimited) and we are printing all non-delimited lines,
1308     + print this one. */
1309     + if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
1310     + {
1311     + if (suppress_non_delimited)
1312     + {
1313     + /* Empty. */
1314     + }
1315     + else
1316     + {
1317     + fwrite (field_1_buffer, sizeof (char), n_by