Parent Directory
|
Revision Log
fix sort -h for multibyte locales (reported via http://bugs.archlinux.org/task/16022)
| 1 | ovasik | 1.31 | diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut |
| 2 | --- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200 | ||
| 3 | +++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200 | ||
| 4 | @@ -26,7 +26,7 @@ | ||
| 5 | my $prog = 'cut'; | ||
| 6 | my $try = "Try \`$prog --help' for more information.\n"; | ||
| 7 | my $from_1 = "$prog: fields and positions are numbered from 1\n$try"; | ||
| 8 | -my $inval = "$prog: invalid byte or field list\n$try"; | ||
| 9 | +my $inval = "$prog: invalid byte, character or field list\n$try"; | ||
| 10 | my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; | ||
| 11 | |||
| 12 | my @Tests = | ||
| 13 | @@ -140,8 +140,8 @@ | ||
| 14 | ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}], | ||
| 15 | |||
| 16 | # None of the following invalid ranges provoked an error up to coreutils-6.9. | ||
| 17 | - ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, | ||
| 18 | - {ERR=>"$prog: invalid decreasing range\n$try"}], | ||
| 19 | + ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, | ||
| 20 | + {ERR=>"$prog: invalid byte, character or field list\n$try"}], | ||
| 21 | ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], | ||
| 22 | ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], | ||
| 23 | ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], | ||
| 24 | twaugh | 1.21 | --- /dev/null 2007-03-01 09:16:39.219409909 +0000 |
| 25 | ovasik | 1.26 | +++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000 |
| 26 | twaugh | 1.20 | @@ -0,0 +1,58 @@ |
| 27 | +#! /bin/sh | ||
| 28 | +case $# in | ||
| 29 | ovasik | 1.26 | + 0) xx='../src/sort';; |
| 30 | twaugh | 1.20 | + *) xx="$1";; |
| 31 | +esac | ||
| 32 | +test "$VERBOSE" && echo=echo || echo=: | ||
| 33 | +$echo testing program: $xx | ||
| 34 | +errors=0 | ||
| 35 | +test "$srcdir" || srcdir=. | ||
| 36 | +test "$VERBOSE" && $xx --version 2> /dev/null | ||
| 37 | + | ||
| 38 | +export LC_ALL=en_US.UTF-8 | ||
| 39 | +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77 | ||
| 40 | +errors=0 | ||
| 41 | + | ||
| 42 | ovasik | 1.26 | +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O |
| 43 | twaugh | 1.20 | +code=$? |
| 44 | +if test $code != 0; then | ||
| 45 | + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2 | ||
| 46 | + errors=`expr $errors + 1` | ||
| 47 | +else | ||
| 48 | ovasik | 1.26 | + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1 |
| 49 | twaugh | 1.20 | + case $? in |
| 50 | + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;; | ||
| 51 | ovasik | 1.26 | + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2 |
| 52 | + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null | ||
| 53 | twaugh | 1.20 | + errors=`expr $errors + 1`;; |
| 54 | + 2) $echo "Test mb1 may have failed." 1>&2 | ||
| 55 | ovasik | 1.26 | + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2 |
| 56 | twaugh | 1.20 | + errors=`expr $errors + 1`;; |
| 57 | + esac | ||
| 58 | +fi | ||
| 59 | + | ||
| 60 | ovasik | 1.26 | +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O |
| 61 | twaugh | 1.20 | +code=$? |
| 62 | +if test $code != 0; then | ||
| 63 | + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2 | ||
| 64 | + errors=`expr $errors + 1` | ||
| 65 | +else | ||
| 66 | ovasik | 1.26 | + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1 |
| 67 | twaugh | 1.20 | + case $? in |
| 68 | + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;; | ||
| 69 | ovasik | 1.26 | + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2 |
| 70 | + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null | ||
| 71 | twaugh | 1.20 | + errors=`expr $errors + 1`;; |
| 72 | + 2) $echo "Test mb2 may have failed." 1>&2 | ||
| 73 | ovasik | 1.26 | + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2 |
| 74 | twaugh | 1.20 | + errors=`expr $errors + 1`;; |
| 75 | + esac | ||
| 76 | +fi | ||
| 77 | + | ||
| 78 | +if test $errors = 0; then | ||
| 79 | + $echo Passed all 113 tests. 1>&2 | ||
| 80 | +else | ||
| 81 | + $echo Failed $errors tests. 1>&2 | ||
| 82 | +fi | ||
| 83 | +test $errors = 0 || errors=1 | ||
| 84 | +exit $errors | ||
| 85 | twaugh | 1.21 | --- /dev/null 2007-03-01 09:16:39.219409909 +0000 |
| 86 | ovasik | 1.26 | +++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000 |
| 87 | twaugh | 1.20 | @@ -0,0 +1,4 @@ |
| 88 | +Apple@AA10@@20 | ||
| 89 | +Banana@AA5@@30 | ||
| 90 | +Citrus@AA20@@5 | ||
| 91 | +Cherry@AA30@@10 | ||
| 92 | twaugh | 1.21 | --- /dev/null 2007-03-01 09:16:39.219409909 +0000 |
| 93 | ovasik | 1.26 | +++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000 |
| 94 | twaugh | 1.20 | @@ -0,0 +1,4 @@ |
| 95 | +Citrus@AA20@@5 | ||
| 96 | +Cherry@AA30@@10 | ||
| 97 | +Apple@AA10@@20 | ||
| 98 | +Banana@AA5@@30 | ||
| 99 | twaugh | 1.21 | --- /dev/null 2007-03-01 09:16:39.219409909 +0000 |
| 100 | ovasik | 1.26 | +++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000 |
| 101 | twaugh | 1.20 | @@ -0,0 +1,4 @@ |
| 102 | +Apple@10 | ||
| 103 | +Banana@5 | ||
| 104 | +Citrus@20 | ||
| 105 | +Cherry@30 | ||
| 106 | twaugh | 1.21 | --- /dev/null 2007-03-01 09:16:39.219409909 +0000 |
| 107 | ovasik | 1.26 | +++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000 |
| 108 | twaugh | 1.20 | @@ -0,0 +1,4 @@ |
| 109 | +Banana@5 | ||
| 110 | +Apple@10 | ||
| 111 | +Citrus@20 | ||
| 112 | +Cherry@30 | ||
| 113 | ovasik | 1.26 | diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am |
| 114 | --- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200 | ||
| 115 | +++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200 | ||
| 116 | ovasik | 1.28 | @@ -192,6 +192,7 @@ |
| 117 | ovasik | 1.26 | misc/sort-compress \ |
| 118 | ovasik | 1.33 | misc/sort-continue \ |
| 119 | ovasik | 1.28 | misc/sort-files0-from \ |
| 120 | ovasik | 1.26 | + misc/sort-mb-tests \ |
| 121 | misc/sort-merge \ | ||
| 122 | ovasik | 1.33 | misc/sort-merge-fdlimit \ |
| 123 | ovasik | 1.26 | misc/sort-rand \ |
| 124 | @@ -391,6 +392,10 @@ | ||
| 125 | $(root_tests) | ||
| 126 | |||
| 127 | pr_data = \ | ||
| 128 | + misc/mb1.X \ | ||
| 129 | + misc/mb1.I \ | ||
| 130 | + misc/mb2.X \ | ||
| 131 | + misc/mb2.I \ | ||
| 132 | pr/0F \ | ||
| 133 | pr/0FF \ | ||
| 134 | pr/0FFnt \ | ||
| 135 | twaugh | 1.21 | --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100 |
| 136 | +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000 | ||
| 137 | twaugh | 1.20 | @@ -22,6 +22,11 @@ |
| 138 | |||
| 139 | # include <stdio.h> | ||
| 140 | |||
| 141 | +/* Get mbstate_t. */ | ||
| 142 | +# if HAVE_WCHAR_H | ||
| 143 | +# include <wchar.h> | ||
| 144 | +# endif | ||
| 145 | + | ||
| 146 | /* A `struct linebuffer' holds a line of text. */ | ||
| 147 | |||
| 148 | struct linebuffer | ||
| 149 | @@ -29,6 +34,9 @@ | ||
| 150 | size_t size; /* Allocated. */ | ||
| 151 | size_t length; /* Used. */ | ||
| 152 | char *buffer; | ||
| 153 | +# if HAVE_WCHAR_H | ||
| 154 | + mbstate_t state; | ||
| 155 | +# endif | ||
| 156 | }; | ||
| 157 | |||
| 158 | /* Initialize linebuffer LINEBUFFER for use. */ | ||
| 159 | twaugh | 1.21 | --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000 |
| 160 | +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000 | ||
| 161 | twaugh | 1.16 | @@ -38,11 +38,28 @@ |
| 162 | #include <stdio.h> | ||
| 163 | cvsdist | 1.1 | #include <getopt.h> |
| 164 | #include <sys/types.h> | ||
| 165 | + | ||
| 166 | twaugh | 1.16 | +/* Get mbstate_t, mbrtowc(), wcwidth(). */ |
| 167 | cvsdist | 1.1 | +#if HAVE_WCHAR_H |
| 168 | +# include <wchar.h> | ||
| 169 | +#endif | ||
| 170 | twaugh | 1.16 | + |
| 171 | cvsdist | 1.1 | #include "system.h" |
| 172 | #include "error.h" | ||
| 173 | #include "quote.h" | ||
| 174 | #include "xstrndup.h" | ||
| 175 | |||
| 176 | +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC | ||
| 177 | twaugh | 1.16 | + installation; work around this configuration error. */ |
| 178 | cvsdist | 1.1 | +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 |
| 179 | +# define MB_LEN_MAX 16 | ||
| 180 | +#endif | ||
| 181 | + | ||
| 182 | +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ | ||
| 183 | +#if HAVE_MBRTOWC && defined mbstate_t | ||
| 184 | +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) | ||
| 185 | +#endif | ||
| 186 | + | ||
| 187 | /* The official name of this program (e.g., no `g' prefix). */ | ||
| 188 | twaugh | 1.16 | #define PROGRAM_NAME "expand" |
| 189 | cvsdist | 1.1 | |
| 190 | twaugh | 1.20 | @@ -183,6 +200,7 @@ |
| 191 | twaugh | 1.16 | stops = num_start + len - 1; |
| 192 | } | ||
| 193 | } | ||
| 194 | cvsdist | 1.1 | + |
| 195 | else | ||
| 196 | twaugh | 1.16 | { |
| 197 | error (0, 0, _("tab size contains invalid character(s): %s"), | ||
| 198 | twaugh | 1.20 | @@ -365,6 +383,142 @@ |
| 199 | cvsdist | 1.1 | } |
| 200 | } | ||
| 201 | |||
| 202 | +#if HAVE_MBRTOWC | ||
| 203 | +static void | ||
| 204 | twaugh | 1.16 | +expand_multibyte (void) |
| 205 | cvsdist | 1.1 | +{ |
| 206 | twaugh | 1.16 | + FILE *fp; /* Input stream. */ |
| 207 | + mbstate_t i_state; /* Current shift state of the input stream. */ | ||
| 208 | + mbstate_t i_state_bak; /* Back up the I_STATE. */ | ||
| 209 | + mbstate_t o_state; /* Current shift state of the output stream. */ | ||
| 210 | cvsdist | 1.1 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ |
| 211 | twaugh | 1.16 | + char *bufpos; /* Next read position of BUF. */ |
| 212 | + size_t buflen = 0; /* The length of the byte sequence in buf. */ | ||
| 213 | + wchar_t wc; /* The wide character just decoded. */ | ||
| 214 | + size_t mblength; /* The byte length of the multibyte character | ||
| 215 | + that was decoded into WC. */ | ||
| 216 | + int tab_index = 0; /* Index in `tab_list' of next tabstop. */ | ||
| 217 | + int column = 0; /* Column on screen of the next char. */ | ||
| 218 | + int next_tab_column; /* Column the next tab stop is on. */ | ||
| 219 | + int convert = 1; /* If nonzero, perform translations. */ | ||
| 220 | + | ||
| 221 | + fp = next_file ((FILE *) NULL); | ||
| 222 | + if (fp == NULL) | ||
| 223 | + return; | ||
| 224 | cvsdist | 1.1 | + |
| 225 | twaugh | 1.16 | + memset (&o_state, '\0', sizeof(mbstate_t)); |
| 226 | + memset (&i_state, '\0', sizeof(mbstate_t)); | ||
| 227 | cvsdist | 1.1 | + |
| 228 | twaugh | 1.16 | + for (;;) |
| 229 | cvsdist | 1.1 | + { |
| 230 | twaugh | 1.16 | + /* Refill the buffer BUF. */ |
| 231 | + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) | ||
| 232 | cvsdist | 1.1 | + { |
| 233 | twaugh | 1.16 | + memmove (buf, bufpos, buflen); |
| 234 | + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); | ||
| 235 | + bufpos = buf; | ||
| 236 | cvsdist | 1.1 | + } |
| 237 | twaugh | 1.16 | + |
| 238 | + /* No character is left in BUF. */ | ||
| 239 | + if (buflen < 1) | ||
| 240 | cvsdist | 1.1 | + { |
| 241 | twaugh | 1.16 | + fp = next_file (fp); |
| 242 | + | ||
| 243 | + if (fp == NULL) | ||
| 244 | + break; /* No more files. */ | ||
| 245 | + else | ||
| 246 | + { | ||
| 247 | + memset (&i_state, '\0', sizeof(mbstate_t)); | ||
| 248 | + continue; | ||
| 249 | + } | ||
| 250 | cvsdist | 1.1 | + } |
| 251 | + | ||
| 252 | twaugh | 1.16 | + /* Get a wide character. */ |
| 253 | + i_state_bak = i_state; | ||
| 254 | + mblength = mbrtowc (&wc, bufpos, buflen, &i_state); | ||
| 255 | cvsdist | 1.1 | + |
| 256 | twaugh | 1.16 | + switch (mblength) |
| 257 | cvsdist | 1.1 | + { |
| 258 | twaugh | 1.16 | + case (size_t)-1: /* illegal byte sequence. */ |
| 259 | + case (size_t)-2: | ||
| 260 | + mblength = 1; | ||
| 261 | + i_state = i_state_bak; | ||
| 262 | + if (convert) | ||
| 263 | cvsdist | 1.1 | + { |
| 264 | twaugh | 1.16 | + ++column; |
| 265 | + if (convert_entire_line == 0) | ||
| 266 | + convert = 0; | ||
| 267 | + } | ||
| 268 | + putchar (*bufpos); | ||
| 269 | + break; | ||
| 270 | cvsdist | 1.1 | + |
| 271 | twaugh | 1.16 | + case 0: /* null. */ |
| 272 | + mblength = 1; | ||
| 273 | + if (convert && convert_entire_line == 0) | ||
| 274 | + convert = 0; | ||
| 275 | + putchar ('\0'); | ||
| 276 | + break; | ||
| 277 | cvsdist | 1.1 | + |
| 278 | twaugh | 1.16 | + default: |
| 279 | + if (wc == L'\n') /* LF. */ | ||
| 280 | + { | ||
| 281 | + tab_index = 0; | ||
| 282 | + column = 0; | ||
| 283 | + convert = 1; | ||
| 284 | + putchar ('\n'); | ||
| 285 | cvsdist | 1.1 | + } |
| 286 | twaugh | 1.16 | + else if (wc == L'\t' && convert) /* Tab. */ |
| 287 | cvsdist | 1.1 | + { |
| 288 | twaugh | 1.16 | + if (tab_size == 0) |
| 289 | cvsdist | 1.1 | + { |
| 290 | twaugh | 1.16 | + /* Do not let tab_index == first_free_tab; |
| 291 | + stop when it is 1 less. */ | ||
| 292 | + while (tab_index < first_free_tab - 1 | ||
| 293 | + && column >= tab_list[tab_index]) | ||
| 294 | + tab_index++; | ||
| 295 | + next_tab_column = tab_list[tab_index]; | ||
| 296 | + if (tab_index < first_free_tab - 1) | ||
| 297 | + tab_index++; | ||
| 298 | + if (column >= next_tab_column) | ||
| 299 | + next_tab_column = column + 1; | ||
| 300 | cvsdist | 1.1 | + } |
| 301 | + else | ||
| 302 | twaugh | 1.16 | + next_tab_column = column + tab_size - column % tab_size; |
| 303 | + | ||
| 304 | + while (column < next_tab_column) | ||
| 305 | cvsdist | 1.1 | + { |
| 306 | twaugh | 1.16 | + putchar (' '); |
| 307 | + ++column; | ||
| 308 | cvsdist | 1.1 | + } |
| 309 | + } | ||
| 310 | twaugh | 1.16 | + else /* Others. */ |
| 311 | cvsdist | 1.1 | + { |
| 312 | twaugh | 1.16 | + if (convert) |
| 313 | cvsdist | 1.1 | + { |
| 314 | twaugh | 1.16 | + if (wc == L'\b') |
| 315 | + { | ||
| 316 | + if (column > 0) | ||
| 317 | + --column; | ||
| 318 | + } | ||
| 319 | + else | ||
| 320 | + { | ||
| 321 | + int width; /* The width of WC. */ | ||
| 322 | cvsdist | 1.1 | + |
| 323 | twaugh | 1.16 | + width = wcwidth (wc); |
| 324 | + column += (width > 0) ? width : 0; | ||
| 325 | + if (convert_entire_line == 0) | ||
| 326 | + convert = 0; | ||
| 327 | + } | ||
| 328 | cvsdist | 1.1 | + } |
| 329 | twaugh | 1.16 | + fwrite (bufpos, sizeof(char), mblength, stdout); |
| 330 | cvsdist | 1.1 | + } |
| 331 | + } | ||
| 332 | twaugh | 1.16 | + buflen -= mblength; |
| 333 | + bufpos += mblength; | ||
| 334 | cvsdist | 1.1 | + } |
| 335 | +} | ||
| 336 | +#endif | ||
| 337 | + | ||
| 338 | twaugh | 1.16 | int |
| 339 | main (int argc, char **argv) | ||
| 340 | cvsdist | 1.1 | { |
| 341 | twaugh | 1.20 | @@ -429,7 +583,12 @@ |
| 342 | twaugh | 1.16 | |
| 343 | file_list = (optind < argc ? &argv[optind] : stdin_argv); | ||
| 344 | |||
| 345 | - expand (); | ||
| 346 | cvsdist | 1.1 | +#if HAVE_MBRTOWC |
| 347 | twaugh | 1.16 | + if (MB_CUR_MAX > 1) |
| 348 | + expand_multibyte (); | ||
| 349 | + else | ||
| 350 | +#endif | ||
| 351 | + expand (); | ||
| 352 | |||
| 353 | if (have_read_stdin && fclose (stdin) != 0) | ||
| 354 | error (EXIT_FAILURE, errno, "-"); | ||
| 355 | twaugh | 1.21 | --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000 |
| 356 | +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000 | ||
| 357 | ovasik | 1.29 | @@ -23,16 +23,30 @@ |
| 358 | twaugh | 1.20 | #include <sys/types.h> |
| 359 | #include <getopt.h> | ||
| 360 | twaugh | 1.11 | |
| 361 | twaugh | 1.20 | +/* Get mbstate_t, mbrtowc(), wcwidth(). */ |
| 362 | cvsdist | 1.1 | +#if HAVE_WCHAR_H |
| 363 | +# include <wchar.h> | ||
| 364 | +#endif | ||
| 365 | + | ||
| 366 | twaugh | 1.20 | +/* Get iswblank(), towupper. */ |
| 367 | cvsdist | 1.1 | +#if HAVE_WCTYPE_H |
| 368 | +# include <wctype.h> | ||
| 369 | +#endif | ||
| 370 | + | ||
| 371 | #include "system.h" | ||
| 372 | #include "error.h" | ||
| 373 | twaugh | 1.20 | #include "linebuffer.h" |
| 374 | -#include "memcasecmp.h" | ||
| 375 | #include "quote.h" | ||
| 376 | #include "stdio--.h" | ||
| 377 | #include "xmemcoll.h" | ||
| 378 | cvsdist | 1.1 | #include "xstrtol.h" |
| 379 | ovasik | 1.25 | #include "argmatch.h" |
| 380 | cvsdist | 1.1 | |
| 381 | +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ | ||
| 382 | +#if HAVE_MBRTOWC && defined mbstate_t | ||
| 383 | +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) | ||
| 384 | +#endif | ||
| 385 | + | ||
| 386 | /* The official name of this program (e.g., no `g' prefix). */ | ||
| 387 | twaugh | 1.20 | #define PROGRAM_NAME "join" |
| 388 | cvsdist | 1.1 | |
| 389 | twaugh | 1.20 | @@ -104,10 +118,12 @@ |
| 390 | /* Last element in `outlist', where a new element can be added. */ | ||
| 391 | static struct outlist *outlist_end = &outlist_head; | ||
| 392 | cvsdist | 1.1 | |
| 393 | twaugh | 1.20 | -/* Tab character separating fields. If negative, fields are separated |
| 394 | - by any nonempty string of blanks, otherwise by exactly one | ||
| 395 | - tab character whose value (when cast to unsigned char) equals TAB. */ | ||
| 396 | -static int tab = -1; | ||
| 397 | +/* Tab character separating fields. If NULL, fields are separated | ||
| 398 | + by any nonempty string of blanks. */ | ||
| 399 | +static char *tab = NULL; | ||
| 400 | cvsdist | 1.1 | + |
| 401 | twaugh | 1.20 | +/* The number of bytes used for tab. */ |
| 402 | +static size_t tablen = 0; | ||
| 403 | cvsdist | 1.1 | |
| 404 | ovasik | 1.27 | /* If nonzero, check that the input is correctly ordered. */ |
| 405 | static enum | ||
| 406 | twaugh | 1.20 | @@ -199,10 +217,11 @@ |
| 407 | if (ptr == lim) | ||
| 408 | return; | ||
| 409 | cvsdist | 1.1 | |
| 410 | twaugh | 1.20 | - if (0 <= tab) |
| 411 | + if (tab != NULL) | ||
| 412 | { | ||
| 413 | + unsigned char t = tab[0]; | ||
| 414 | char *sep; | ||
| 415 | - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) | ||
| 416 | + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) | ||
| 417 | extract_field (line, ptr, sep - ptr); | ||
| 418 | } | ||
| 419 | else | ||
| 420 | @@ -229,6 +248,148 @@ | ||
| 421 | extract_field (line, ptr, lim - ptr); | ||
| 422 | } | ||
| 423 | cvsdist | 1.1 | |
| 424 | twaugh | 1.20 | +#if HAVE_MBRTOWC |
| 425 | +static void | ||
| 426 | +xfields_multibyte (struct line *line) | ||
| 427 | +{ | ||
| 428 | + char *ptr = line->buf.buffer; | ||
| 429 | + char const *lim = ptr + line->buf.length - 1; | ||
| 430 | + wchar_t wc = 0; | ||
| 431 | + size_t mblength = 1; | ||
| 432 | + mbstate_t state, state_bak; | ||
| 433 | cvsdist | 1.1 | + |
| 434 | twaugh | 1.20 | + memset (&state, 0, sizeof (mbstate_t)); |
| 435 | cvsdist | 1.1 | + |
| 436 | ovasik | 1.34 | + if (ptr >= lim) |
| 437 | twaugh | 1.20 | + return; |
| 438 | cvsdist | 1.1 | + |
| 439 | twaugh | 1.20 | + if (tab != NULL) |
| 440 | cvsdist | 1.1 | + { |
| 441 | twaugh | 1.20 | + unsigned char t = tab[0]; |
| 442 | + char *sep = ptr; | ||
| 443 | + for (; ptr < lim; ptr = sep + mblength) | ||
| 444 | + { | ||
| 445 | + sep = ptr; | ||
| 446 | + while (sep < lim) | ||
| 447 | + { | ||
| 448 | + state_bak = state; | ||
| 449 | + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); | ||
| 450 | + | ||
| 451 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | ||
| 452 | + { | ||
| 453 | + mblength = 1; | ||
| 454 | + state = state_bak; | ||
| 455 | + } | ||
| 456 | + mblength = (mblength < 1) ? 1 : mblength; | ||
| 457 | + | ||
| 458 | + if (mblength == tablen && !memcmp (sep, tab, mblength)) | ||
| 459 | + break; | ||
| 460 | + else | ||
| 461 | + { | ||
| 462 | + sep += mblength; | ||
| 463 | + continue; | ||
| 464 | + } | ||
| 465 | + } | ||
| 466 | + | ||
| 467 | ovasik | 1.34 | + if (sep >= lim) |
| 468 | twaugh | 1.20 | + break; |
| 469 | + | ||
| 470 | + extract_field (line, ptr, sep - ptr); | ||
| 471 | + } | ||
| 472 | cvsdist | 1.1 | + } |
| 473 | + else | ||
| 474 | + { | ||
| 475 | twaugh | 1.20 | + /* Skip leading blanks before the first field. */ |
| 476 | + while(ptr < lim) | ||
| 477 | + { | ||
| 478 | + state_bak = state; | ||
| 479 | + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); | ||
| 480 | + | ||
| 481 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | ||
| 482 | + { | ||
| 483 | + mblength = 1; | ||
| 484 | + state = state_bak; | ||
| 485 | + break; | ||
| 486 | + } | ||
| 487 | + mblength = (mblength < 1) ? 1 : mblength; | ||
| 488 | + | ||
| 489 | + if (!iswblank(wc)) | ||
| 490 | + break; | ||
| 491 | + ptr += mblength; | ||
| 492 | + } | ||
| 493 | cvsdist | 1.1 | + |
| 494 | twaugh | 1.20 | + do |
| 495 | + { | ||
| 496 | + char *sep; | ||
| 497 | + state_bak = state; | ||
| 498 | + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); | ||
| 499 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | ||
| 500 | cvsdist | 1.1 | + { |
| 501 | twaugh | 1.20 | + mblength = 1; |
| 502 | + state = state_bak; | ||
| 503 | + break; | ||
| 504 | + } | ||
| 505 | + mblength = (mblength < 1) ? 1 : mblength; | ||
| 506 | cvsdist | 1.1 | + |
| 507 | twaugh | 1.20 | + sep = ptr + mblength; |
| 508 | ovasik | 1.34 | + while (sep < lim) |
| 509 | cvsdist | 1.1 | + { |
| 510 | twaugh | 1.20 | + state_bak = state; |
| 511 | + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); | ||
| 512 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | ||
| 513 | + { | ||
| 514 | + mblength = 1; | ||
| 515 | + state = state_bak; | ||
| 516 | + break; | ||
| 517 | + } | ||
| 518 | + mblength = (mblength < 1) ? 1 : mblength; | ||
| 519 | + | ||
| 520 | + if (iswblank (wc)) | ||
| 521 | + break; | ||
| 522 | cvsdist | 1.1 | + |
| 523 | twaugh | 1.20 | + sep += mblength; |
| 524 | cvsdist | 1.1 | + } |
| 525 | + | ||
| 526 | twaugh | 1.20 | + extract_field (line, ptr, sep - ptr); |
| 527 | ovasik | 1.34 | + if (sep >= lim) |
| 528 | twaugh | 1.20 | + return; |
| 529 | cvsdist | 1.1 | + |
| 530 | twaugh | 1.20 | + state_bak = state; |
| 531 | + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); | ||
| 532 | cvsdist | 1.1 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) |
| 533 | + { | ||
| 534 | twaugh | 1.20 | + mblength = 1; |
| 535 | + state = state_bak; | ||
| 536 | + break; | ||
| 537 | cvsdist | 1.1 | + } |
| 538 | twaugh | 1.20 | + mblength = (mblength < 1) ? 1 : mblength; |
| 539 | + | ||
| 540 | + ptr = sep + mblength; | ||
| 541 | ovasik | 1.34 | + while (ptr < lim) |
| 542 | cvsdist | 1.1 | + { |
| 543 | twaugh | 1.20 | + state_bak = state; |
| 544 | + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); | ||
| 545 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | ||
| 546 | + { | ||
| 547 | + mblength = 1; | ||
| 548 | + state = state_bak; | ||
| 549 | + break; | ||
| 550 | + } | ||
| 551 | + mblength = (mblength < 1) ? 1 : mblength; | ||
| 552 | + | ||
| 553 | + if (!iswblank (wc)) | ||
| 554 | + break; | ||
| 555 | + | ||
| 556 | + ptr += mblength; | ||
| 557 | cvsdist | 1.1 | + } |
| 558 | twaugh | 1.20 | + } |
| 559 | ovasik | 1.34 | + while (ptr < lim); |
| 560 | twaugh | 1.20 | + } |
| 561 | cvsdist | 1.1 | + |
| 562 | twaugh | 1.20 | + extract_field (line, ptr, lim - ptr); |
| 563 | +} | ||
| 564 | cvsdist | 1.1 | +#endif |
| 565 | ovasik | 1.27 | + |
| 566 | ovasik | 1.28 | static void |
| 567 | freeline (struct line *line) | ||
| 568 | ovasik | 1.27 | { |
| 569 | twaugh | 1.20 | @@ -377,11 +601,18 @@ |
| 570 | cvsdist | 1.1 | |
| 571 | twaugh | 1.20 | /* Print the join of LINE1 and LINE2. */ |
| 572 | cvsdist | 1.1 | |
| 573 | twaugh | 1.20 | +#define PUT_TAB_CHAR \ |
| 574 | + do \ | ||
| 575 | + { \ | ||
| 576 | + (tab != NULL) ? \ | ||
| 577 | + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ | ||
| 578 | + } \ | ||
| 579 | + while (0) | ||
| 580 | + | ||
| 581 | static void | ||
| 582 | prjoin (struct line const *line1, struct line const *line2) | ||
| 583 | cvsdist | 1.1 | { |
| 584 | twaugh | 1.20 | const struct outlist *outlist; |
| 585 | - char output_separator = tab < 0 ? ' ' : tab; | ||
| 586 | |||
| 587 | outlist = outlist_head.next; | ||
| 588 | if (outlist) | ||
| 589 | @@ -397,12 +628,12 @@ | ||
| 590 | if (o->file == 0) | ||
| 591 | { | ||
| 592 | if (line1 == &uni_blank) | ||
| 593 | - { | ||
| 594 | + { | ||
| 595 | line = line2; | ||
| 596 | field = join_field_2; | ||
| 597 | } | ||
| 598 | else | ||
| 599 | - { | ||
| 600 | + { | ||
| 601 | line = line1; | ||
| 602 | field = join_field_1; | ||
| 603 | } | ||
| 604 | @@ -416,7 +647,7 @@ | ||
| 605 | o = o->next; | ||
| 606 | if (o == NULL) | ||
| 607 | break; | ||
| 608 | - putchar (output_separator); | ||
| 609 | + PUT_TAB_CHAR; | ||
| 610 | } | ||
| 611 | putchar ('\n'); | ||
| 612 | } | ||
| 613 | @@ -434,23 +665,23 @@ | ||
| 614 | prfield (join_field_1, line1); | ||
| 615 | for (i = 0; i < join_field_1 && i < line1->nfields; ++i) | ||
| 616 | { | ||
| 617 | - putchar (output_separator); | ||
| 618 | + PUT_TAB_CHAR; | ||
| 619 | prfield (i, line1); | ||
| 620 | } | ||
| 621 | for (i = join_field_1 + 1; i < line1->nfields; ++i) | ||
| 622 | { | ||
| 623 | - putchar (output_separator); | ||
| 624 | + PUT_TAB_CHAR; | ||
| 625 | prfield (i, line1); | ||
| 626 | } | ||
| 627 | cvsdist | 1.1 | |
| 628 | twaugh | 1.20 | for (i = 0; i < join_field_2 && i < line2->nfields; ++i) |
| 629 | { | ||
| 630 | - putchar (output_separator); | ||
| 631 | + PUT_TAB_CHAR; | ||
| 632 | prfield (i, line2); | ||
| 633 | } | ||
| 634 | for (i = join_field_2 + 1; i < line2->nfields; ++i) | ||
| 635 | { | ||
| 636 | - putchar (output_separator); | ||
| 637 | + PUT_TAB_CHAR; | ||
| 638 | prfield (i, line2); | ||
| 639 | } | ||
| 640 | putchar ('\n'); | ||
| 641 | twaugh | 1.21 | @@ -859,20 +1090,41 @@ |
| 642 | cvsdist | 1.1 | |
| 643 | twaugh | 1.20 | case 't': |
| 644 | { | ||
| 645 | - unsigned char newtab = optarg[0]; | ||
| 646 | - if (! newtab) | ||
| 647 | + char *newtab; | ||
| 648 | + size_t newtablen; | ||
| 649 | + if (! optarg[0]) | ||
| 650 | error (EXIT_FAILURE, 0, _("empty tab")); | ||
| 651 | - if (optarg[1]) | ||
| 652 | + newtab = xstrdup (optarg); | ||
| 653 | +#if HAVE_MBRTOWC | ||
| 654 | + if (MB_CUR_MAX > 1) | ||
| 655 | + { | ||
| 656 | + mbstate_t state; | ||
| 657 | + | ||
| 658 | + memset (&state, 0, sizeof (mbstate_t)); | ||
| 659 | + newtablen = mbrtowc (NULL, newtab, | ||
| 660 | + strnlen (newtab, MB_LEN_MAX), | ||
| 661 | + &state); | ||
| 662 | + if (newtablen == (size_t) 0 | ||
| 663 | + || newtablen == (size_t) -1 | ||
| 664 | + || newtablen == (size_t) -2) | ||
| 665 | + newtablen = 1; | ||
| 666 | + } | ||
| 667 | + else | ||
| 668 | +#endif | ||
| 669 | + newtablen = 1; | ||
| 670 | + | ||
| 671 | + if (newtablen == 1 && newtab[1]) | ||
| 672 | + { | ||
| 673 | + if (STREQ (newtab, "\\0")) | ||
| 674 | + newtab[0] = '\0'; | ||
| 675 | + } | ||
| 676 | + if (tab != NULL && strcmp (tab, newtab)) | ||
| 677 | { | ||
| 678 | - if (STREQ (optarg, "\\0")) | ||
| 679 | - newtab = '\0'; | ||
| 680 | - else | ||
| 681 | - error (EXIT_FAILURE, 0, _("multi-character tab %s"), | ||
| 682 | - quote (optarg)); | ||
| 683 | + free (newtab); | ||
| 684 | + error (EXIT_FAILURE, 0, _("incompatible tabs")); | ||
| 685 | } | ||
| 686 | - if (0 <= tab && tab != newtab) | ||
| 687 | - error (EXIT_FAILURE, 0, _("incompatible tabs")); | ||
| 688 | tab = newtab; | ||
| 689 | + tablen = newtablen; | ||
| 690 | } | ||
| 691 | break; | ||
| 692 | ovasik | 1.25 | |
| 693 | diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c | ||
| 694 | --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 | ||
| 695 | +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 | ||
| 696 | @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct | ||
| 697 | size_t jf_1, size_t jf_2) | ||
| 698 | { | ||
| 699 | /* Start of field to compare in each file. */ | ||
| 700 | - char *beg1; | ||
| 701 | - char *beg2; | ||
| 702 | - | ||
| 703 | - size_t len1; | ||
| 704 | - size_t len2; /* Length of fields to compare. */ | ||
| 705 | + char *beg[2]; | ||
| 706 | + char *copy[2]; | ||
| 707 | ovasik | 1.27 | + size_t len[2]; /* Length of fields to compare. */ |
| 708 | ovasik | 1.25 | int diff; |
| 709 | + int i, j; | ||
| 710 | |||
| 711 | if (jf_1 < line1->nfields) | ||
| 712 | { | ||
| 713 | - beg1 = line1->fields[jf_1].beg; | ||
| 714 | - len1 = line1->fields[jf_1].len; | ||
| 715 | + beg[0] = line1->fields[jf_1].beg; | ||
| 716 | + len[0] = line1->fields[jf_1].len; | ||
| 717 | } | ||
| 718 | else | ||
| 719 | { | ||
| 720 | - beg1 = NULL; | ||
| 721 | - len1 = 0; | ||
| 722 | + beg[0] = NULL; | ||
| 723 | + len[0] = 0; | ||
| 724 | } | ||
| 725 | |||
| 726 | if (jf_2 < line2->nfields) | ||
| 727 | { | ||
| 728 | - beg2 = line2->fields[jf_2].beg; | ||
| 729 | - len2 = line2->fields[jf_2].len; | ||
| 730 | + beg[1] = line2->fields[jf_2].beg; | ||
| 731 | + len[1] = line2->fields[jf_2].len; | ||
| 732 | } | ||
| 733 | else | ||
| 734 | { | ||
| 735 | - beg2 = NULL; | ||
| 736 | - len2 = 0; | ||
| 737 | + beg[1] = NULL; | ||
| 738 | + len[1] = 0; | ||
| 739 | } | ||
| 740 | |||
| 741 | - if (len1 == 0) | ||
| 742 | - return len2 == 0 ? 0 : -1; | ||
| 743 | - if (len2 == 0) | ||
| 744 | + if (len[0] == 0) | ||
| 745 | + return len[1] == 0 ? 0 : -1; | ||
| 746 | + if (len[1] == 0) | ||
| 747 | return 1; | ||
| 748 | |||
| 749 | if (ignore_case) | ||
| 750 | { | ||
| 751 | - /* FIXME: ignore_case does not work with NLS (in particular, | ||
| 752 | - with multibyte chars). */ | ||
| 753 | - diff = memcasecmp (beg1, beg2, MIN (len1, len2)); | ||
| 754 | +#ifdef HAVE_MBRTOWC | ||
| 755 | + if (MB_CUR_MAX > 1) | ||
| 756 | + { | ||
| 757 | + size_t mblength; | ||
| 758 | + wchar_t wc, uwc; | ||
| 759 | + mbstate_t state, state_bak; | ||
| 760 | + | ||
| 761 | + memset (&state, '\0', sizeof (mbstate_t)); | ||
| 762 | + | ||
| 763 | + for (i = 0; i < 2; i++) | ||
| 764 | + { | ||
| 765 | + copy[i] = alloca (len[i] + 1); | ||
| 766 | + | ||
| 767 | + for (j = 0; j < MIN (len[0], len[1]);) | ||
| 768 | + { | ||
| 769 | + state_bak = state; | ||
| 770 | + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); | ||
| 771 | + | ||
| 772 | + switch (mblength) | ||
| 773 | + { | ||
| 774 | + case (size_t) -1: | ||
| 775 | + case (size_t) -2: | ||
| 776 | + state = state_bak; | ||
| 777 | + /* Fall through */ | ||
| 778 | + case 0: | ||
| 779 | + mblength = 1; | ||
| 780 | + break; | ||
| 781 | + | ||
| 782 | + default: | ||
| 783 | + uwc = towupper (wc); | ||
| 784 | + | ||
| 785 | + if (uwc != wc) | ||
| 786 | + { | ||
| 787 | + mbstate_t state_wc; | ||
| 788 | + | ||
| 789 | + memset (&state_wc, '\0', sizeof (mbstate_t)); | ||
| 790 | + wcrtomb (copy[i] + j, uwc, &state_wc); | ||
| 791 | + } | ||
| 792 | + else | ||
| 793 | + memcpy (copy[i] + j, beg[i] + j, mblength); | ||
| 794 | + } | ||
| 795 | + j += mblength; | ||
| 796 | + } | ||
| 797 | + copy[i][j] = '\0'; | ||
| 798 | + } | ||
| 799 | + } | ||
| 800 | + else | ||
| 801 | +#endif | ||
| 802 | + { | ||
| 803 | + for (i = 0; i < 2; i++) | ||
| 804 | + { | ||
| 805 | + copy[i] = alloca (len[i] + 1); | ||
| 806 | + | ||
| 807 | + for (j = 0; j < MIN (len[0], len[1]); j++) | ||
| 808 | + copy[i][j] = toupper (beg[i][j]); | ||
| 809 | + | ||
| 810 | + copy[i][j] = '\0'; | ||
| 811 | + } | ||
| 812 | + } | ||
| 813 | } | ||
| 814 | else | ||
| 815 | { | ||
| 816 | - if (hard_LC_COLLATE) | ||
| 817 | - return xmemcoll (beg1, len1, beg2, len2); | ||
| 818 | - diff = memcmp (beg1, beg2, MIN (len1, len2)); | ||
| 819 | + copy[0] = (unsigned char *) beg[0]; | ||
| 820 | + copy[1] = (unsigned char *) beg[1]; | ||
| 821 | } | ||
| 822 | |||
| 823 | + if (hard_LC_COLLATE) | ||
| 824 | + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); | ||
| 825 | + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); | ||
| 826 | + | ||
| 827 | + | ||
| 828 | if (diff) | ||
| 829 | return diff; | ||
| 830 | - return len1 < len2 ? -1 : len1 != len2; | ||
| 831 | + return len[0] - len[1]; | ||
| 832 | } | ||
| 833 | cvsdist | 1.1 | |
| 834 | ovasik | 1.27 | /* Check that successive input lines PREV and CURRENT from input file |
| 835 | twaugh | 1.21 | --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000 |
| 836 | +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000 | ||
| 837 | twaugh | 1.20 | @@ -23,6 +23,16 @@ |
| 838 | #include <getopt.h> | ||
| 839 | #include <sys/types.h> | ||
| 840 | cvsdist | 1.1 | |
| 841 | twaugh | 1.20 | +/* Get mbstate_t, mbrtowc(). */ |
| 842 | +#if HAVE_WCHAR_H | ||
| 843 | +# include <wchar.h> | ||
| 844 | +#endif | ||
| 845 | + | ||
| 846 | +/* Get isw* functions. */ | ||
| 847 | +#if HAVE_WCTYPE_H | ||
| 848 | +# include <wctype.h> | ||
| 849 | +#endif | ||
| 850 | + | ||
| 851 | #include "system.h" | ||
| 852 | #include "argmatch.h" | ||
| 853 | #include "linebuffer.h" | ||
| 854 | @@ -32,7 +42,19 @@ | ||
| 855 | #include "quote.h" | ||
| 856 | #include "xmemcoll.h" | ||
| 857 | #include "xstrtol.h" | ||
| 858 | -#include "memcasecmp.h" | ||
| 859 | +#include "xmemcoll.h" | ||
| 860 | + | ||
| 861 | +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC | ||
| 862 | + installation; work around this configuration error. */ | ||
| 863 | +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 | ||
| 864 | +# define MB_LEN_MAX 16 | ||
| 865 | +#endif | ||
| 866 | + | ||
| 867 | +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ | ||
| 868 | +#if HAVE_MBRTOWC && defined mbstate_t | ||
| 869 | +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) | ||
| 870 | +#endif | ||
| 871 | + | ||
| 872 | |||
| 873 | /* The official name of this program (e.g., no `g' prefix). */ | ||
| 874 | #define PROGRAM_NAME "uniq" | ||
| 875 | @@ -109,6 +131,10 @@ | ||
| 876 | /* Select whether/how to delimit groups of duplicate lines. */ | ||
| 877 | static enum delimit_method delimit_groups; | ||
| 878 | |||
| 879 | +/* Function pointers. */ | ||
| 880 | +static char * | ||
| 881 | +(*find_field) (struct linebuffer *line); | ||
| 882 | + | ||
| 883 | static struct option const longopts[] = | ||
| 884 | { | ||
| 885 | {"count", no_argument, NULL, 'c'}, | ||
| 886 | twaugh | 1.21 | @@ -198,7 +224,7 @@ |
| 887 | twaugh | 1.20 | return a pointer to the beginning of the line's field to be compared. */ |
| 888 | |||
| 889 | static char * | ||
| 890 | ovasik | 1.28 | -find_field (struct linebuffer const *line) |
| 891 | twaugh | 1.20 | +find_field_uni (struct linebuffer *line) |
| 892 | { | ||
| 893 | size_t count; | ||
| 894 | ovasik | 1.28 | char const *lp = line->buffer; |
| 895 | twaugh | 1.21 | @@ -219,6 +245,83 @@ |
| 896 | ovasik | 1.28 | return line->buffer + i; |
| 897 | twaugh | 1.20 | } |
| 898 | |||
| 899 | +#if HAVE_MBRTOWC | ||
| 900 | + | ||
| 901 | +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ | ||
| 902 | + do \ | ||
| 903 | + { \ | ||
| 904 | + mbstate_t state_bak; \ | ||
| 905 | + \ | ||
| 906 | + CONVFAIL = 0; \ | ||
| 907 | + state_bak = *STATEP; \ | ||
| 908 | + \ | ||
| 909 | + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ | ||
| 910 | + \ | ||
| 911 | + switch (MBLENGTH) \ | ||
| 912 | + { \ | ||
| 913 | + case (size_t)-2: \ | ||
| 914 | + case (size_t)-1: \ | ||
| 915 | + *STATEP = state_bak; \ | ||
| 916 | + CONVFAIL++; \ | ||
| 917 | + /* Fall through */ \ | ||
| 918 | + case 0: \ | ||
| 919 | + MBLENGTH = 1; \ | ||
| 920 | + } \ | ||
| 921 | + } \ | ||
| 922 | + while (0) | ||
| 923 | + | ||
| 924 | +static char * | ||
| 925 | +find_field_multi (struct linebuffer *line) | ||
| 926 | cvsdist | 1.1 | +{ |
| 927 | twaugh | 1.20 | + size_t count; |
| 928 | + char *lp = line->buffer; | ||
| 929 | + size_t size = line->length - 1; | ||
| 930 | + size_t pos; | ||
| 931 | + size_t mblength; | ||
| 932 | cvsdist | 1.1 | + wchar_t wc; |
| 933 | twaugh | 1.20 | + mbstate_t *statep; |
| 934 | + int convfail; | ||
| 935 | cvsdist | 1.1 | + |
| 936 | twaugh | 1.20 | + pos = 0; |
| 937 | + statep = &(line->state); | ||
| 938 | cvsdist | 1.1 | + |
| 939 | twaugh | 1.20 | + /* Skip SKIP_FIELDS fields: each is a run of blanks, then a run of non-blanks. */ |
| 940 | + for (count = 0; count < skip_fields && pos < size; count++) | ||
| 941 | cvsdist | 1.1 | + { |
| 942 | twaugh | 1.20 | + while (pos < size) |
| 943 | cvsdist | 1.1 | + { |
| 944 | twaugh | 1.20 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); |
| 945 | + | ||
| 946 | + if (convfail || !iswblank (wc)) | ||
| 947 | cvsdist | 1.1 | + { |
| 948 | twaugh | 1.20 | + pos += mblength; |
| 949 | + break; | ||
| 950 | cvsdist | 1.1 | + } |
| 951 | twaugh | 1.20 | + pos += mblength; |
| 952 | + } | ||
| 953 | cvsdist | 1.1 | + |
| 954 | twaugh | 1.20 | + while (pos < size) |
| 955 | + { | ||
| 956 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); | ||
| 957 | cvsdist | 1.1 | + |
| 958 | twaugh | 1.20 | + if (!convfail && iswblank (wc)) |
| 959 | + break; | ||
| 960 | cvsdist | 1.1 | + |
| 961 | twaugh | 1.20 | + pos += mblength; |
| 962 | + } | ||
| 963 | + } | ||
| 964 | cvsdist | 1.1 | + |
| 965 | twaugh | 1.20 | + /* Skip SKIP_CHARS further characters (whole multibyte characters, not bytes). */ |
| 966 | + for (count = 0; count < skip_chars && pos < size; count++) | ||
| 967 | + { | ||
| 968 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); | ||
| 969 | + pos += mblength; | ||
| 970 | cvsdist | 1.1 | + } |
| 971 | + | ||
| 972 | twaugh | 1.20 | + return lp + pos; |
| 973 | cvsdist | 1.1 | +} |
| 974 | +#endif | ||
| 975 | + | ||
| 976 | twaugh | 1.20 | /* Return false if two strings OLD and NEW match, true if not. |
| 977 | OLD and NEW point not to the beginnings of the lines | ||
| 978 | but rather to the beginnings of the fields to compare. | ||
| 979 | twaugh | 1.21 | @@ -227,6 +330,8 @@ |
| 980 | twaugh | 1.20 | static bool |
| 981 | different (char *old, char *new, size_t oldlen, size_t newlen) | ||
| 982 | { | ||
| 983 | + char *copy_old, *copy_new; | ||
| 984 | + | ||
| 985 | if (check_chars < oldlen) | ||
| 986 | oldlen = check_chars; | ||
| 987 | if (check_chars < newlen) | ||
| 988 | twaugh | 1.21 | @@ -234,14 +339,92 @@ |
| 989 | cvsdist | 1.1 | |
| 990 | twaugh | 1.20 | if (ignore_case) |
| 991 | { | ||
| 992 | - /* FIXME: This should invoke strcoll somehow. */ | ||
| 993 | - return oldlen != newlen || memcasecmp (old, new, oldlen); | ||
| 994 | + size_t i; | ||
| 995 | + | ||
| 996 | + copy_old = alloca (oldlen + 1); | ||
| 997 | + copy_new = alloca (newlen + 1); | ||
| 998 | + | ||
| 999 | + /* Fold each field over its own length; OLDLEN and NEWLEN may differ. */ | ||
| 1000 | + for (i = 0; i < oldlen; i++) | ||
| 1001 | + copy_old[i] = toupper ((unsigned char) old[i]); | ||
| 1002 | + for (i = 0; i < newlen; i++) | ||
| 1003 | + copy_new[i] = toupper ((unsigned char) new[i]); | ||
| 1004 | } | ||
| 1005 | - else if (hard_LC_COLLATE) | ||
| 1006 | - return xmemcoll (old, oldlen, new, newlen) != 0; | ||
| 1007 | else | ||
| 1008 | - return oldlen != newlen || memcmp (old, new, oldlen); | ||
| 1009 | + { | ||
| 1010 | + copy_old = (char *)old; | ||
| 1011 | + copy_new = (char *)new; | ||
| 1012 | + } | ||
| 1013 | + | ||
| 1014 | + return xmemcoll (copy_old, oldlen, copy_new, newlen); | ||
| 1015 | +} | ||
| 1016 | + | ||
| 1017 | +#if HAVE_MBRTOWC | ||
| 1018 | +static int | ||
| 1019 | +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) | ||
| 1020 | +{ | ||
| 1021 | + size_t i, j, chars; | ||
| 1022 | + const char *str[2]; | ||
| 1023 | + char *copy[2]; | ||
| 1024 | + size_t len[2]; | ||
| 1025 | + mbstate_t state[2]; | ||
| 1026 | + size_t mblength; | ||
| 1027 | + wchar_t wc, uwc; | ||
| 1028 | + mbstate_t state_bak; | ||
| 1029 | + | ||
| 1030 | + str[0] = old; | ||
| 1031 | + str[1] = new; | ||
| 1032 | + len[0] = oldlen; | ||
| 1033 | + len[1] = newlen; | ||
| 1034 | + state[0] = oldstate; | ||
| 1035 | + state[1] = newstate; | ||
| 1036 | + | ||
| 1037 | + for (i = 0; i < 2; i++) | ||
| 1038 | + { | ||
| 1039 | + copy[i] = alloca (len[i] + 1); | ||
| 1040 | + | ||
| 1041 | + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) | ||
| 1042 | + { | ||
| 1043 | + state_bak = state[i]; | ||
| 1044 | + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); | ||
| 1045 | + | ||
| 1046 | + switch (mblength) | ||
| 1047 | + { | ||
| 1048 | + case (size_t)-1: | ||
| 1049 | + case (size_t)-2: | ||
| 1050 | + state[i] = state_bak; | ||
| 1051 | + /* Fall through */ | ||
| 1052 | + case 0: | ||
| 1053 | + /* Invalid, incomplete or NUL input: pass one byte through as is. */ | ||
| 1054 | + mblength = 1; | ||
| 1055 | + copy[i][j] = str[i][j]; | ||
| 1056 | + break; | ||
| 1057 | + | ||
| 1058 | + default: | ||
| 1059 | + /* Start from the original bytes; replace them below if folded. */ | ||
| 1060 | + uwc = ignore_case ? towupper (wc) : wc; | ||
| 1061 | + memcpy (copy[i] + j, str[i] + j, mblength); | ||
| 1062 | + | ||
| 1063 | + if (uwc != wc) | ||
| 1064 | + { | ||
| 1065 | + char upper[MB_LEN_MAX]; | ||
| 1066 | + mbstate_t state_wc; | ||
| 1067 | + | ||
| 1068 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | ||
| 1069 | + /* Keep the folded form only if it occupies the same bytes. */ | ||
| 1070 | + if (wcrtomb (upper, uwc, &state_wc) == mblength) | ||
| 1071 | + memcpy (copy[i] + j, upper, mblength); | ||
| 1072 | + } | ||
| 1073 | + } | ||
| 1074 | + j += mblength; | ||
| 1075 | + } | ||
| 1076 | + copy[i][j] = '\0'; | ||
| 1077 | + len[i] = j; | ||
| 1078 | + } | ||
| 1079 | + | ||
| 1080 | + return xmemcoll (copy[0], len[0], copy[1], len[1]); | ||
| 1081 | } | ||
| 1082 | +#endif | ||
| 1083 | |||
| 1084 | /* Output the line in linebuffer LINE to standard output | ||
| 1085 | provided that the switches say it should be output. | ||
| 1086 | twaugh | 1.21 | @@ -295,15 +478,43 @@ |
| 1087 | twaugh | 1.20 | { |
| 1088 | char *prevfield IF_LINT (= NULL); | ||
| 1089 | size_t prevlen IF_LINT (= 0); | ||
| 1090 | +#if HAVE_MBRTOWC | ||
| 1091 | + mbstate_t prevstate; | ||
| 1092 | + | ||
| 1093 | + memset (&prevstate, '\0', sizeof (mbstate_t)); | ||
| 1094 | +#endif | ||
| 1095 | |||
| 1096 | while (!feof (stdin)) | ||
| 1097 | { | ||
| 1098 | char *thisfield; | ||
| 1099 | size_t thislen; | ||
| 1100 | +#if HAVE_MBRTOWC | ||
| 1101 | + mbstate_t thisstate; | ||
| 1102 | +#endif | ||
| 1103 | + | ||
| 1104 | ovasik | 1.23 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) |
| 1105 | twaugh | 1.20 | break; |
| 1106 | thisfield = find_field (thisline); | ||
| 1107 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); | ||
| 1108 | +#if HAVE_MBRTOWC | ||
| 1109 | + if (MB_CUR_MAX > 1) | ||
| 1110 | + { | ||
| 1111 | + thisstate = thisline->state; | ||
| 1112 | + | ||
| 1113 | + if (prevline->length == 0 || different_multi | ||
| 1114 | + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) | ||
| 1115 | + { | ||
| 1116 | + fwrite (thisline->buffer, sizeof (char), | ||
| 1117 | + thisline->length, stdout); | ||
| 1118 | + | ||
| 1119 | + SWAP_LINES (prevline, thisline); | ||
| 1120 | + prevfield = thisfield; | ||
| 1121 | + prevlen = thislen; | ||
| 1122 | + prevstate = thisstate; | ||
| 1123 | + } | ||
| 1124 | + } | ||
| 1125 | + else | ||
| 1126 | +#endif | ||
| 1127 | if (prevline->length == 0 | ||
| 1128 | || different (thisfield, prevfield, thislen, prevlen)) | ||
| 1129 | { | ||
| 1130 | twaugh | 1.21 | @@ -322,17 +533,26 @@ |
| 1131 | twaugh | 1.20 | size_t prevlen; |
| 1132 | uintmax_t match_count = 0; | ||
| 1133 | bool first_delimiter = true; | ||
| 1134 | +#if HAVE_MBRTOWC | ||
| 1135 | + mbstate_t prevstate; | ||
| 1136 | +#endif | ||
| 1137 | |||
| 1138 | ovasik | 1.23 | if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) |
| 1139 | twaugh | 1.20 | goto closefiles; |
| 1140 | prevfield = find_field (prevline); | ||
| 1141 | prevlen = prevline->length - 1 - (prevfield - prevline->buffer); | ||
| 1142 | +#if HAVE_MBRTOWC | ||
| 1143 | + prevstate = prevline->state; | ||
| 1144 | +#endif | ||
| 1145 | |||
| 1146 | while (!feof (stdin)) | ||
| 1147 | { | ||
| 1148 | bool match; | ||
| 1149 | char *thisfield; | ||
| 1150 | size_t thislen; | ||
| 1151 | +#if HAVE_MBRTOWC | ||
| 1152 | + mbstate_t thisstate; | ||
| 1153 | +#endif | ||
| 1154 | ovasik | 1.23 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) |
| 1155 | twaugh | 1.20 | { |
| 1156 | if (ferror (stdin)) | ||
| 1157 | twaugh | 1.21 | @@ -341,6 +561,15 @@ |
| 1158 | twaugh | 1.20 | } |
| 1159 | thisfield = find_field (thisline); | ||
| 1160 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); | ||
| 1161 | +#if HAVE_MBRTOWC | ||
| 1162 | + if (MB_CUR_MAX > 1) | ||
| 1163 | + { | ||
| 1164 | + thisstate = thisline->state; | ||
| 1165 | + match = !different_multi (thisfield, prevfield, | ||
| 1166 | + thislen, prevlen, thisstate, prevstate); | ||
| 1167 | + } | ||
| 1168 | + else | ||
| 1169 | +#endif | ||
| 1170 | match = !different (thisfield, prevfield, thislen, prevlen); | ||
| 1171 | match_count += match; | ||
| 1172 | |||
| 1173 | twaugh | 1.21 | @@ -373,6 +602,9 @@ |
| 1174 | twaugh | 1.20 | SWAP_LINES (prevline, thisline); |
| 1175 | prevfield = thisfield; | ||
| 1176 | prevlen = thislen; | ||
| 1177 | +#if HAVE_MBRTOWC | ||
| 1178 | + prevstate = thisstate; | ||
| 1179 | +#endif | ||
| 1180 | if (!match) | ||
| 1181 | match_count = 0; | ||
| 1182 | } | ||
| 1183 | twaugh | 1.21 | @@ -417,6 +649,19 @@ |
| 1184 | twaugh | 1.20 | |
| 1185 | atexit (close_stdout); | ||
| 1186 | |||
| 1187 | +#if HAVE_MBRTOWC | ||
| 1188 | + if (MB_CUR_MAX > 1) | ||
| 1189 | + { | ||
| 1190 | + find_field = find_field_multi; | ||
| 1191 | + } | ||
| 1192 | + else | ||
| 1193 | +#endif | ||
| 1194 | + { | ||
| 1195 | + find_field = find_field_uni; | ||
| 1196 | + } | ||
| 1197 | + | ||
| 1198 | + | ||
| 1199 | + | ||
| 1200 | skip_chars = 0; | ||
| 1201 | skip_fields = 0; | ||
| 1202 | check_chars = SIZE_MAX; | ||
| 1203 | twaugh | 1.21 | --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000 |
| 1204 | +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000 | ||
| 1205 | twaugh | 1.20 | @@ -23,11 +23,33 @@ |
| 1206 | #include <getopt.h> | ||
| 1207 | twaugh | 1.16 | #include <sys/types.h> |
| 1208 | cvsdist | 1.1 | |
| 1209 | twaugh | 1.20 | +/* Get mbstate_t, mbrtowc(), wcwidth(). */ |
| 1210 | cvsdist | 1.1 | +#if HAVE_WCHAR_H |
| 1211 | +# include <wchar.h> | ||
| 1212 | +#endif | ||
| 1213 | + | ||
| 1214 | twaugh | 1.20 | +/* Get iswprint(), iswblank(), wcwidth(). */ |
| 1215 | cvsdist | 1.1 | +#if HAVE_WCTYPE_H |
| 1216 | +# include <wctype.h> | ||
| 1217 | +#endif | ||
| 1218 | + | ||
| 1219 | #include "system.h" | ||
| 1220 | twaugh | 1.16 | #include "error.h" |
| 1221 | twaugh | 1.11 | #include "quote.h" |
| 1222 | cvsdist | 1.1 | #include "xstrtol.h" |
| 1223 | twaugh | 1.16 | |
| 1224 | twaugh | 1.20 | +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC |
| 1225 | + installation; work around this configuration error. */ | ||
| 1226 | +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 | ||
| 1227 | +# undef MB_LEN_MAX | ||
| 1228 | +# define MB_LEN_MAX 16 | ||
| 1229 | +#endif | ||
| 1230 | + | ||
| 1231 | cvsdist | 1.1 | +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ |
| 1232 | +#if HAVE_MBRTOWC && defined mbstate_t | ||
| 1233 | +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) | ||
| 1234 | +#endif | ||
| 1235 | + | ||
| 1236 | twaugh | 1.20 | #define TAB_WIDTH 8 |
| 1237 | |||
| 1238 | twaugh | 1.16 | /* The official name of this program (e.g., no `g' prefix). */ |
| 1239 | ovasik | 1.28 | @@ -35,20 +57,41 @@ |
| 1240 | cvsdist | 1.1 | |
| 1241 | ovasik | 1.27 | #define AUTHORS proper_name ("David MacKenzie") |
| 1242 | cvsdist | 1.1 | |
| 1243 | twaugh | 1.20 | +#define FATAL_ERROR(Message) \ |
| 1244 | + do \ | ||
| 1245 | + { \ | ||
| 1246 | + error (0, 0, (Message)); \ | ||
| 1247 | + usage (2); \ | ||
| 1248 | + } \ | ||
| 1249 | + while (0) | ||
| 1250 | cvsdist | 1.1 | + |
| 1251 | twaugh | 1.20 | +enum operating_mode |
| 1252 | +{ | ||
| 1253 | + /* Fold texts by columns that are at the given positions. */ | ||
| 1254 | + column_mode, | ||
| 1255 | twaugh | 1.16 | + |
| 1256 | twaugh | 1.20 | + /* Fold texts by bytes that are at the given positions. */ |
| 1257 | + byte_mode, | ||
| 1258 | + | ||
| 1259 | + /* Fold texts by characters that are at the given positions. */ | ||
| 1260 | + character_mode, | ||
| 1261 | +}; | ||
| 1262 | + | ||
| 1263 | +/* The argument shows current mode. (Default: column_mode) */ | ||
| 1264 | +static enum operating_mode operating_mode; | ||
| 1265 | + | ||
| 1266 | /* If nonzero, try to break on whitespace. */ | ||
| 1267 | static bool break_spaces; | ||
| 1268 | |||
| 1269 | -/* If nonzero, count bytes, not column positions. */ | ||
| 1270 | -static bool count_bytes; | ||
| 1271 | - | ||
| 1272 | /* If nonzero, at least one of the files we read was standard input. */ | ||
| 1273 | static bool have_read_stdin; | ||
| 1274 | |||
| 1275 | -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; | ||
| 1276 | +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; | ||
| 1277 | |||
| 1278 | static struct option const longopts[] = | ||
| 1279 | { | ||
| 1280 | {"bytes", no_argument, NULL, 'b'}, | ||
| 1281 | + {"characters", no_argument, NULL, 'c'}, | ||
| 1282 | {"spaces", no_argument, NULL, 's'}, | ||
| 1283 | {"width", required_argument, NULL, 'w'}, | ||
| 1284 | {GETOPT_HELP_OPTION_DECL}, | ||
| 1285 | @@ -81,6 +124,7 @@ | ||
| 1286 | "), stdout); | ||
| 1287 | fputs (_("\ | ||
| 1288 | -b, --bytes count bytes rather than columns\n\ | ||
| 1289 | + -c, --characters count characters rather than columns\n\ | ||
| 1290 | -s, --spaces break at spaces\n\ | ||
| 1291 | -w, --width=WIDTH use WIDTH columns instead of 80\n\ | ||
| 1292 | "), stdout); | ||
| 1293 | @@ -98,7 +142,7 @@ | ||
| 1294 | static size_t | ||
| 1295 | adjust_column (size_t column, char c) | ||
| 1296 | { | ||
| 1297 | - if (!count_bytes) | ||
| 1298 | + if (operating_mode != byte_mode) | ||
| 1299 | { | ||
| 1300 | if (c == '\b') | ||
| 1301 | { | ||
| 1302 | @@ -121,30 +165,14 @@ | ||
| 1303 | to stdout, with maximum line length WIDTH. | ||
| 1304 | Return true if successful. */ | ||
| 1305 | |||
| 1306 | -static bool | ||
| 1307 | -fold_file (char const *filename, size_t width) | ||
| 1308 | +static void | ||
| 1309 | +fold_text (FILE *istream, size_t width, int *saved_errno) | ||
| 1310 | cvsdist | 1.1 | { |
| 1311 | twaugh | 1.20 | - FILE *istream; |
| 1312 | int c; | ||
| 1313 | size_t column = 0; /* Screen column where next char will go. */ | ||
| 1314 | size_t offset_out = 0; /* Index in `line_out' for next char. */ | ||
| 1315 | static char *line_out = NULL; | ||
| 1316 | static size_t allocated_out = 0; | ||
| 1317 | - int saved_errno; | ||
| 1318 | - | ||
| 1319 | - if (STREQ (filename, "-")) | ||
| 1320 | - { | ||
| 1321 | - istream = stdin; | ||
| 1322 | - have_read_stdin = true; | ||
| 1323 | - } | ||
| 1324 | - else | ||
| 1325 | - istream = fopen (filename, "r"); | ||
| 1326 | - | ||
| 1327 | - if (istream == NULL) | ||
| 1328 | - { | ||
| 1329 | - error (0, errno, "%s", filename); | ||
| 1330 | - return false; | ||
| 1331 | - } | ||
| 1332 | twaugh | 1.16 | |
| 1333 | twaugh | 1.20 | while ((c = getc (istream)) != EOF) |
| 1334 | twaugh | 1.16 | { |
| 1335 | twaugh | 1.20 | @@ -172,6 +200,15 @@ |
| 1336 | bool found_blank = false; | ||
| 1337 | size_t logical_end = offset_out; | ||
| 1338 | |||
| 1339 | + /* If LINE_OUT has no wide character, | ||
| 1340 | + put a new wide character in LINE_OUT | ||
| 1341 | + if column is bigger than width. */ | ||
| 1342 | + if (offset_out == 0) | ||
| 1343 | + { | ||
| 1344 | + line_out[offset_out++] = c; | ||
| 1345 | + continue; | ||
| 1346 | + } | ||
| 1347 | + | ||
| 1348 | /* Look for the last blank. */ | ||
| 1349 | while (logical_end) | ||
| 1350 |