/[pkgs]/devel/coreutils/coreutils-i18n.patch
ViewVC logotype

Diff of /devel/coreutils/coreutils-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

Revision 1.38 Revision 1.39
185+#endif 185+#endif
186+ 186+
187 /* The official name of this program (e.g., no `g' prefix). */ 187 /* The official name of this program (e.g., no `g' prefix). */
188 #define PROGRAM_NAME "expand" 188 #define PROGRAM_NAME "expand"
189 189
190@@ -183,6 +200,7 @@
191 stops = num_start + len - 1;
192 }
193 }
194+
195 else
196 {
197 error (0, 0, _("tab size contains invalid character(s): %s"),
198@@ -365,6 +383,142 @@ 190@@ -365,6 +383,142 @@
199 } 191 }
200 } 192 }
201 193
202+#if HAVE_MBRTOWC 194+#if HAVE_MBRTOWC
412 { 404 {
413+ unsigned char t = tab[0]; 405+ unsigned char t = tab[0];
414 char *sep; 406 char *sep;
415- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) 407- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
416+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) 408+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
417 extract_field (line, ptr, sep - ptr); 409 extract_field (line, ptr, sep - ptr);
418 } 410 }
419 else 411 else
420@@ -229,6 +248,148 @@ 412@@ -229,6 +248,148 @@
421 extract_field (line, ptr, lim - ptr); 413 extract_field (line, ptr, lim - ptr);
422 } 414 }
584 const struct outlist *outlist; 576 const struct outlist *outlist;
585- char output_separator = tab < 0 ? ' ' : tab; 577- char output_separator = tab < 0 ? ' ' : tab;
586 578
587 outlist = outlist_head.next; 579 outlist = outlist_head.next;
588 if (outlist) 580 if (outlist)
589@@ -397,12 +628,12 @@
590 if (o->file == 0)
591 {
592 if (line1 == &uni_blank)
593- {
594+ {
595 line = line2;
596 field = join_field_2;
597 }
598 else
599- {
600+ {
601 line = line1;
602 field = join_field_1;
603 }
604@@ -416,7 +647,7 @@ 581@@ -416,7 +647,7 @@
605 o = o->next; 582 o = o->next;
606 if (o == NULL) 583 if (o == NULL)
607 break; 584 break;
608- putchar (output_separator); 585- putchar (output_separator);
609+ PUT_TAB_CHAR; 586+ PUT_TAB_CHAR;
610 } 587 }
611 putchar ('\n'); 588 putchar ('\n');
612 } 589 }
613@@ -434,23 +665,23 @@ 590@@ -434,23 +665,23 @@
614 prfield (join_field_1, line1); 591 prfield (join_field_1, line1);
615 for (i = 0; i < join_field_1 && i < line1->nfields; ++i) 592 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
616 { 593 {
617- putchar (output_separator); 594- putchar (output_separator);
618+ PUT_TAB_CHAR; 595+ PUT_TAB_CHAR;
619 prfield (i, line1); 596 prfield (i, line1);
620 } 597 }
621 for (i = join_field_1 + 1; i < line1->nfields; ++i) 598 for (i = join_field_1 + 1; i < line1->nfields; ++i)
622 { 599 {
623- putchar (output_separator); 600- putchar (output_separator);
624+ PUT_TAB_CHAR; 601+ PUT_TAB_CHAR;
625 prfield (i, line1); 602 prfield (i, line1);
626 } 603 }
627 604
628 for (i = 0; i < join_field_2 && i < line2->nfields; ++i) 605 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
629 { 606 {
630- putchar (output_separator); 607- putchar (output_separator);
631+ PUT_TAB_CHAR; 608+ PUT_TAB_CHAR;
632 prfield (i, line2); 609 prfield (i, line2);
633 } 610 }
634 for (i = join_field_2 + 1; i < line2->nfields; ++i) 611 for (i = join_field_2 + 1; i < line2->nfields; ++i)
635 { 612 {
636- putchar (output_separator); 613- putchar (output_separator);
637+ PUT_TAB_CHAR; 614+ PUT_TAB_CHAR;
638 prfield (i, line2); 615 prfield (i, line2);
639 } 616 }
640 putchar ('\n'); 617 putchar ('\n');
641@@ -859,20 +1090,41 @@ 618@@ -859,20 +1090,41 @@
642 619
643 case 't': 620 case 't':
644 { 621 {
645- unsigned char newtab = optarg[0]; 622- unsigned char newtab = optarg[0];
646- if (! newtab) 623- if (! newtab)
647+ char *newtab; 624+ char *newtab;
648+ size_t newtablen; 625+ size_t newtablen;
649+ if (! optarg[0]) 626+ if (! optarg[0])
650 error (EXIT_FAILURE, 0, _("empty tab")); 627 error (EXIT_FAILURE, 0, _("empty tab"));
651- if (optarg[1]) 628- if (optarg[1])
652+ newtab = xstrdup (optarg); 629+ newtab = xstrdup (optarg);
653+#if HAVE_MBRTOWC 630+#if HAVE_MBRTOWC
654+ if (MB_CUR_MAX > 1) 631+ if (MB_CUR_MAX > 1)
655+ { 632+ {
656+ mbstate_t state; 633+ mbstate_t state;
657+ 634+
658+ memset (&state, 0, sizeof (mbstate_t)); 635+ memset (&state, 0, sizeof (mbstate_t));
659+ newtablen = mbrtowc (NULL, newtab, 636+ newtablen = mbrtowc (NULL, newtab,
660+ strnlen (newtab, MB_LEN_MAX), 637+ strnlen (newtab, MB_LEN_MAX),
661+ &state); 638+ &state);
662+ if (newtablen == (size_t) 0 639+ if (newtablen == (size_t) 0
663+ || newtablen == (size_t) -1 640+ || newtablen == (size_t) -1
664+ || newtablen == (size_t) -2) 641+ || newtablen == (size_t) -2)
665+ newtablen = 1; 642+ newtablen = 1;
666+ } 643+ }
667+ else 644+ else
668+#endif 645+#endif
669+ newtablen = 1; 646+ newtablen = 1;
670+ 647+
671+ if (newtablen == 1 && newtab[1]) 648+ if (newtablen == 1 && newtab[1])
672+ { 649+ {
673+ if (STREQ (newtab, "\\0")) 650+ if (STREQ (newtab, "\\0"))
674+ newtab[0] = '\0'; 651+ newtab[0] = '\0';
675+ } 652+ }
676+ if (tab != NULL && strcmp (tab, newtab)) 653+ if (tab != NULL && strcmp (tab, newtab))
677 { 654 {
678- if (STREQ (optarg, "\\0")) 655- if (STREQ (optarg, "\\0"))
679- newtab = '\0'; 656- newtab = '\0';
680- else 657- else
681- error (EXIT_FAILURE, 0, _("multi-character tab %s"), 658- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
682- quote (optarg)); 659- quote (optarg));
683+ free (newtab); 660+ free (newtab);
684+ error (EXIT_FAILURE, 0, _("incompatible tabs")); 661+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
685 } 662 }
686- if (0 <= tab && tab != newtab) 663- if (0 <= tab && tab != newtab)
687- error (EXIT_FAILURE, 0, _("incompatible tabs")); 664- error (EXIT_FAILURE, 0, _("incompatible tabs"));
688 tab = newtab; 665 tab = newtab;
689+ tablen = newtablen; 666+ tablen = newtablen;
690 } 667 }
691 break; 668 break;
692 669
693diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c 670diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
694--- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 671--- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200
695+++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 672+++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200
696@@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct 673@@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct
697 size_t jf_1, size_t jf_2) 674 size_t jf_1, size_t jf_2)
698 { 675 {
699 /* Start of field to compare in each file. */ 676 /* Start of field to compare in each file. */
700- char *beg1; 677- char *beg1;
701- char *beg2; 678- char *beg2;
702- 679-
812+ } 789+ }
813 } 790 }
814 else 791 else
815 { 792 {
816- if (hard_LC_COLLATE) 793- if (hard_LC_COLLATE)
817- return xmemcoll (beg1, len1, beg2, len2); 794- return xmemcoll (beg1, len1, beg2, len2);
818- diff = memcmp (beg1, beg2, MIN (len1, len2)); 795- diff = memcmp (beg1, beg2, MIN (len1, len2));
819+ copy[0] = (unsigned char *) beg[0]; 796+ copy[0] = (unsigned char *) beg[0];
820+ copy[1] = (unsigned char *) beg[1]; 797+ copy[1] = (unsigned char *) beg[1];
821 } 798 }
822 799
897 } 874 }
898 875
899+#if HAVE_MBRTOWC 876+#if HAVE_MBRTOWC
900+ 877+
901+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ 878+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
902+ do \ 879+ do \
903+ { \ 880+ { \
904+ mbstate_t state_bak; \ 881+ mbstate_t state_bak; \
905+ \ 882+ \
906+ CONVFAIL = 0; \ 883+ CONVFAIL = 0; \
907+ state_bak = *STATEP; \ 884+ state_bak = *STATEP; \
908+ \ 885+ \
909+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ 886+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
910+ \ 887+ \
911+ switch (MBLENGTH) \ 888+ switch (MBLENGTH) \
912+ { \ 889+ { \
913+ case (size_t)-2: \ 890+ case (size_t)-2: \
914+ case (size_t)-1: \ 891+ case (size_t)-1: \
915+ *STATEP = state_bak; \ 892+ *STATEP = state_bak; \
916+ CONVFAIL++; \ 893+ CONVFAIL++; \
917+ /* Fall through */ \ 894+ /* Fall through */ \
918+ case 0: \ 895+ case 0: \
919+ MBLENGTH = 1; \ 896+ MBLENGTH = 1; \
920+ } \ 897+ } \
921+ } \ 898+ } \
922+ while (0) 899+ while (0)
923+ 900+
924+static char * 901+static char *
925+find_field_multi (struct linebuffer *line) 902+find_field_multi (struct linebuffer *line)
926+{ 903+{
938+ 915+
939+ /* skip fields. */ 916+ /* skip fields. */
940+ for (count = 0; count < skip_fields && pos < size; count++) 917+ for (count = 0; count < skip_fields && pos < size; count++)
941+ { 918+ {
942+ while (pos < size) 919+ while (pos < size)
943+ { 920+ {
944+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 921+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
945+ 922+
946+ if (convfail || !iswblank (wc)) 923+ if (convfail || !iswblank (wc))
947+ { 924+ {
925+ pos += mblength;
926+ break;
927+ }
948+ pos += mblength; 928+ pos += mblength;
949+ break; 929+ }
950+ }
951+ pos += mblength;
952+ }
953+ 930+
954+ while (pos < size) 931+ while (pos < size)
955+ { 932+ {
956+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 933+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
957+ 934+
958+ if (!convfail && iswblank (wc)) 935+ if (!convfail && iswblank (wc))
959+ break; 936+ break;
960+ 937+
961+ pos += mblength; 938+ pos += mblength;
962+ } 939+ }
963+ } 940+ }
964+ 941+
965+ /* skip fields. */ 942+ /* skip fields. */
966+ for (count = 0; count < skip_chars && pos < size; count++) 943+ for (count = 0; count < skip_chars && pos < size; count++)
967+ { 944+ {
995+ 972+
996+ copy_old = alloca (oldlen + 1); 973+ copy_old = alloca (oldlen + 1);
997+ copy_new = alloca (oldlen + 1); 974+ copy_new = alloca (oldlen + 1);
998+ 975+
999+ for (i = 0; i < oldlen; i++) 976+ for (i = 0; i < oldlen; i++)
1000+ { 977+ {
1001+ copy_old[i] = toupper (old[i]); 978+ copy_old[i] = toupper (old[i]);
1002+ copy_new[i] = toupper (new[i]); 979+ copy_new[i] = toupper (new[i]);
1003+ } 980+ }
1004 } 981 }
1005- else if (hard_LC_COLLATE) 982- else if (hard_LC_COLLATE)
1006- return xmemcoll (old, oldlen, new, newlen) != 0; 983- return xmemcoll (old, oldlen, new, newlen) != 0;
1007 else 984 else
1008- return oldlen != newlen || memcmp (old, new, oldlen); 985- return oldlen != newlen || memcmp (old, new, oldlen);
1037+ for (i = 0; i < 2; i++) 1014+ for (i = 0; i < 2; i++)
1038+ { 1015+ {
1039+ copy[i] = alloca (len[i] + 1); 1016+ copy[i] = alloca (len[i] + 1);
1040+ 1017+
1041+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) 1018+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1042+ { 1019+ {
1043+ state_bak = state[i]; 1020+ state_bak = state[i];
1044+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); 1021+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1045+ 1022+
1046+ switch (mblength) 1023+ switch (mblength)
1047+ { 1024+ {
1048+ case (size_t)-1: 1025+ case (size_t)-1:
1049+ case (size_t)-2: 1026+ case (size_t)-2:
1050+ state[i] = state_bak; 1027+ state[i] = state_bak;
1051+ /* Fall through */ 1028+ /* Fall through */
1052+ case 0: 1029+ case 0:
1053+ mblength = 1; 1030+ mblength = 1;
1054+ break; 1031+ break;
1055+ 1032+
1056+ default: 1033+ default:
1057+ if (ignore_case) 1034+ if (ignore_case)
1058+ { 1035+ {
1059+ uwc = towupper (wc); 1036+ uwc = towupper (wc);
1060+ 1037+
1061+ if (uwc != wc) 1038+ if (uwc != wc)
1062+ { 1039+ {
1063+ mbstate_t state_wc; 1040+ mbstate_t state_wc;
1064+ 1041+
1065+ memset (&state_wc, '\0', sizeof(mbstate_t)); 1042+ memset (&state_wc, '\0', sizeof(mbstate_t));
1066+ wcrtomb (copy[i] + j, uwc, &state_wc); 1043+ wcrtomb (copy[i] + j, uwc, &state_wc);
1067+ } 1044+ }
1068+ else 1045+ else
1046+ memcpy (copy[i] + j, str[i] + j, mblength);
1047+ }
1048+ else
1069+ memcpy (copy[i] + j, str[i] + j, mblength); 1049+ memcpy (copy[i] + j, str[i] + j, mblength);
1070+ } 1050+ }
1071+ else
1072+ memcpy (copy[i] + j, str[i] + j, mblength);
1073+ }
1074+ j += mblength; 1051+ j += mblength;
1075+ } 1052+ }
1076+ copy[i][j] = '\0'; 1053+ copy[i][j] = '\0';
1077+ len[i] = j; 1054+ len[i] = j;
1078+ } 1055+ }
1079+ 1056+
1080+ return xmemcoll (copy[0], len[0], copy[1], len[1]); 1057+ return xmemcoll (copy[0], len[0], copy[1], len[1]);
1092+ 1069+
1093+ memset (&prevstate, '\0', sizeof (mbstate_t)); 1070+ memset (&prevstate, '\0', sizeof (mbstate_t));
1094+#endif 1071+#endif
1095 1072
1096 while (!feof (stdin)) 1073 while (!feof (stdin))
1097 { 1074 {
1098 char *thisfield; 1075 char *thisfield;
1099 size_t thislen; 1076 size_t thislen;
1100+#if HAVE_MBRTOWC 1077+#if HAVE_MBRTOWC
1101+ mbstate_t thisstate; 1078+ mbstate_t thisstate;
1102+#endif 1079+#endif
1103+ 1080+
1104 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 1081 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1105 break; 1082 break;
1106 thisfield = find_field (thisline); 1083 thisfield = find_field (thisline);
1107 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 1084 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1108+#if HAVE_MBRTOWC 1085+#if HAVE_MBRTOWC
1109+ if (MB_CUR_MAX > 1) 1086+ if (MB_CUR_MAX > 1)
1110+ { 1087+ {
1111+ thisstate = thisline->state; 1088+ thisstate = thisline->state;
1112+ 1089+
1113+ if (prevline->length == 0 || different_multi 1090+ if (prevline->length == 0 || different_multi
1114+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) 1091+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1120+ prevfield = thisfield; 1097+ prevfield = thisfield;
1121+ prevlen = thislen; 1098+ prevlen = thislen;
1122+ prevstate = thisstate; 1099+ prevstate = thisstate;
1123+ } 1100+ }
1124+ } 1101+ }
1125+ else 1102+ else
1126+#endif 1103+#endif
1127 if (prevline->length == 0 1104 if (prevline->length == 0
1128 || different (thisfield, prevfield, thislen, prevlen)) 1105 || different (thisfield, prevfield, thislen, prevlen))
1129 { 1106 {
1130@@ -322,17 +533,26 @@ 1107@@ -322,17 +533,26 @@
1131 size_t prevlen; 1108 size_t prevlen;
1132 uintmax_t match_count = 0; 1109 uintmax_t match_count = 0;
1133 bool first_delimiter = true; 1110 bool first_delimiter = true;
1134+#if HAVE_MBRTOWC 1111+#if HAVE_MBRTOWC
1135+ mbstate_t prevstate; 1112+ mbstate_t prevstate;
1136+#endif 1113+#endif
1137 1114
1138 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) 1115 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
1139 goto closefiles; 1116 goto closefiles;
1140 prevfield = find_field (prevline); 1117 prevfield = find_field (prevline);
1141 prevlen = prevline->length - 1 - (prevfield - prevline->buffer); 1118 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1142+#if HAVE_MBRTOWC 1119+#if HAVE_MBRTOWC
1143+ prevstate = prevline->state; 1120+ prevstate = prevline->state;
1144+#endif 1121+#endif
1145 1122
1146 while (!feof (stdin)) 1123 while (!feof (stdin))
1147 { 1124 {
1148 bool match; 1125 bool match;
1149 char *thisfield; 1126 char *thisfield;
1150 size_t thislen; 1127 size_t thislen;
1151+#if HAVE_MBRTOWC 1128+#if HAVE_MBRTOWC
1152+ mbstate_t thisstate; 1129+ mbstate_t thisstate;
1153+#endif 1130+#endif
1154 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 1131 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
1155 { 1132 {
1156 if (ferror (stdin)) 1133 if (ferror (stdin))
1157@@ -341,6 +561,15 @@ 1134@@ -341,6 +561,15 @@
1158 } 1135 }
1159 thisfield = find_field (thisline); 1136 thisfield = find_field (thisline);
1160 thislen = thisline->length - 1 - (thisfield - thisline->buffer); 1137 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1161+#if HAVE_MBRTOWC 1138+#if HAVE_MBRTOWC
1162+ if (MB_CUR_MAX > 1) 1139+ if (MB_CUR_MAX > 1)
1163+ { 1140+ {
1164+ thisstate = thisline->state; 1141+ thisstate = thisline->state;
1165+ match = !different_multi (thisfield, prevfield, 1142+ match = !different_multi (thisfield, prevfield,
1166+ thislen, prevlen, thisstate, prevstate); 1143+ thislen, prevlen, thisstate, prevstate);
1167+ } 1144+ }
1168+ else 1145+ else
1169+#endif 1146+#endif
1170 match = !different (thisfield, prevfield, thislen, prevlen); 1147 match = !different (thisfield, prevfield, thislen, prevlen);
1171 match_count += match; 1148 match_count += match;
1172 1149
1173@@ -373,6 +602,9 @@ 1150@@ -373,6 +602,9 @@
1174 SWAP_LINES (prevline, thisline); 1151 SWAP_LINES (prevline, thisline);
1175 prevfield = thisfield; 1152 prevfield = thisfield;
1176 prevlen = thislen; 1153 prevlen = thislen;
1177+#if HAVE_MBRTOWC 1154+#if HAVE_MBRTOWC
1178+ prevstate = thisstate; 1155+ prevstate = thisstate;
1179+#endif 1156+#endif
1180 if (!match) 1157 if (!match)
1181 match_count = 0; 1158 match_count = 0;
1182 } 1159 }
1183@@ -417,6 +649,19 @@ 1160@@ -417,6 +649,19 @@
1184 1161
1185 atexit (close_stdout); 1162 atexit (close_stdout);
1186 1163
1187+#if HAVE_MBRTOWC 1164+#if HAVE_MBRTOWC
1296 { 1273 {
1297- if (!count_bytes) 1274- if (!count_bytes)
1298+ if (operating_mode != byte_mode) 1275+ if (operating_mode != byte_mode)
1299 { 1276 {
1300 if (c == '\b') 1277 if (c == '\b')
1301 { 1278 {
1302@@ -121,30 +165,14 @@ 1279@@ -121,30 +165,14 @@
1303 to stdout, with maximum line length WIDTH. 1280 to stdout, with maximum line length WIDTH.
1304 Return true if successful. */ 1281 Return true if successful. */
1305 1282
1306-static bool 1283-static bool
1331- } 1308- }
1332 1309
1333 while ((c = getc (istream)) != EOF) 1310 while ((c = getc (istream)) != EOF)
1334 { 1311 {
1335@@ -172,6 +200,15 @@ 1312@@ -172,6 +200,15 @@
1336 bool found_blank = false; 1313 bool found_blank = false;
1337 size_t logical_end = offset_out; 1314 size_t logical_end = offset_out;
1338 1315
1339+ /* If LINE_OUT has no wide character, 1316+ /* If LINE_OUT has no wide character,
1340+ put a new wide character in LINE_OUT 1317+ put a new wide character in LINE_OUT
1341+ if column is bigger than width. */ 1318+ if column is bigger than width. */
1342+ if (offset_out == 0) 1319+ if (offset_out == 0)
1343+ { 1320+ {
1344+ line_out[offset_out++] = c; 1321+ line_out[offset_out++] = c;
1345+ continue; 1322+ continue;
1346+ } 1323+ }
1347+ 1324+
1348 /* Look for the last blank. */ 1325 /* Look for the last blank. */
1349 while (logical_end) 1326 while (logical_end)
1350 { 1327 {
1351@@ -218,11 +255,222 @@ 1328@@ -218,11 +255,222 @@
1352 line_out[offset_out++] = c; 1329 line_out[offset_out++] = c;
1353 } 1330 }
1354 1331
1355- saved_errno = errno; 1332- saved_errno = errno;
1363+#if HAVE_MBRTOWC 1340+#if HAVE_MBRTOWC
1364+static void 1341+static void
1365+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) 1342+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1366+{ 1343+{
1367+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 1344+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1368+ size_t buflen = 0; /* The length of the byte sequence in buf. */ 1345+ size_t buflen = 0; /* The length of the byte sequence in buf. */
1369+ char *bufpos = NULL; /* Next read position of BUF. */ 1346+ char *bufpos = NULL; /* Next read position of BUF. */
1370+ wint_t wc; /* A gotten wide character. */ 1347+ wint_t wc; /* A gotten wide character. */
1371+ size_t mblength; /* The byte size of a multibyte character which shows 1348+ size_t mblength; /* The byte size of a multibyte character which shows
1372+ as same character as WC. */ 1349+ as same character as WC. */
1373+ mbstate_t state, state_bak; /* State of the stream. */ 1350+ mbstate_t state, state_bak; /* State of the stream. */
1374+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ 1351+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
1375+ 1352+
1376+ static char *line_out = NULL; 1353+ static char *line_out = NULL;
1377+ size_t offset_out = 0; /* Index in `line_out' for next char. */ 1354+ size_t offset_out = 0; /* Index in `line_out' for next char. */
1378+ static size_t allocated_out = 0; 1355+ static size_t allocated_out = 0;
1379+ 1356+
1380+ int increment; 1357+ int increment;
1381+ size_t column = 0; 1358+ size_t column = 0;
1382+ 1359+
1386+ int last_blank_increment = 0; 1363+ int last_blank_increment = 0;
1387+ int is_bs_following_last_blank; 1364+ int is_bs_following_last_blank;
1388+ size_t bs_following_last_blank_num; 1365+ size_t bs_following_last_blank_num;
1389+ int is_cr_after_last_blank; 1366+ int is_cr_after_last_blank;
1390+ 1367+
1391+#define CLEAR_FLAGS \ 1368+#define CLEAR_FLAGS \
1392+ do \ 1369+ do \
1393+ { \ 1370+ { \
1394+ last_blank_pos = 0; \ 1371+ last_blank_pos = 0; \
1395+ last_blank_column = 0; \ 1372+ last_blank_column = 0; \
1396+ is_blank_seen = 0; \ 1373+ is_blank_seen = 0; \
1397+ is_bs_following_last_blank = 0; \ 1374+ is_bs_following_last_blank = 0; \
1398+ bs_following_last_blank_num = 0; \ 1375+ bs_following_last_blank_num = 0; \
1399+ is_cr_after_last_blank = 0; \ 1376+ is_cr_after_last_blank = 0; \
1400+ } \ 1377+ } \
1401+ while (0) 1378+ while (0)
1402+ 1379+
1403+#define START_NEW_LINE \ 1380+#define START_NEW_LINE \
1404+ do \ 1381+ do \
1405+ { \ 1382+ { \
1406+ putchar ('\n'); \ 1383+ putchar ('\n'); \
1407+ column = 0; \ 1384+ column = 0; \
1408+ offset_out = 0; \ 1385+ offset_out = 0; \
1409+ CLEAR_FLAGS; \ 1386+ CLEAR_FLAGS; \
1410+ } \ 1387+ } \
1411+ while (0) 1388+ while (0)
1412+ 1389+
1413+ CLEAR_FLAGS; 1390+ CLEAR_FLAGS;
1414+ memset (&state, '\0', sizeof(mbstate_t)); 1391+ memset (&state, '\0', sizeof(mbstate_t));
1415+ 1392+
1416+ for (;; bufpos += mblength, buflen -= mblength) 1393+ for (;; bufpos += mblength, buflen -= mblength)
1417+ { 1394+ {
1418+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) 1395+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1419+ { 1396+ {
1420+ memmove (buf, bufpos, buflen); 1397+ memmove (buf, bufpos, buflen);
1421+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); 1398+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1422+ bufpos = buf; 1399+ bufpos = buf;
1423+ } 1400+ }
1424+ 1401+
1425+ if (buflen < 1) 1402+ if (buflen < 1)
1426+ break; 1403+ break;
1427+ 1404+
1428+ /* Get a wide character. */ 1405+ /* Get a wide character. */
1429+ convfail = 0; 1406+ convfail = 0;
1430+ state_bak = state; 1407+ state_bak = state;
1431+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); 1408+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1432+ 1409+
1433+ switch (mblength) 1410+ switch (mblength)
1434+ { 1411+ {
1435+ case (size_t)-1: 1412+ case (size_t)-1:
1436+ case (size_t)-2: 1413+ case (size_t)-2:
1437+ convfail++; 1414+ convfail++;
1438+ state = state_bak; 1415+ state = state_bak;
1439+ /* Fall through. */ 1416+ /* Fall through. */
1440+ 1417+
1441+ case 0: 1418+ case 0:
1442+ mblength = 1; 1419+ mblength = 1;
1443+ break; 1420+ break;
1444+ } 1421+ }
1445+ 1422+
1446+rescan: 1423+rescan:
1447+ if (operating_mode == byte_mode) /* byte mode */ 1424+ if (operating_mode == byte_mode) /* byte mode */
1448+ increment = mblength; 1425+ increment = mblength;
1449+ else if (operating_mode == character_mode) /* character mode */ 1426+ else if (operating_mode == character_mode) /* character mode */
1450+ increment = 1;
1451+ else /* column mode */
1452+ {
1453+ if (convfail)
1454+ increment = 1; 1427+ increment = 1;
1455+ else 1428+ else /* column mode */
1456+ { 1429+ {
1430+ if (convfail)
1431+ increment = 1;
1432+ else
1433+ {
1457+ switch (wc) 1434+ switch (wc)
1458+ { 1435+ {
1459+ case L'\n': 1436+ case L'\n':
1460+ fwrite (line_out, sizeof(char), offset_out, stdout); 1437+ fwrite (line_out, sizeof(char), offset_out, stdout);
1461+ START_NEW_LINE; 1438+ START_NEW_LINE;
1462+ continue; 1439+ continue;
1463+ 1440+
1464+ case L'\b': 1441+ case L'\b':
1465+ increment = (column > 0) ? -1 : 0; 1442+ increment = (column > 0) ? -1 : 0;
1466+ break; 1443+ break;
1467+ 1444+
1468+ case L'\r': 1445+ case L'\r':
1469+ increment = -1 * column; 1446+ increment = -1 * column;
1470+ break; 1447+ break;
1471+ 1448+
1472+ case L'\t': 1449+ case L'\t':
1473+ increment = 8 - column % 8; 1450+ increment = 8 - column % 8;
1474+ break; 1451+ break;
1475+ 1452+
1476+ default: 1453+ default:
1477+ increment = wcwidth (wc); 1454+ increment = wcwidth (wc);
1478+ increment = (increment < 0) ? 0 : increment; 1455+ increment = (increment < 0) ? 0 : increment;
1479+ } 1456+ }
1480+ } 1457+ }
1481+ } 1458+ }
1482+ 1459+
1483+ if (column + increment > width && break_spaces && last_blank_pos) 1460+ if (column + increment > width && break_spaces && last_blank_pos)
1484+ { 1461+ {
1485+ fwrite (line_out, sizeof(char), last_blank_pos, stdout); 1462+ fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1486+ putchar ('\n'); 1463+ putchar ('\n');
1487+ 1464+
1488+ offset_out = offset_out - last_blank_pos; 1465+ offset_out = offset_out - last_blank_pos;
1489+ column = column - last_blank_column + ((is_cr_after_last_blank) 1466+ column = column - last_blank_column + ((is_cr_after_last_blank)
1490+ ? last_blank_increment : bs_following_last_blank_num); 1467+ ? last_blank_increment : bs_following_last_blank_num);
1491+ memmove (line_out, line_out + last_blank_pos, offset_out); 1468+ memmove (line_out, line_out + last_blank_pos, offset_out);
1492+ CLEAR_FLAGS; 1469+ CLEAR_FLAGS;
1493+ goto rescan; 1470+ goto rescan;
1494+ } 1471+ }
1495+ 1472+
1496+ if (column + increment > width && column != 0) 1473+ if (column + increment > width && column != 0)
1497+ { 1474+ {
1498+ fwrite (line_out, sizeof(char), offset_out, stdout); 1475+ fwrite (line_out, sizeof(char), offset_out, stdout);
1499+ START_NEW_LINE; 1476+ START_NEW_LINE;
1500+ goto rescan; 1477+ goto rescan;
1501+ } 1478+ }
1502+ 1479+
1503+ if (allocated_out < offset_out + mblength) 1480+ if (allocated_out < offset_out + mblength)
1504+ { 1481+ {
1505+ line_out = X2REALLOC (line_out, &allocated_out); 1482+ line_out = X2REALLOC (line_out, &allocated_out);
1506+ } 1483+ }
1507+ 1484+
1508+ memcpy (line_out + offset_out, bufpos, mblength); 1485+ memcpy (line_out + offset_out, bufpos, mblength);
1509+ offset_out += mblength; 1486+ offset_out += mblength;
1510+ column += increment; 1487+ column += increment;
1511+ 1488+
1512+ if (is_blank_seen && !convfail && wc == L'\r') 1489+ if (is_blank_seen && !convfail && wc == L'\r')
1513+ is_cr_after_last_blank = 1; 1490+ is_cr_after_last_blank = 1;
1514+ 1491+
1515+ if (is_bs_following_last_blank && !convfail && wc == L'\b') 1492+ if (is_bs_following_last_blank && !convfail && wc == L'\b')
1516+ ++bs_following_last_blank_num; 1493+ ++bs_following_last_blank_num;
1517+ else 1494+ else
1518+ is_bs_following_last_blank = 0; 1495+ is_bs_following_last_blank = 0;
1519+ 1496+
1520+ if (break_spaces && !convfail && iswblank (wc)) 1497+ if (break_spaces && !convfail && iswblank (wc))
1521+ { 1498+ {
1522+ last_blank_pos = offset_out; 1499+ last_blank_pos = offset_out;
1523+ last_blank_column = column; 1500+ last_blank_column = column;
1524+ is_blank_seen = 1; 1501+ is_blank_seen = 1;
1525+ last_blank_increment = increment; 1502+ last_blank_increment = increment;
1526+ is_bs_following_last_blank = 1; 1503+ is_bs_following_last_blank = 1;
1527+ bs_following_last_blank_num = 0; 1504+ bs_following_last_blank_num = 0;
1528+ is_cr_after_last_blank = 0; 1505+ is_cr_after_last_blank = 0;
1529+ } 1506+ }
1530+ } 1507+ }
1531+ 1508+
1532+ *saved_errno = errno; 1509+ *saved_errno = errno;
1533 1510
1534 if (offset_out) 1511 if (offset_out)
1582 1559
1583 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 1560 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1584 { 1561 {
1585@@ -264,7 +516,15 @@ 1562@@ -264,7 +516,15 @@
1586 switch (optc) 1563 switch (optc)
1587 { 1564 {
1588 case 'b': /* Count bytes rather than columns. */ 1565 case 'b': /* Count bytes rather than columns. */
1589- count_bytes = true; 1566- count_bytes = true;
1590+ if (operating_mode != column_mode) 1567+ if (operating_mode != column_mode)
1591+ FATAL_ERROR (_("only one way of folding may be specified")); 1568+ FATAL_ERROR (_("only one way of folding may be specified"));
1592+ operating_mode = byte_mode; 1569+ operating_mode = byte_mode;
1593+ break; 1570+ break;
1594+ 1571+
1595+ case 'c': 1572+ case 'c':
1596+ if (operating_mode != column_mode) 1573+ if (operating_mode != column_mode)
1597+ FATAL_ERROR (_("only one way of folding may be specified")); 1574+ FATAL_ERROR (_("only one way of folding may be specified"));
1598+ operating_mode = character_mode; 1575+ operating_mode = character_mode;
1599 break; 1576 break;
1600 1577
1601 case 's': /* Break at word boundaries. */ 1578 case 's': /* Break at word boundaries. */
1602--- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000 1579--- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1603+++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000 1580+++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1604@@ -23,10 +23,19 @@ 1581@@ -23,10 +23,19 @@
1605 1582
1606 #include <config.h> 1583 #include <config.h>
1635 #endif 1612 #endif
1636 1613
1637 #define NONZERO(x) ((x) != 0) 1614 #define NONZERO(x) ((x) != 0)
1638 1615
1639+/* get a multibyte character's byte length. */ 1616+/* get a multibyte character's byte length. */
1640+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ 1617+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
1641+ do \ 1618+ do \
1642+ { \ 1619+ { \
1643+ wchar_t wc; \ 1620+ wchar_t wc; \
1644+ mbstate_t state_bak; \ 1621+ mbstate_t state_bak; \
1645+ \ 1622+ \
1646+ state_bak = STATE; \ 1623+ state_bak = STATE; \
1647+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ 1624+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \
1648+ \ 1625+ \
1649+ switch (MBLENGTH) \ 1626+ switch (MBLENGTH) \
1650+ { \ 1627+ { \
1651+ case (size_t)-1: \ 1628+ case (size_t)-1: \
1652+ case (size_t)-2: \ 1629+ case (size_t)-2: \
1653+ STATE = state_bak; \ 1630+ STATE = state_bak; \
1654+ /* Fall through. */ \ 1631+ /* Fall through. */ \
1655+ case 0: \ 1632+ case 0: \
1656+ MBLENGTH = 1; \ 1633+ MBLENGTH = 1; \
1657+ } \ 1634+ } \
1658+ } \ 1635+ } \
1659+ while (0) 1636+ while (0)
1660+ 1637+
1661 /* The kind of blanks for '-b' to skip in various options. */ 1638 /* The kind of blanks for '-b' to skip in various options. */
1662 enum blanktype { bl_start, bl_end, bl_both }; 1639 enum blanktype { bl_start, bl_end, bl_both };
1663 1640
1774+ 1751+
1775+ memset (&state_mb, '\0', sizeof (mbstate_t)); 1752+ memset (&state_mb, '\0', sizeof (mbstate_t));
1776+ memset (&state_wc, '\0', sizeof (mbstate_t)); 1753+ memset (&state_wc, '\0', sizeof (mbstate_t));
1777+ 1754+
1778+ for (j = 0; j < s_len;) 1755+ for (j = 0; j < s_len;)
1779+ { 1756+ {
1780+ if (!ismbblank (s + j, s_len - j, &mblength)) 1757+ if (!ismbblank (s + j, s_len - j, &mblength))
1781+ break; 1758+ break;
1782+ j += mblength; 1759+ j += mblength;
1783+ } 1760+ }
1784+ 1761+
1785+ for (k = 0; j < s_len;) 1762+ for (k = 0; j < s_len;)
1786+ { 1763+ {
1787+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); 1764+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1788+ assert (mblength != (size_t)-1 && mblength != (size_t)-2); 1765+ assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1789+ if (mblength == 0) 1766+ if (mblength == 0)
1790+ break; 1767+ break;
1791+ 1768+
1792+ pwc = towupper (wc); 1769+ pwc = towupper (wc);
1793+ if (pwc == wc) 1770+ if (pwc == wc)
1794+ { 1771+ {
1795+ memcpy (mbc, s + j, mblength); 1772+ memcpy (mbc, s + j, mblength);
1796+ j += mblength; 1773+ j += mblength;
1797+ } 1774+ }
1798+ else 1775+ else
1799+ { 1776+ {
1800+ j += mblength; 1777+ j += mblength;
1801+ mblength = wcrtomb (mbc, pwc, &state_wc); 1778+ mblength = wcrtomb (mbc, pwc, &state_wc);
1802+ assert (mblength != (size_t)0 && mblength != (size_t)-1); 1779+ assert (mblength != (size_t)0 && mblength != (size_t)-1);
1803+ } 1780+ }
1804+ 1781+
1805+ for (l = 0; l < mblength; l++) 1782+ for (l = 0; l < mblength; l++)
1806+ name[k++] = mbc[l]; 1783+ name[k++] = mbc[l];
1807+ } 1784+ }
1808+ name[k] = '\0'; 1785+ name[k] = '\0';
1809+ } 1786+ }
1810+ qsort ((void *) monthtab, MONTHS_PER_YEAR, 1787+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
1811+ sizeof (struct month), struct_month_cmp); 1788+ sizeof (struct month), struct_month_cmp);
1812+} 1789+}
1830 1807
1831- if (tab != TAB_DEFAULT) 1808- if (tab != TAB_DEFAULT)
1832+ if (tab_length) 1809+ if (tab_length)
1833 while (ptr < lim && sword--) 1810 while (ptr < lim && sword--)
1834 { 1811 {
1835- while (ptr < lim && *ptr != tab) 1812- while (ptr < lim && *ptr != tab)
1836+ while (ptr < lim && *ptr != tab[0]) 1813+ while (ptr < lim && *ptr != tab[0])
1837 ++ptr; 1814 ++ptr;
1838 if (ptr < lim) 1815 if (ptr < lim)
1839 ++ptr; 1816 ++ptr;
1840@@ -1282,11 +1409,70 @@ 1817@@ -1282,11 +1409,70 @@
1841 return ptr; 1818 return ptr;
1842 } 1819 }
1843 1820
1844+#if HAVE_MBRTOWC 1821+#if HAVE_MBRTOWC
1855+ memset (&state, '\0', sizeof(mbstate_t)); 1832+ memset (&state, '\0', sizeof(mbstate_t));
1856+ 1833+
1857+ if (tab_length) 1834+ if (tab_length)
1858+ while (ptr < lim && sword--) 1835+ while (ptr < lim && sword--)
1859+ { 1836+ {
1860+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1837+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1861+ { 1838+ {
1862+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1839+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1863+ ptr += mblength; 1840+ ptr += mblength;
1864+ } 1841+ }
1865+ if (ptr < lim) 1842+ if (ptr < lim)
1866+ { 1843+ {
1867+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1844+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1868+ ptr += mblength; 1845+ ptr += mblength;
1869+ } 1846+ }
1870+ } 1847+ }
1871+ else 1848+ else
1872+ while (ptr < lim && sword--) 1849+ while (ptr < lim && sword--)
1873+ { 1850+ {
1874+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1851+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1875+ ptr += mblength; 1852+ ptr += mblength;
1876+ if (ptr < lim) 1853+ if (ptr < lim)
1877+ { 1854+ {
1878+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1855+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1879+ ptr += mblength; 1856+ ptr += mblength;
1880+ } 1857+ }
1881+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1858+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1882+ ptr += mblength; 1859+ ptr += mblength;
1883+ } 1860+ }
1884+ 1861+
1885+ if (key->skipsblanks) 1862+ if (key->skipsblanks)
1886+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1863+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1887+ ptr += mblength; 1864+ ptr += mblength;
1889+ for (i = 0; i < schar; i++) 1866+ for (i = 0; i < schar; i++)
1890+ { 1867+ {
1891+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1868+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1892+ 1869+
1893+ if (ptr + mblength > lim) 1870+ if (ptr + mblength > lim)
1894+ break; 1871+ break;
1895+ else 1872+ else
1896+ ptr += mblength; 1873+ ptr += mblength;
1897+ } 1874+ }
1898+ 1875+
1899+ return ptr; 1876+ return ptr;
1900+} 1877+}
1901+#endif 1878+#endif
1915 the preceding field. */ 1892 the preceding field. */
1916- if (tab != TAB_DEFAULT) 1893- if (tab != TAB_DEFAULT)
1917+ if (tab_length) 1894+ if (tab_length)
1918 while (ptr < lim && eword--) 1895 while (ptr < lim && eword--)
1919 { 1896 {
1920- while (ptr < lim && *ptr != tab) 1897- while (ptr < lim && *ptr != tab)
1921+ while (ptr < lim && *ptr != tab[0]) 1898+ while (ptr < lim && *ptr != tab[0])
1922 ++ptr; 1899 ++ptr;
1923 if (ptr < lim && (eword | echar)) 1900 if (ptr < lim && (eword | echar))
1924 ++ptr; 1901 ++ptr;
1925@@ -1348,10 +1534,10 @@ 1902@@ -1348,10 +1534,10 @@
1926 */ 1903 */
1927 1904
1928 /* Make LIM point to the end of (one byte past) the current field. */ 1905 /* Make LIM point to the end of (one byte past) the current field. */
1929- if (tab != TAB_DEFAULT) 1906- if (tab != TAB_DEFAULT)
1931 { 1908 {
1932 char *newlim; 1909 char *newlim;
1933- newlim = memchr (ptr, tab, lim - ptr); 1910- newlim = memchr (ptr, tab, lim - ptr);
1934+ newlim = memchr (ptr, tab[0], lim - ptr); 1911+ newlim = memchr (ptr, tab[0], lim - ptr);
1935 if (newlim) 1912 if (newlim)
1936 lim = newlim; 1913 lim = newlim;
1937 } 1914 }
1938@@ -1384,6 +1570,113 @@ 1915@@ -1384,6 +1570,113 @@
1939 return ptr; 1916 return ptr;
1940 } 1917 }
1941 1918
1955+ memset (&state, '\0', sizeof(mbstate_t)); 1932+ memset (&state, '\0', sizeof(mbstate_t));
1956+ 1933+
1957+ if (tab_length) 1934+ if (tab_length)
1958+ while (ptr < lim && eword--) 1935+ while (ptr < lim && eword--)
1959+ { 1936+ {
1960+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1937+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1961+ { 1938+ {
1962+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1939+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1963+ ptr += mblength; 1940+ ptr += mblength;
1964+ } 1941+ }
1965+ if (ptr < lim && (eword | echar)) 1942+ if (ptr < lim && (eword | echar))
1966+ { 1943+ {
1967+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1944+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1968+ ptr += mblength; 1945+ ptr += mblength;
1969+ } 1946+ }
1970+ } 1947+ }
1971+ else 1948+ else
1972+ while (ptr < lim && eword--) 1949+ while (ptr < lim && eword--)
1973+ { 1950+ {
1974+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1951+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1975+ ptr += mblength; 1952+ ptr += mblength;
1976+ if (ptr < lim) 1953+ if (ptr < lim)
1977+ { 1954+ {
1978+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1955+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1979+ ptr += mblength; 1956+ ptr += mblength;
1980+ } 1957+ }
1981+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1958+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1982+ ptr += mblength; 1959+ ptr += mblength;
1983+ } 1960+ }
1984+ 1961+
1985+ 1962+
1986+# ifdef POSIX_UNSPECIFIED 1963+# ifdef POSIX_UNSPECIFIED
1987+ /* Make LIM point to the end of (one byte past) the current field. */ 1964+ /* Make LIM point to the end of (one byte past) the current field. */
1989+ { 1966+ {
1990+ char *newlim, *p; 1967+ char *newlim, *p;
1991+ 1968+
1992+ newlim = NULL; 1969+ newlim = NULL;
1993+ for (p = ptr; p < lim;) 1970+ for (p = ptr; p < lim;)
1994+ { 1971+ {
1995+ if (memcmp (p, tab, tab_length) == 0) 1972+ if (memcmp (p, tab, tab_length) == 0)
1996+ { 1973+ {
1997+ newlim = p; 1974+ newlim = p;
1998+ break; 1975+ break;
1999+ } 1976+ }
2000+ 1977+
2001+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1978+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2002+ p += mblength; 1979+ p += mblength;
2003+ } 1980+ }
2004+ } 1981+ }
2005+ else 1982+ else
2006+ { 1983+ {
2007+ char *newlim; 1984+ char *newlim;
2008+ newlim = ptr; 1985+ newlim = ptr;
2009+ 1986+
2010+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) 1987+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2011+ newlim += mblength; 1988+ newlim += mblength;
2012+ if (ptr < lim) 1989+ if (ptr < lim)
2013+ { 1990+ {
2014+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1991+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2015+ ptr += mblength; 1992+ ptr += mblength;
2016+ } 1993+ }
2017+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) 1994+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2018+ newlim += mblength; 1995+ newlim += mblength;
2019+ lim = newlim; 1996+ lim = newlim;
2020+ } 1997+ }
2021+# endif 1998+# endif
2022+ 1999+
2023+ if (echar != 0) 2000+ if (echar != 0)
2034+ for (i = 0; i < echar; i++) 2011+ for (i = 0; i < echar; i++)
2035+ { 2012+ {
2036+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 2013+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2037+ 2014+
2038+ if (ptr + mblength > lim) 2015+ if (ptr + mblength > lim)
2039+ break; 2016+ break;
2040+ else 2017+ else
2041+ ptr += mblength; 2018+ ptr += mblength;
2042+ } 2019+ }
2043+ } 2020+ }
2044+ 2021+
2045+ return ptr; 2022+ return ptr;
2046+} 2023+}
2048+ 2025+
2049 /* Fill BUF reading from FP, moving buf->left bytes from the end 2026 /* Fill BUF reading from FP, moving buf->left bytes from the end
2050 of buf->buf to the beginning first. If EOF is reached and the 2027 of buf->buf to the beginning first. If EOF is reached and the
2051 file wasn't terminated by a newline, supply one. Set up BUF's line 2028 file wasn't terminated by a newline, supply one. Set up BUF's line
2052@@ -1466,8 +1753,24 @@ 2029@@ -1466,8 +1753,24 @@
2053 else 2030 else
2054 { 2031 {
2055 if (key->skipsblanks) 2032 if (key->skipsblanks)
2056- while (blanks[to_uchar (*line_start)]) 2033- while (blanks[to_uchar (*line_start)])
2057- line_start++; 2034- line_start++;
2058+ { 2035+ {
2059+#if HAVE_MBRTOWC 2036+#if HAVE_MBRTOWC
2060+ if (MB_CUR_MAX > 1) 2037+ if (MB_CUR_MAX > 1)
2061+ { 2038+ {
2062+ size_t mblength; 2039+ size_t mblength;
2063+ mbstate_t state; 2040+ mbstate_t state;
2064+ memset (&state, '\0', sizeof(mbstate_t)); 2041+ memset (&state, '\0', sizeof(mbstate_t));
2065+ while (line_start < line->keylim && 2042+ while (line_start < line->keylim &&
2066+ ismbblank (line_start, 2043+ ismbblank (line_start,
2067+ line->keylim - line_start, 2044+ line->keylim - line_start,
2068+ &mblength)) 2045+ &mblength))
2069+ line_start += mblength; 2046+ line_start += mblength;
2070+ } 2047+ }
2071+ else 2048+ else
2072+#endif 2049+#endif
2073+ while (blanks[to_uchar (*line_start)]) 2050+ while (blanks[to_uchar (*line_start)])
2074+ line_start++; 2051+ line_start++;
2075+ } 2052+ }
2076 line->keybeg = line_start; 2053 line->keybeg = line_start;
2077 } 2054 }
2078 } 2055 }
2079@@ -1500,7 +1803,7 @@ 2056@@ -1500,7 +1803,7 @@
2080 hideously fast. */ 2057 hideously fast. */
2081 2058
2082 static int 2059 static int
2083-numcompare (const char *a, const char *b) 2060-numcompare (const char *a, const char *b)
2084+numcompare_uni (const char *a, const char *b) 2061+numcompare_uni (const char *a, const char *b)
2085 { 2062 {
2086 while (blanks[to_uchar (*a)]) 2063 while (blanks[to_uchar (*a)])
2087 a++; 2064 a++;
2088@@ -1510,6 +1813,25 @@ 2065@@ -1510,6 +1813,25 @@
2089 : strnumcmp (a, b, decimal_point, thousands_sep)); 2066 : strnumcmp (a, b, decimal_point, thousands_sep));
2090 } 2067 }
2091 2068
2092+#if HAVE_MBRTOWC 2069+#if HAVE_MBRTOWC
2093+static int 2070+static int
2094+numcompare_mb (const char *a, const char *b) 2071+numcompare_mb (const char *a, const char *b)
2161+ 2138+
2162+ for (i = 0; i < wclength; i++) 2139+ for (i = 0; i < wclength; i++)
2163+ { 2140+ {
2164+ month_wcs[i] = towupper(month_wcs[i]); 2141+ month_wcs[i] = towupper(month_wcs[i]);
2165+ if (iswblank (month_wcs[i])) 2142+ if (iswblank (month_wcs[i]))
2166+ { 2143+ {
2167+ month_wcs[i] = L'\0'; 2144+ month_wcs[i] = L'\0';
2168+ break; 2145+ break;
2169+ } 2146+ }
2170+ } 2147+ }
2171+ 2148+
2172+ wpp = (const wchar_t **)&month_wcs; 2149+ wpp = (const wchar_t **)&month_wcs;
2173+ 2150+
2174+ mblength = wcsrtombs (month, wpp, len + 1, &state); 2151+ mblength = wcsrtombs (month, wpp, len + 1, &state);
2177+ do 2154+ do
2178+ { 2155+ {
2179+ int ix = (lo + hi) / 2; 2156+ int ix = (lo + hi) / 2;
2180+ 2157+
2181+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) 2158+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
2182+ hi = ix; 2159+ hi = ix;
2183+ else 2160+ else
2184+ lo = ix; 2161+ lo = ix;
2185+ } 2162+ }
2186+ while (hi - lo > 1); 2163+ while (hi - lo > 1);
2187+ 2164+
2188+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) 2165+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
2189+ ? monthtab[lo].val : 0); 2166+ ? monthtab[lo].val : 0);
2238+ 2215+
2239+ /* Actually compare the fields. */ 2216+ /* Actually compare the fields. */
2240+ if (key->random) 2217+ if (key->random)
2241+ diff = compare_random (texta, lena, textb, lenb); 2218+ diff = compare_random (texta, lena, textb, lenb);
2242+ else if (key->numeric | key->general_numeric | key->human_numeric) 2219+ else if (key->numeric | key->general_numeric | key->human_numeric)
2243+ { 2220+ {
2244+ char savea = *lima, saveb = *limb; 2221+ char savea = *lima, saveb = *limb;
2245+ 2222+
2246+ *lima = *limb = '\0'; 2223+ *lima = *limb = '\0';
2247+ diff = (key->numeric ? numcompare (texta, textb) 2224+ diff = (key->numeric ? numcompare (texta, textb)
2248+ : key->general_numeric ? general_numcompare (texta, textb) 2225+ : key->general_numeric ? general_numcompare (texta, textb)
2249+ : human_numcompare (texta, textb, key)); 2226+ : human_numcompare (texta, textb, key));
2250+ *lima = savea, *limb = saveb; 2227+ *lima = savea, *limb = saveb;
2251+ } 2228+ }
2252+ else if (key->version) 2229+ else if (key->version)
2253+ diff = compare_version (texta, lena, textb, lenb); 2230+ diff = compare_version (texta, lena, textb, lenb);
2254+ else if (key->month) 2231+ else if (key->month)
2255+ diff = getmonth (texta, lena) - getmonth (textb, lenb); 2232+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
2256+ else 2233+ else
2257+ { 2234+ {
2258+ if (ignore || translate) 2235+ if (ignore || translate)
2259+ { 2236+ {
2260+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1); 2237+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
2261+ char *copy_b = copy_a + lena + 1; 2238+ char *copy_b = copy_a + lena + 1;
2262+ size_t new_len_a, new_len_b; 2239+ size_t new_len_a, new_len_b;
2263+ size_t i, j; 2240+ size_t i, j;
2264+ 2241+
2265+ /* Ignore and/or translate chars before comparing. */ 2242+ /* Ignore and/or translate chars before comparing. */
2266+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ 2243+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
2267+ do \ 2244+ do \
2268+ { \ 2245+ { \
2269+ wchar_t uwc; \ 2246+ wchar_t uwc; \
2270+ char mbc[MB_LEN_MAX]; \ 2247+ char mbc[MB_LEN_MAX]; \
2271+ mbstate_t state_wc; \ 2248+ mbstate_t state_wc; \
2272+ \ 2249+ \
2273+ for (NEW_LEN = i = 0; i < LEN;) \ 2250+ for (NEW_LEN = i = 0; i < LEN;) \
2274+ { \ 2251+ { \
2275+ mbstate_t state_bak; \ 2252+ mbstate_t state_bak; \
2276+ \ 2253+ \
2277+ state_bak = STATE; \ 2254+ state_bak = STATE; \
2278+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ 2255+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
2279+ \ 2256+ \
2280+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ 2257+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
2281+ || MBLENGTH == 0) \ 2258+ || MBLENGTH == 0) \
2282+ { \ 2259+ { \
2283+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ 2260+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
2284+ STATE = state_bak; \ 2261+ STATE = state_bak; \
2285+ if (!ignore) \ 2262+ if (!ignore) \
2286+ COPY[NEW_LEN++] = TEXT[i++]; \ 2263+ COPY[NEW_LEN++] = TEXT[i++]; \
2287+ continue; \ 2264+ continue; \
2288+ } \ 2265+ } \
2289+ \ 2266+ \
2290+ if (ignore) \ 2267+ if (ignore) \
2291+ { \ 2268+ { \
2292+ if ((ignore == nonprinting && !iswprint (WC)) \ 2269+ if ((ignore == nonprinting && !iswprint (WC)) \
2293+ || (ignore == nondictionary \ 2270+ || (ignore == nondictionary \
2294+ && !iswalnum (WC) && !iswblank (WC))) \ 2271+ && !iswalnum (WC) && !iswblank (WC))) \
2295+ { \ 2272+ { \
2296+ i += MBLENGTH; \ 2273+ i += MBLENGTH; \
2297+ continue; \ 2274+ continue; \
2298+ } \ 2275+ } \
2299+ } \ 2276+ } \
2300+ \ 2277+ \
2301+ if (translate) \ 2278+ if (translate) \
2302+ { \ 2279+ { \
2303+ \ 2280+ \
2304+ uwc = towupper(WC); \ 2281+ uwc = towupper(WC); \
2305+ if (WC == uwc) \ 2282+ if (WC == uwc) \
2306+ { \ 2283+ { \
2307+ memcpy (mbc, TEXT + i, MBLENGTH); \ 2284+ memcpy (mbc, TEXT + i, MBLENGTH); \
2308+ i += MBLENGTH; \ 2285+ i += MBLENGTH; \
2309+ } \ 2286+ } \
2310+ else \ 2287+ else \
2311+ { \ 2288+ { \
2312+ i += MBLENGTH; \ 2289+ i += MBLENGTH; \
2313+ WC = uwc; \ 2290+ WC = uwc; \
2314+ memset (&state_wc, '\0', sizeof (mbstate_t)); \ 2291+ memset (&state_wc, '\0', sizeof (mbstate_t)); \
2315+ \ 2292+ \
2316+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ 2293+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
2317+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ 2294+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
2318+ } \ 2295+ } \
2319+ \ 2296+ \
2320+ for (j = 0; j < MBLENGTH; j++) \ 2297+ for (j = 0; j < MBLENGTH; j++) \
2321+ COPY[NEW_LEN++] = mbc[j]; \ 2298+ COPY[NEW_LEN++] = mbc[j]; \
2322+ } \ 2299+ } \
2323+ else \ 2300+ else \
2324+ for (j = 0; j < MBLENGTH; j++) \ 2301+ for (j = 0; j < MBLENGTH; j++) \
2325+ COPY[NEW_LEN++] = TEXT[i++]; \ 2302+ COPY[NEW_LEN++] = TEXT[i++]; \
2326+ } \ 2303+ } \
2327+ COPY[NEW_LEN] = '\0'; \ 2304+ COPY[NEW_LEN] = '\0'; \
2328+ } \ 2305+ } \
2329+ while (0) 2306+ while (0)
2330+ IGNORE_CHARS (new_len_a, lena, texta, copy_a, 2307+ IGNORE_CHARS (new_len_a, lena, texta, copy_a,
2331+ wc_a, mblength_a, state_a); 2308+ wc_a, mblength_a, state_a);
2332+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, 2309+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
2333+ wc_b, mblength_b, state_b); 2310+ wc_b, mblength_b, state_b);
2334+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 2311+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2335+ } 2312+ }
2336+ else if (lena == 0) 2313+ else if (lena == 0)
2337+ diff = - NONZERO (lenb); 2314+ diff = - NONZERO (lenb);
2338+ else if (lenb == 0) 2315+ else if (lenb == 0)
2339+ goto greater; 2316+ goto greater;
2340+ else 2317+ else
2341+ diff = xmemcoll (texta, lena, textb, lenb); 2318+ diff = xmemcoll (texta, lena, textb, lenb);
2342+ } 2319+ }
2343+ 2320+
2344+ if (diff) 2321+ if (diff)
2345+ goto not_equal; 2322+ goto not_equal;
2346+ 2323+
2347+ key = key->next; 2324+ key = key->next;
2348+ if (! key) 2325+ if (! key)
2349+ break; 2326+ break;
2350+ 2327+
2351+ /* Find the beginning and limit of the next field. */ 2328+ /* Find the beginning and limit of the next field. */
2352+ if (key->eword != -1) 2329+ if (key->eword != -1)
2353+ lima = limfield (a, key), limb = limfield (b, key); 2330+ lima = limfield (a, key), limb = limfield (b, key);
2354+ else 2331+ else
2355+ lima = a->text + a->length - 1, limb = b->text + b->length - 1; 2332+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2356+ 2333+
2357+ if (key->sword != -1) 2334+ if (key->sword != -1)
2358+ texta = begfield (a, key), textb = begfield (b, key); 2335+ texta = begfield (a, key), textb = begfield (b, key);
2359+ else 2336+ else
2360+ { 2337+ {
2361+ texta = a->text, textb = b->text; 2338+ texta = a->text, textb = b->text;
2362+ if (key->skipsblanks) 2339+ if (key->skipsblanks)
2363+ { 2340+ {
2364+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) 2341+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
2365+ texta += mblength_a; 2342+ texta += mblength_a;
2366+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) 2343+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
2367+ textb += mblength_b; 2344+ textb += mblength_b;
2368+ } 2345+ }
2369+ } 2346+ }
2370+ } 2347+ }
2371+ 2348+
2372+ return 0; 2349+ return 0;
2373+ 2350+
2374+greater: 2351+greater:
2418 have_read_stdin = false; 2395 have_read_stdin = false;
2419 inittables (); 2396 inittables ();
2420 2397
2421@@ -3015,13 +3599,35 @@ 2398@@ -3015,13 +3599,35 @@
2422 2399
2423 case 't': 2400 case 't':
2424 { 2401 {
2425- char newtab = optarg[0]; 2402- char newtab = optarg[0];
2426- if (! newtab) 2403- if (! newtab)
2427+ char newtab[MB_LEN_MAX + 1]; 2404+ char newtab[MB_LEN_MAX + 1];
2428+ size_t newtab_length = 1; 2405+ size_t newtab_length = 1;
2429+ strncpy (newtab, optarg, MB_LEN_MAX); 2406+ strncpy (newtab, optarg, MB_LEN_MAX);
2430+ if (! newtab[0]) 2407+ if (! newtab[0])
2431 error (SORT_FAILURE, 0, _("empty tab")); 2408 error (SORT_FAILURE, 0, _("empty tab"));
2432- if (optarg[1]) 2409- if (optarg[1])
2433+#if HAVE_MBRTOWC 2410+#if HAVE_MBRTOWC
2434+ if (MB_CUR_MAX > 1) 2411+ if (MB_CUR_MAX > 1)
2435+ { 2412+ {
2436+ wchar_t wc; 2413+ wchar_t wc;
2437+ mbstate_t state; 2414+ mbstate_t state;
2438+ size_t i; 2415+ size_t i;
2439+ 2416+
2440+ memset (&state, '\0', sizeof (mbstate_t)); 2417+ memset (&state, '\0', sizeof (mbstate_t));
2441+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, 2418+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
2442+ MB_LEN_MAX), 2419+ MB_LEN_MAX),
2443+ &state); 2420+ &state);
2444+ switch (newtab_length) 2421+ switch (newtab_length)
2445+ { 2422+ {
2446+ case (size_t) -1: 2423+ case (size_t) -1:
2447+ case (size_t) -2: 2424+ case (size_t) -2:
2448+ case 0: 2425+ case 0:
2449+ newtab_length = 1; 2426+ newtab_length = 1;
2450+ } 2427+ }
2451+ } 2428+ }
2452+#endif 2429+#endif
2453+ if (newtab_length == 1 && optarg[1]) 2430+ if (newtab_length == 1 && optarg[1])
2454 { 2431 {
2455 if (STREQ (optarg, "\\0")) 2432 if (STREQ (optarg, "\\0"))
2456- newtab = '\0'; 2433- newtab = '\0';
2457+ newtab[0] = '\0'; 2434+ newtab[0] = '\0';
2458 else 2435 else
2459 { 2436 {
2460 /* Provoke with `sort -txx'. Complain about 2437 /* Provoke with `sort -txx'. Complain about
2461@@ -3032,9 +3638,12 @@ 2438@@ -3032,9 +3638,12 @@
2462 quote (optarg)); 2439 quote (optarg));
2463 } 2440 }
2464 } 2441 }
2465- if (tab != TAB_DEFAULT && tab != newtab) 2442- if (tab != TAB_DEFAULT && tab != newtab)
2466+ if (tab_length 2443+ if (tab_length
2467+ && (tab_length != newtab_length 2444+ && (tab_length != newtab_length
2468+ || memcmp (tab, newtab, tab_length) != 0)) 2445+ || memcmp (tab, newtab, tab_length) != 0))
2469 error (SORT_FAILURE, 0, _("incompatible tabs")); 2446 error (SORT_FAILURE, 0, _("incompatible tabs"));
2470- tab = newtab; 2447- tab = newtab;
2471+ memcpy (tab, newtab, newtab_length); 2448+ memcpy (tab, newtab, newtab_length);
2472+ tab_length = newtab_length; 2449+ tab_length = newtab_length;
2473 } 2450 }
2474 break; 2451 break;
2475 2452
2476--- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000 2453--- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000
2477+++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000 2454+++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000
2478@@ -39,11 +39,28 @@ 2455@@ -39,11 +39,28 @@
2479 #include <stdio.h> 2456 #include <stdio.h>
2805 static bool print_stored (COLUMN *p); 2782 static bool print_stored (COLUMN *p);
2806@@ -426,6 +477,7 @@ 2783@@ -426,6 +477,7 @@
2807 static void pad_across_to (int position); 2784 static void pad_across_to (int position);
2808 static void add_line_number (COLUMN *p); 2785 static void add_line_number (COLUMN *p);
2809 static void getoptarg (char *arg, char switch_char, char *character, 2786 static void getoptarg (char *arg, char switch_char, char *character,
2810+ int *character_length, int *character_width, 2787+ int *character_length, int *character_width,
2811 int *number); 2788 int *number);
2812 void usage (int status); 2789 void usage (int status);
2813 static void print_files (int number_of_files, char **av); 2790 static void print_files (int number_of_files, char **av);
2814@@ -440,7 +492,6 @@ 2791@@ -440,7 +492,6 @@
2815 static void pad_down (int lines); 2792 static void pad_down (int lines);
2816 static void read_rest_of_line (COLUMN *p); 2793 static void read_rest_of_line (COLUMN *p);
2905+ char_to_clump = char_to_clump_single; 2882+ char_to_clump = char_to_clump_single;
2906+ } 2883+ }
2907+ 2884+
2908 n_files = 0; 2885 n_files = 0;
2909 file_names = (argc > 1 2886 file_names = (argc > 1
2910 ? xmalloc ((argc - 1) * sizeof (char *)) 2887 ? xmalloc ((argc - 1) * sizeof (char *))
2911@@ -949,8 +1032,12 @@ 2888@@ -949,8 +1032,12 @@
2912 break; 2889 break;
2913 case 'e': 2890 case 'e':
2914 if (optarg) 2891 if (optarg)
2915- getoptarg (optarg, 'e', &input_tab_char, 2892- getoptarg (optarg, 'e', &input_tab_char,
2916- &chars_per_input_tab); 2893- &chars_per_input_tab);
2917+ { 2894+ {
2918+ int dummy_length, dummy_width; 2895+ int dummy_length, dummy_width;
2919+ 2896+
2920+ getoptarg (optarg, 'e', input_tab_char, &dummy_length, 2897+ getoptarg (optarg, 'e', input_tab_char, &dummy_length,
2921+ &dummy_width, &chars_per_input_tab); 2898+ &dummy_width, &chars_per_input_tab);
2922+ } 2899+ }
2923 /* Could check tab width > 0. */ 2900 /* Could check tab width > 0. */
2924 untabify_input = true; 2901 untabify_input = true;
2925 break; 2902 break;
2926@@ -963,8 +1050,12 @@ 2903@@ -963,8 +1050,12 @@
2927 break; 2904 break;
2928 case 'i': 2905 case 'i':
2929 if (optarg) 2906 if (optarg)
2930- getoptarg (optarg, 'i', &output_tab_char, 2907- getoptarg (optarg, 'i', &output_tab_char,
2931- &chars_per_output_tab); 2908- &chars_per_output_tab);
2932+ { 2909+ {
2933+ int dummy_width; 2910+ int dummy_width;
2934+ 2911+
2935+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, 2912+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
2936+ &dummy_width, &chars_per_output_tab); 2913+ &dummy_width, &chars_per_output_tab);
2937+ } 2914+ }
2938 /* Could check tab width > 0. */ 2915 /* Could check tab width > 0. */
2939 tabify_output = true; 2916 tabify_output = true;
2940 break; 2917 break;
2941@@ -991,8 +1082,8 @@ 2918@@ -991,8 +1082,8 @@
2942 case 'n': 2919 case 'n':
2943 numbered_lines = true; 2920 numbered_lines = true;
2944 if (optarg) 2921 if (optarg)
2945- getoptarg (optarg, 'n', &number_separator, 2922- getoptarg (optarg, 'n', &number_separator,
2946- &chars_per_number); 2923- &chars_per_number);
2947+ getoptarg (optarg, 'n', number_separator, &number_separator_length, 2924+ getoptarg (optarg, 'n', number_separator, &number_separator_length,
2948+ &number_separator_width, &chars_per_number); 2925+ &number_separator_width, &chars_per_number);
2949 break; 2926 break;
2950 case 'N': 2927 case 'N':
2951 skip_count = false; 2928 skip_count = false;
2952@@ -1031,7 +1122,7 @@ 2929@@ -1031,7 +1122,7 @@
2953 old_s = false; 2930 old_s = false;
2954 /* Reset an additional input of -s, -S dominates -s */ 2931 /* Reset an additional input of -s, -S dominates -s */
2955 col_sep_string = bad_cast (""); 2932 col_sep_string = bad_cast ("");
2956- col_sep_length = 0; 2933- col_sep_length = 0;
2957+ col_sep_length = col_sep_width = 0; 2934+ col_sep_length = col_sep_width = 0;
2958 use_col_separator = true; 2935 use_col_separator = true;
2959 if (optarg) 2936 if (optarg)
2960 separator_string (optarg); 2937 separator_string (optarg);
2961@@ -1188,10 +1279,45 @@ 2938@@ -1188,10 +1279,45 @@
2962 a number. */ 2939 a number. */
2963 2940
2964 static void 2941 static void
2965-getoptarg (char *arg, char switch_char, char *character, int *number) 2942-getoptarg (char *arg, char switch_char, char *character, int *number)
2966+getoptarg (char *arg, char switch_char, char *character, int *character_length, 2943+getoptarg (char *arg, char switch_char, char *character, int *character_length,
2967+ int *character_width, int *number) 2944+ int *character_width, int *number)
2968 { 2945 {
2969 if (!ISDIGIT (*arg)) 2946 if (!ISDIGIT (*arg))
2970- *character = *arg++; 2947- *character = *arg++;
2971+ { 2948+ {
2972+#ifdef HAVE_MBRTOWC 2949+#ifdef HAVE_MBRTOWC
2973+ if (MB_CUR_MAX > 1) /* for multibyte locale. */ 2950+ if (MB_CUR_MAX > 1) /* for multibyte locale. */
2974+ { 2951+ {
2975+ wchar_t wc; 2952+ wchar_t wc;
2976+ size_t mblength; 2953+ size_t mblength;
2977+ int width; 2954+ int width;
2978+ mbstate_t state = {'\0'}; 2955+ mbstate_t state = {'\0'};
2979+ 2956+
2980+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); 2957+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
2981+ 2958+
2982+ if (mblength == (size_t)-1 || mblength == (size_t)-2) 2959+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2983+ { 2960+ {
2984+ *character_length = 1; 2961+ *character_length = 1;
2985+ *character_width = 1; 2962+ *character_width = 1;
2986+ } 2963+ }
2987+ else 2964+ else
2988+ { 2965+ {
2989+ *character_length = (mblength < 1) ? 1 : mblength; 2966+ *character_length = (mblength < 1) ? 1 : mblength;
2990+ width = wcwidth (wc); 2967+ width = wcwidth (wc);
2991+ *character_width = (width < 0) ? 0 : width; 2968+ *character_width = (width < 0) ? 0 : width;
2992+ } 2969+ }
2993+ 2970+
2994+ strncpy (character, arg, *character_length); 2971+ strncpy (character, arg, *character_length);
2995+ arg += *character_length; 2972+ arg += *character_length;
2996+ } 2973+ }
2997+ else /* for single byte locale. */ 2974+ else /* for single byte locale. */
2998+#endif 2975+#endif
2999+ { 2976+ {
3000+ *character = *arg++; 2977+ *character = *arg++;
3001+ *character_length = 1; 2978+ *character_length = 1;
3002+ *character_width = 1; 2979+ *character_width = 1;
3003+ } 2980+ }
3004+ } 2981+ }
3005+ 2982+
3006 if (*arg) 2983 if (*arg)
3007 { 2984 {
3008 long int tmp_long; 2985 long int tmp_long;
3009@@ -1256,7 +1382,7 @@ 2986@@ -1256,7 +1382,7 @@
3010 else 2987 else
3011 col_sep_string = column_separator; 2988 col_sep_string = column_separator;
3012 2989
3013- col_sep_length = 1; 2990- col_sep_length = 1;
3014+ col_sep_length = col_sep_width = 1; 2991+ col_sep_length = col_sep_width = 1;
3015 use_col_separator = true; 2992 use_col_separator = true;
3016 } 2993 }
3017 /* It's rather pointless to define a TAB separator with column 2994 /* It's rather pointless to define a TAB separator with column
3018@@ -1288,11 +1414,11 @@ 2995@@ -1288,11 +1414,11 @@
3019 TAB_WIDTH (chars_per_input_tab, chars_per_number); */ 2996 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
3020 2997
3021 /* Estimate chars_per_text without any margin and keep it constant. */ 2998 /* Estimate chars_per_text without any margin and keep it constant. */
3022- if (number_separator == '\t') 2999- if (number_separator == '\t')
3023+ if (number_separator[0] == '\t') 3000+ if (number_separator[0] == '\t')
3024 number_width = chars_per_number + 3001 number_width = chars_per_number +
3025 TAB_WIDTH (chars_per_default_tab, chars_per_number); 3002 TAB_WIDTH (chars_per_default_tab, chars_per_number);
3026 else 3003 else
3027- number_width = chars_per_number + 1; 3004- number_width = chars_per_number + 1;
3028+ number_width = chars_per_number + number_separator_width; 3005+ number_width = chars_per_number + number_separator_width;
3029 3006
3030 /* The number is part of the column width unless we are 3007 /* The number is part of the column width unless we are
3031 printing files in parallel. */ 3008 printing files in parallel. */
3032@@ -1307,7 +1433,7 @@ 3009@@ -1307,7 +1433,7 @@
3033 } 3010 }
3034 3011
3035 chars_per_column = (chars_per_line - chars_used_by_number - 3012 chars_per_column = (chars_per_line - chars_used_by_number -
3036- (columns - 1) * col_sep_length) / columns; 3013- (columns - 1) * col_sep_length) / columns;
3037+ (columns - 1) * col_sep_width) / columns; 3014+ (columns - 1) * col_sep_width) / columns;
3038 3015
3039 if (chars_per_column < 1) 3016 if (chars_per_column < 1)
3040 error (EXIT_FAILURE, 0, _("page width too narrow")); 3017 error (EXIT_FAILURE, 0, _("page width too narrow"));
3041@@ -1432,7 +1558,7 @@ 3018@@ -1432,7 +1558,7 @@
3042 3019
3046+ h = h + col_sep_width; 3023+ h = h + col_sep_width;
3047 3024
3048 /* This loop takes care of all but the rightmost column. */ 3025 /* This loop takes care of all but the rightmost column. */
3049 3026
3050@@ -1466,7 +1592,7 @@ 3027@@ -1466,7 +1592,7 @@
3051 } 3028 }
3052 else 3029 else
3053 { 3030 {
3054- h = h_next + col_sep_length; 3031- h = h_next + col_sep_length;
3055+ h = h_next + col_sep_width; 3032+ h = h_next + col_sep_width;
3056 h_next = h + chars_per_column; 3033 h_next = h + chars_per_column;
3057 } 3034 }
3058 } 3035 }
3059@@ -1756,9 +1882,9 @@ 3036@@ -1756,9 +1882,9 @@
3060 align_column (COLUMN *p) 3037 align_column (COLUMN *p)
3061 { 3038 {
3062 padding_not_printed = p->start_position; 3039 padding_not_printed = p->start_position;
3084 char *s; 3061 char *s;
3085 int left_cut; 3062 int left_cut;
3086 3063
3087@@ -2058,22 +2184,24 @@ 3064@@ -2058,22 +2184,24 @@
3088 /* Tabification is assumed for multiple columns, also for n-separators, 3065 /* Tabification is assumed for multiple columns, also for n-separators,
3089 but `default n-separator = TAB' hasn't been given priority over 3066 but `default n-separator = TAB' hasn't been given priority over
3090 equal column_width also specified by POSIX. */ 3067 equal column_width also specified by POSIX. */
3091- if (number_separator == '\t') 3068- if (number_separator == '\t')
3092+ if (number_separator[0] == '\t') 3069+ if (number_separator[0] == '\t')
3093 { 3070 {
3094 i = number_width - chars_per_number; 3071 i = number_width - chars_per_number;
3095 while (i-- > 0) 3072 while (i-- > 0)
3096 (p->char_func) (' '); 3073 (p->char_func) (' ');
3097 } 3074 }
3098 else 3075 else
3099- (p->char_func) (number_separator); 3076- (p->char_func) (number_separator);
3100+ for (j = 0; j < number_separator_length; j++) 3077+ for (j = 0; j < number_separator_length; j++)
3101+ (p->char_func) (number_separator[j]); 3078+ (p->char_func) (number_separator[j]);
3102 } 3079 }
3103 else 3080 else
3104 /* To comply with POSIX, we avoid any expansion of default TAB 3081 /* To comply with POSIX, we avoid any expansion of default TAB
3105 separator with a single column output. No column_width requirement 3082 separator with a single column output. No column_width requirement
3106 has to be considered. */ 3083 has to be considered. */
3107 { 3084 {
3108- (p->char_func) (number_separator); 3085- (p->char_func) (number_separator);
3109- if (number_separator == '\t') 3086- if (number_separator == '\t')
3110+ for (j = 0; j < number_separator_length; j++) 3087+ for (j = 0; j < number_separator_length; j++)
3111+ (p->char_func) (number_separator[j]); 3088+ (p->char_func) (number_separator[j]);
3112+ if (number_separator[0] == '\t') 3089+ if (number_separator[0] == '\t')
3113 output_position = POS_AFTER_TAB (chars_per_output_tab, 3090 output_position = POS_AFTER_TAB (chars_per_output_tab,
3114 output_position); 3091 output_position);
3115 } 3092 }
3116@@ -2234,7 +2362,7 @@ 3093@@ -2234,7 +2362,7 @@
3117 while (goal - h_old > 1 3094 while (goal - h_old > 1
3118 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) 3095 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
3119 { 3096 {
3120- putchar (output_tab_char); 3097- putchar (output_tab_char);
3121+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); 3098+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
3122 h_old = h_new; 3099 h_old = h_new;
3123 } 3100 }
3131 s = col_sep_string; 3108 s = col_sep_string;
3132 3109
3133@@ -2267,6 +2396,7 @@ 3110@@ -2267,6 +2396,7 @@
3134 { 3111 {
3135 for (; separators_not_printed > 0; --separators_not_printed) 3112 for (; separators_not_printed > 0; --separators_not_printed)
3136 { 3113 {
3137+ not_space_flag = 0; 3114+ not_space_flag = 0;
3138 while (l-- > 0) 3115 while (l-- > 0)
3139 { 3116 {
3140 /* 3 types of sep_strings: spaces only, spaces and chars, 3117 /* 3 types of sep_strings: spaces only, spaces and chars,
3141@@ -2280,12 +2410,15 @@ 3118@@ -2280,12 +2410,15 @@
3142 } 3119 }
3143 else 3120 else
3144 { 3121 {
3145+ not_space_flag = 1; 3122+ not_space_flag = 1;
3146 if (spaces_not_printed > 0) 3123 if (spaces_not_printed > 0)
3147 print_white_space (); 3124 print_white_space ();
3148 putchar (*s++); 3125 putchar (*s++);
3149- ++output_position; 3126- ++output_position;
3150 } 3127 }
3151 } 3128 }
3152+ if (not_space_flag) 3129+ if (not_space_flag)
3153+ output_position += col_sep_width; 3130+ output_position += col_sep_width;
3154+ 3131+
3155 /* sep_string ends with some spaces */ 3132 /* sep_string ends with some spaces */
3156 if (spaces_not_printed > 0) 3133 if (spaces_not_printed > 0)
3157 print_white_space (); 3134 print_white_space ();
3158@@ -2313,7 +2446,7 @@ 3135@@ -2313,7 +2446,7 @@
3159 required number of tabs and spaces. */ 3136 required number of tabs and spaces. */
3160 3137
3161 static void 3138 static void
3162-print_char (char c) 3139-print_char (char c)
3185+ state_bak = state; 3162+ state_bak = state;
3186+ mbc[mbc_pos++] = c; 3163+ mbc[mbc_pos++] = c;
3187+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 3164+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
3188+ 3165+
3189+ while (mbc_pos > 0) 3166+ while (mbc_pos > 0)
3190+ { 3167+ {
3191+ switch (mblength) 3168+ switch (mblength)
3192+ { 3169+ {
3193+ case (size_t)-2: 3170+ case (size_t)-2:
3194+ state = state_bak; 3171+ state = state_bak;
3195+ return; 3172+ return;
3196+ 3173+
3197+ case (size_t)-1: 3174+ case (size_t)-1:
3198+ state = state_bak; 3175+ state = state_bak;
3199+ ++output_position; 3176+ ++output_position;
3200+ putchar (mbc[0]); 3177+ putchar (mbc[0]);
3201+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); 3178+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
3202+ --mbc_pos; 3179+ --mbc_pos;
3203+ break; 3180+ break;
3204+ 3181+
3205+ case 0: 3182+ case 0:
3206+ mblength = 1; 3183+ mblength = 1;
3207+ 3184+
3208+ default: 3185+ default:
3209+ if (wc == L' ') 3186+ if (wc == L' ')
3210+ { 3187+ {
3211+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3188+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3212+ --mbc_pos; 3189+ --mbc_pos;
3213+ ++spaces_not_printed; 3190+ ++spaces_not_printed;
3214+ return; 3191+ return;
3215+ } 3192+ }
3216+ else if (spaces_not_printed > 0) 3193+ else if (spaces_not_printed > 0)
3217+ print_white_space (); 3194+ print_white_space ();
3218+ 3195+
3219+ /* Nonprintables are assumed to have width 0, except L'\b'. */ 3196+ /* Nonprintables are assumed to have width 0, except L'\b'. */
3220+ if ((width = wcwidth (wc)) < 1) 3197+ if ((width = wcwidth (wc)) < 1)
3221+ { 3198+ {
3222+ if (wc == L'\b') 3199+ if (wc == L'\b')
3223+ --output_position; 3200+ --output_position;
3224+ } 3201+ }
3225+ else 3202+ else
3226+ output_position += width; 3203+ output_position += width;
3227+ 3204+
3228+ fwrite (mbc, sizeof(char), mblength, stdout); 3205+ fwrite (mbc, sizeof(char), mblength, stdout);
3229+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3206+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3230+ mbc_pos -= mblength; 3207+ mbc_pos -= mblength;
3231+ } 3208+ }
3232+ } 3209+ }
3233+ return; 3210+ return;
3234+ } 3211+ }
3235+ putchar (c); 3212+ putchar (c);
3236+} 3213+}
3237+#endif 3214+#endif
3238+ 3215+
3239 /* Skip to page PAGE before printing. 3216 /* Skip to page PAGE before printing.
3240 PAGE may be larger than total number of pages. */ 3217 PAGE may be larger than total number of pages. */
3241 3218
3242@@ -2517,9 +2718,9 @@ 3219@@ -2517,9 +2718,9 @@
3243 align_empty_cols = false; 3220 align_empty_cols = false;
3244 } 3221 }
3245 3222
3246- if (padding_not_printed - col_sep_length > 0) 3223- if (padding_not_printed - col_sep_length > 0)
3247+ if (padding_not_printed - col_sep_width > 0) 3224+ if (padding_not_printed - col_sep_width > 0)
3248 { 3225 {
3249- pad_across_to (padding_not_printed - col_sep_length); 3226- pad_across_to (padding_not_printed - col_sep_length);
3250+ pad_across_to (padding_not_printed - col_sep_width); 3227+ pad_across_to (padding_not_printed - col_sep_width);
3251 padding_not_printed = ANYWHERE; 3228 padding_not_printed = ANYWHERE;
3252 } 3229 }
3253 3230
3254@@ -2620,9 +2821,9 @@ 3231@@ -2620,9 +2821,9 @@
3255 } 3232 }
3256 } 3233 }
3257 3234
3258- if (padding_not_printed - col_sep_length > 0) 3235- if (padding_not_printed - col_sep_length > 0)
3259+ if (padding_not_printed - col_sep_width > 0) 3236+ if (padding_not_printed - col_sep_width > 0)
3260 { 3237 {
3266@@ -2635,8 +2836,8 @@ 3243@@ -2635,8 +2836,8 @@
3267 if (spaces_not_printed == 0) 3244 if (spaces_not_printed == 0)
3268 { 3245 {
3269 output_position = p->start_position + end_vector[line]; 3246 output_position = p->start_position + end_vector[line];
3270- if (p->start_position - col_sep_length == chars_per_margin) 3247- if (p->start_position - col_sep_length == chars_per_margin)
3271- output_position -= col_sep_length; 3248- output_position -= col_sep_length;
3272+ if (p->start_position - col_sep_width == chars_per_margin) 3249+ if (p->start_position - col_sep_width == chars_per_margin)
3273+ output_position -= col_sep_width; 3250+ output_position -= col_sep_width;
3274 } 3251 }
3275 3252
3276 return true; 3253 return true;
3277@@ -2655,7 +2856,7 @@ 3254@@ -2655,7 +2856,7 @@
3278 number of characters is 1.) */ 3255 number of characters is 1.) */
3325+ width = 0; 3302+ width = 0;
3326+ chars = 0; 3303+ chars = 0;
3327+ while (mbc_pos > 0) 3304+ while (mbc_pos > 0)
3328+ { 3305+ {
3329+ switch (mblength) 3306+ switch (mblength)
3330+ { 3307+ {
3331+ case (size_t)-2: 3308+ case (size_t)-2:
3332+ state = state_bak; 3309+ state = state_bak;
3333+ return 0; 3310+ return 0;
3334+ 3311+
3335+ case (size_t)-1: 3312+ case (size_t)-1:
3336+ state = state_bak; 3313+ state = state_bak;
3337+ mblength = 1; 3314+ mblength = 1;
3338+ 3315+
3339+ if (use_esc_sequence || use_cntrl_prefix) 3316+ if (use_esc_sequence || use_cntrl_prefix)
3340+ { 3317+ {
3341+ width = +4; 3318+ width = +4;
3342+ chars = +4; 3319+ chars = +4;
3343+ *s++ = '\\'; 3320+ *s++ = '\\';
3344+ sprintf (esc_buff, "%03o", mbc[0]); 3321+ sprintf (esc_buff, "%03o", mbc[0]);
3345+ for (i = 0; i <= 2; ++i) 3322+ for (i = 0; i <= 2; ++i)
3346+ *s++ = (int) esc_buff[i]; 3323+ *s++ = (int) esc_buff[i];
3347+ } 3324+ }
3348+ else 3325+ else
3349+ { 3326+ {
3350+ width += 1; 3327+ width += 1;
3351+ chars += 1; 3328+ chars += 1;
3352+ *s++ = mbc[0]; 3329+ *s++ = mbc[0];
3353+ } 3330+ }
3354+ break; 3331+ break;
3355+ 3332+
3356+ case 0: 3333+ case 0:
3357+ mblength = 1; 3334+ mblength = 1;
3358+ /* Fall through */ 3335+ /* Fall through */
3359+ 3336+
3360+ default: 3337+ default:
3361+ if (memcmp (mbc, input_tab_char, mblength) == 0) 3338+ if (memcmp (mbc, input_tab_char, mblength) == 0)
3362+ chars_per_c = chars_per_input_tab; 3339+ chars_per_c = chars_per_input_tab;
3363+ 3340+
3364+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') 3341+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
3365+ { 3342+ {
3366+ int width_inc; 3343+ int width_inc;
3367+ 3344+
3368+ width_inc = TAB_WIDTH (chars_per_c, input_position); 3345+ width_inc = TAB_WIDTH (chars_per_c, input_position);
3369+ width += width_inc; 3346+ width += width_inc;
3370+ 3347+
3371+ if (untabify_input) 3348+ if (untabify_input)
3372+ { 3349+ {
3373+ for (i = width_inc; i; --i) 3350+ for (i = width_inc; i; --i)
3374+ *s++ = ' '; 3351+ *s++ = ' ';
3375+ chars += width_inc; 3352+ chars += width_inc;
3376+ } 3353+ }
3377+ else 3354+ else
3378+ { 3355+ {
3379+ for (i = 0; i < mblength; i++) 3356+ for (i = 0; i < mblength; i++)
3380+ *s++ = mbc[i]; 3357+ *s++ = mbc[i];
3381+ chars += mblength; 3358+ chars += mblength;
3382+ } 3359+ }
3383+ } 3360+ }
3384+ else if ((wc_width = wcwidth (wc)) < 1) 3361+ else if ((wc_width = wcwidth (wc)) < 1)
3385+ { 3362+ {
3386+ if (use_esc_sequence) 3363+ if (use_esc_sequence)
3387+ { 3364+ {
3388+ for (i = 0; i < mblength; i++) 3365+ for (i = 0; i < mblength; i++)
3389+ { 3366+ {
3390+ width += 4; 3367+ width += 4;
3391+ chars += 4; 3368+ chars += 4;
3392+ *s++ = '\\'; 3369+ *s++ = '\\';
3393+ sprintf (esc_buff, "%03o", c); 3370+ sprintf (esc_buff, "%03o", c);
3394+ for (j = 0; j <= 2; ++j) 3371+ for (j = 0; j <= 2; ++j)
3395+ *s++ = (int) esc_buff[j]; 3372+ *s++ = (int) esc_buff[j];
3396+ } 3373+ }
3397+ } 3374+ }
3398+ else if (use_cntrl_prefix) 3375+ else if (use_cntrl_prefix)
3399+ { 3376+ {
3400+ if (wc < 0200) 3377+ if (wc < 0200)
3401+ { 3378+ {
3402+ width += 2; 3379+ width += 2;
3403+ chars += 2; 3380+ chars += 2;
3404+ *s++ = '^'; 3381+ *s++ = '^';
3405+ *s++ = wc ^ 0100; 3382+ *s++ = wc ^ 0100;
3406+ } 3383+ }
3407+ else 3384+ else
3408+ { 3385+ {
3409+ for (i = 0; i < mblength; i++) 3386+ for (i = 0; i < mblength; i++)
3410+ { 3387+ {
3411+ width += 4; 3388+ width += 4;
3412+ chars += 4; 3389+ chars += 4;
3413+ *s++ = '\\'; 3390+ *s++ = '\\';
3414+ sprintf (esc_buff, "%03o", c); 3391+ sprintf (esc_buff, "%03o", c);
3415+ for (j = 0; j <= 2; ++j) 3392+ for (j = 0; j <= 2; ++j)
3416+ *s++ = (int) esc_buff[j]; 3393+ *s++ = (int) esc_buff[j];
3417+ } 3394+ }
3418+ } 3395+ }
3419+ } 3396+ }
3420+ else if (wc == L'\b') 3397+ else if (wc == L'\b')
3421+ { 3398+ {
3422+ width += -1; 3399+ width += -1;
3423+ chars += 1; 3400+ chars += 1;
3424+ *s++ = c; 3401+ *s++ = c;
3425+ } 3402+ }
3403+ else
3404+ {
3405+ width += 0;
3406+ chars += mblength;
3407+ for (i = 0; i < mblength; i++)
3408+ *s++ = mbc[i];
3409+ }
3410+ }
3426+ else 3411+ else
3427+ { 3412+ {
3428+ width += 0;
3429+ chars += mblength;
3430+ for (i = 0; i < mblength; i++)
3431+ *s++ = mbc[i];
3432+ }
3433+ }
3434+ else
3435+ {
3436+ width += wc_width; 3413+ width += wc_width;
3437+ chars += mblength; 3414+ chars += mblength;
3438+ for (i = 0; i < mblength; i++) 3415+ for (i = 0; i < mblength; i++)
3439+ *s++ = mbc[i]; 3416+ *s++ = mbc[i];
3440+ } 3417+ }
3441+ } 3418+ }
3442+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3419+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3443+ mbc_pos -= mblength; 3420+ mbc_pos -= mblength;
3444+ } 3421+ }
3445+ 3422+
3446+ input_position += width; 3423+ input_position += width;
3468@@ -37,6 +42,18 @@ 3445@@ -37,6 +42,18 @@
3469 #include "quote.h" 3446 #include "quote.h"
3470 #include "xstrndup.h" 3447 #include "xstrndup.h"
3471 3448
3472+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 3449+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3473+ installation; work around this configuration error. */ 3450+ installation; work around this configuration error. */
3474+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 3451+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3475+# undef MB_LEN_MAX 3452+# undef MB_LEN_MAX
3476+# define MB_LEN_MAX 16 3453+# define MB_LEN_MAX 16
3477+#endif 3454+#endif
3478+ 3455+
3487@@ -67,6 +84,52 @@ 3464@@ -67,6 +84,52 @@
3488 } \ 3465 } \
3489 while (0) 3466 while (0)
3490 3467
3491+/* Refill the buffer BUF to get a multibyte character. */ 3468+/* Refill the buffer BUF to get a multibyte character. */
3492+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 3469+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
3493+ do \ 3470+ do \
3494+ { \ 3471+ { \
3495+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 3472+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
3496+ { \ 3473+ { \
3497+ memmove (BUF, BUFPOS, BUFLEN); \ 3474+ memmove (BUF, BUFPOS, BUFLEN); \
3498+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 3475+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
3499+ BUFPOS = BUF; \ 3476+ BUFPOS = BUF; \
3500+ } \ 3477+ } \
3501+ } \ 3478+ } \
3502+ while (0) 3479+ while (0)
3503+ 3480+
3504+/* Get wide character on BUFPOS. BUFPOS is not included after that. 3481+/* Get wide character on BUFPOS. BUFPOS is not included after that.
3505+ If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ 3482+ If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */
3506+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 3483+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
3507+ do \ 3484+ do \
3508+ { \ 3485+ { \
3509+ mbstate_t state_bak; \ 3486+ mbstate_t state_bak; \
3510+ \ 3487+ \
3511+ if (BUFLEN < 1) \ 3488+ if (BUFLEN < 1) \
3512+ { \ 3489+ { \
3513+ WC = WEOF; \ 3490+ WC = WEOF; \
3514+ break; \ 3491+ break; \
3515+ } \ 3492+ } \
3516+ \ 3493+ \
3517+ /* Get a wide character. */ \ 3494+ /* Get a wide character. */ \
3518+ CONVFAIL = 0; \ 3495+ CONVFAIL = 0; \
3519+ state_bak = STATE; \ 3496+ state_bak = STATE; \
3520+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 3497+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
3521+ \ 3498+ \
3522+ switch (MBLENGTH) \ 3499+ switch (MBLENGTH) \
3523+ { \ 3500+ { \
3524+ case (size_t)-1: \ 3501+ case (size_t)-1: \
3525+ case (size_t)-2: \ 3502+ case (size_t)-2: \
3526+ CONVFAIL++; \ 3503+ CONVFAIL++; \
3527+ STATE = state_bak; \ 3504+ STATE = state_bak; \
3528+ /* Fall througn. */ \ 3505+ /* Fall througn. */ \
3529+ \ 3506+ \
3530+ case 0: \ 3507+ case 0: \
3531+ MBLENGTH = 1; \ 3508+ MBLENGTH = 1; \
3532+ break; \ 3509+ break; \
3533+ } \ 3510+ } \
3534+ } \ 3511+ } \
3535+ while (0) 3512+ while (0)
3536+ 3513+
3537 struct range_pair 3514 struct range_pair
3538 { 3515 {
3539 size_t lo; 3516 size_t lo;
3606+ -n with -b: don't split multibyte characters\n\ 3583+ -n with -b: don't split multibyte characters\n\
3607 "), stdout); 3584 "), stdout);
3608 fputs (_("\ 3585 fputs (_("\
3609 --complement complement the set of selected bytes, characters\n\ 3586 --complement complement the set of selected bytes, characters\n\
3610@@ -362,7 +439,7 @@ 3587@@ -362,7 +439,7 @@
3611 in_digits = false; 3588 in_digits = false;
3612 /* Starting a range. */ 3589 /* Starting a range. */
3613 if (dash_found) 3590 if (dash_found)
3614- FATAL_ERROR (_("invalid byte or field list")); 3591- FATAL_ERROR (_("invalid byte or field list"));
3615+ FATAL_ERROR (_("invalid byte, character or field list")); 3592+ FATAL_ERROR (_("invalid byte, character or field list"));
3616 dash_found = true; 3593 dash_found = true;
3617 fieldstr++; 3594 fieldstr++;
3618 3595
3619@@ -387,14 +464,16 @@ 3596@@ -387,14 +464,16 @@
3620 if (!rhs_specified) 3597 if (!rhs_specified)
3621 { 3598 {
3622 /* `n-'. From `initial' to end of line. */ 3599 /* `n-'. From `initial' to end of line. */
3623- eol_range_start = initial; 3600- eol_range_start = initial;
3624+ if (eol_range_start == 0 || 3601+ if (eol_range_start == 0 ||
3625+ (eol_range_start != 0 && eol_range_start > initial)) 3602+ (eol_range_start != 0 && eol_range_start > initial))
3626+ eol_range_start = initial; 3603+ eol_range_start = initial;
3627 field_found = true; 3604 field_found = true;
3628 } 3605 }
3629 else 3606 else
3630 { 3607 {
3631 /* `m-n' or `-n' (1-n). */ 3608 /* `m-n' or `-n' (1-n). */
3632 if (value < initial) 3609 if (value < initial)
3633- FATAL_ERROR (_("invalid decreasing range")); 3610- FATAL_ERROR (_("invalid decreasing range"));
3634+ FATAL_ERROR (_("invalid byte, character or field list")); 3611+ FATAL_ERROR (_("invalid byte, character or field list"));
3635 3612
3636 /* Is there already a range going to end of line? */ 3613 /* Is there already a range going to end of line? */
3637 if (eol_range_start != 0) 3614 if (eol_range_start != 0)
3638@@ -467,6 +546,9 @@ 3615@@ -467,6 +546,9 @@
3639 if (operating_mode == byte_mode) 3616 if (operating_mode == byte_mode)
3640 error (0, 0, 3617 error (0, 0,
3641 _("byte offset %s is too large"), quote (bad_num)); 3618 _("byte offset %s is too large"), quote (bad_num));
3642+ else if (operating_mode == character_mode) 3619+ else if (operating_mode == character_mode)
3643+ error (0, 0, 3620+ error (0, 0,
3644+ _("character offset %s is too large"), quote (bad_num)); 3621+ _("character offset %s is too large"), quote (bad_num));
3645 else 3622 else
3646 error (0, 0, 3623 error (0, 0,
3647 _("field number %s is too large"), quote (bad_num)); 3624 _("field number %s is too large"), quote (bad_num));
3648@@ -477,7 +559,7 @@ 3625@@ -477,7 +559,7 @@
3649 fieldstr++; 3626 fieldstr++;
3650 } 3627 }
3651 else 3628 else
3652- FATAL_ERROR (_("invalid byte or field list")); 3629- FATAL_ERROR (_("invalid byte or field list"));
3653+ FATAL_ERROR (_("invalid byte, character or field list")); 3630+ FATAL_ERROR (_("invalid byte, character or field list"));
3654 } 3631 }
3655 3632
3656 max_range_endpoint = 0; 3633 max_range_endpoint = 0;
3657@@ -570,6 +652,63 @@ 3634@@ -570,6 +652,63 @@
3658 } 3635 }
3668+ without splitting multibyte characters. */ 3645+ without splitting multibyte characters. */
3669+ 3646+
3670+static void 3647+static void
3671+cut_characters_or_cut_bytes_no_split (FILE *stream) 3648+cut_characters_or_cut_bytes_no_split (FILE *stream)
3672+{ 3649+{
3673+ int idx; /* number of bytes or characters in the line so far. */ 3650+ int idx; /* number of bytes or characters in the line so far. */
3674+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 3651+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3675+ char *bufpos; /* Next read position of BUF. */ 3652+ char *bufpos; /* Next read position of BUF. */
3676+ size_t buflen; /* The length of the byte sequence in buf. */ 3653+ size_t buflen; /* The length of the byte sequence in buf. */
3677+ wint_t wc; /* A gotten wide character. */ 3654+ wint_t wc; /* A gotten wide character. */
3678+ size_t mblength; /* The byte size of a multibyte character which shows 3655+ size_t mblength; /* The byte size of a multibyte character which shows
3679+ as same character as WC. */ 3656+ as same character as WC. */
3680+ mbstate_t state; /* State of the stream. */ 3657+ mbstate_t state; /* State of the stream. */
3681+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ 3658+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
3682+ 3659+
3683+ idx = 0; 3660+ idx = 0;
3684+ buflen = 0; 3661+ buflen = 0;
3685+ bufpos = buf; 3662+ bufpos = buf;
3686+ memset (&state, '\0', sizeof(mbstate_t)); 3663+ memset (&state, '\0', sizeof(mbstate_t));
3690+ REFILL_BUFFER (buf, bufpos, buflen, stream); 3667+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3691+ 3668+
3692+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 3669+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
3693+ 3670+
3694+ if (wc == WEOF) 3671+ if (wc == WEOF)
3695+ { 3672+ {
3696+ if (idx > 0) 3673+ if (idx > 0)
3697+ putchar ('\n'); 3674+ putchar ('\n');
3698+ break; 3675+ break;
3699+ } 3676+ }
3700+ else if (wc == L'\n') 3677+ else if (wc == L'\n')
3701+ { 3678+ {
3702+ putchar ('\n'); 3679+ putchar ('\n');
3703+ idx = 0; 3680+ idx = 0;
3704+ } 3681+ }
3705+ else 3682+ else
3706+ { 3683+ {
3707+ idx += (operating_mode == byte_mode) ? mblength : 1; 3684+ idx += (operating_mode == byte_mode) ? mblength : 1;
3708+ if (print_kth (idx, NULL)) 3685+ if (print_kth (idx, NULL))
3709+ fwrite (bufpos, mblength, sizeof(char), stdout); 3686+ fwrite (bufpos, mblength, sizeof(char), stdout);
3710+ } 3687+ }
3711+ 3688+
3712+ buflen -= mblength; 3689+ buflen -= mblength;
3713+ bufpos += mblength; 3690+ bufpos += mblength;
3714+ } 3691+ }
3715+} 3692+}
3716+#endif 3693+#endif
3717+ 3694+
3718 /* Read from stream STREAM, printing to standard output any selected fields. */ 3695 /* Read from stream STREAM, printing to standard output any selected fields. */
3719 3696
3720 static void 3697 static void
3721@@ -692,13 +831,192 @@ 3698@@ -692,13 +831,192 @@
3722 } 3699 }
3730+ unsigned int field_idx; 3707+ unsigned int field_idx;
3731+ int found_any_selected_field; 3708+ int found_any_selected_field;
3732+ int buffer_first_field; 3709+ int buffer_first_field;
3733+ int empty_input; 3710+ int empty_input;
3734+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 3711+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3735+ char *bufpos; /* Next read position of BUF. */ 3712+ char *bufpos; /* Next read position of BUF. */
3736+ size_t buflen; /* The length of the byte sequence in buf. */ 3713+ size_t buflen; /* The length of the byte sequence in buf. */
3737+ wint_t wc = 0; /* A gotten wide character. */ 3714+ wint_t wc = 0; /* A gotten wide character. */
3738+ size_t mblength; /* The byte size of a multibyte character which shows 3715+ size_t mblength; /* The byte size of a multibyte character which shows
3739+ as same character as WC. */ 3716+ as same character as WC. */
3740+ mbstate_t state; /* State of the stream. */ 3717+ mbstate_t state; /* State of the stream. */
3741+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ 3718+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
3742+ 3719+
3743+ found_any_selected_field = 0; 3720+ found_any_selected_field = 0;
3744+ field_idx = 1; 3721+ field_idx = 1;
3745+ bufpos = buf; 3722+ bufpos = buf;
3746+ buflen = 0; 3723+ buflen = 0;
3762+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 3739+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
3763+ 3740+
3764+ while (1) 3741+ while (1)
3765+ { 3742+ {
3766+ if (field_idx == 1 && buffer_first_field) 3743+ if (field_idx == 1 && buffer_first_field)
3767+ { 3744+ {
3768+ int len = 0; 3745+ int len = 0;
3769+ 3746+
3770+ while (1) 3747+ while (1)
3771+ { 3748+ {
3772+ REFILL_BUFFER (buf, bufpos, buflen, stream); 3749+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3773+ 3750+
3774+ GET_NEXT_WC_FROM_BUFFER 3751+ GET_NEXT_WC_FROM_BUFFER
3775+ (wc, bufpos, buflen, mblength, state, convfail); 3752+ (wc, bufpos, buflen, mblength, state, convfail);
3776+ 3753+
3777+ if (wc == WEOF) 3754+ if (wc == WEOF)
3778+ break; 3755+ break;
3779+ 3756+
3780+ field_1_buffer = xrealloc (field_1_buffer, len + mblength); 3757+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
3781+ memcpy (field_1_buffer + len, bufpos, mblength); 3758+ memcpy (field_1_buffer + len, bufpos, mblength);
3782+ len += mblength; 3759+ len += mblength;
3783+ buflen -= mblength; 3760+ buflen -= mblength;
3784+ bufpos += mblength; 3761+ bufpos += mblength;
3785+ 3762+
3786+ if (!convfail && (wc == L'\n' || wc == wcdelim)) 3763+ if (!convfail && (wc == L'\n' || wc == wcdelim))
3787+ break; 3764+ break;
3788+ } 3765+ }
3789+ 3766+
3790+ if (wc == WEOF) 3767+ if (wc == WEOF)
3791+ break; 3768+ break;
3792+ 3769+
3793+ /* If the first field extends to the end of line (it is not 3770+ /* If the first field extends to the end of line (it is not
3794+ delimited) and we are printing all non-delimited lines, 3771+ delimited) and we are printing all non-delimited lines,
3795+ print this one. */ 3772+ print this one. */
3796+ if (convfail || (!convfail && wc != wcdelim)) 3773+ if (convfail || (!convfail && wc != wcdelim))
3797+ { 3774+ {
3798+ if (suppress_non_delimited) 3775+ if (suppress_non_delimited)
3799+ { 3776+ {
3800+ /* Empty. */ 3777+ /* Empty. */
3801+ } 3778+ }
3802+ else 3779+ else
3803+ { 3780+ {
3804+ fwrite (field_1_buffer, sizeof (char), len, stdout); 3781+ fwrite (field_1_buffer, sizeof (char), len, stdout);
3805+ /* Make sure the output line is newline terminated. */ 3782+ /* Make sure the output line is newline terminated. */
3806+ if (convfail || (!convfail && wc != L'\n')) 3783+ if (convfail || (!convfail && wc != L'\n'))
3807+ putchar ('\n'); 3784+ putchar ('\n');
3808+ } 3785+ }
3809+ continue; 3786+ continue;
3810+ } 3787+ }
3811+ 3788+
3812+ if (print_kth (1, NULL)) 3789+ if (print_kth (1, NULL))
3813+ { 3790+ {
3814+ /* Print the field, but not the trailing delimiter. */ 3791+ /* Print the field, but not the trailing delimiter. */
3815+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 3792+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
3816+ found_any_selected_field = 1; 3793+ found_any_selected_field = 1;
3817+ } 3794+ }
3818+ ++field_idx; 3795+ ++field_idx;
3819+ } 3796+ }
3820+ 3797+
3821+ if (wc != WEOF) 3798+ if (wc != WEOF)
3822+ { 3799+ {
3823+ if (print_kth (field_idx, NULL))
3824+ {
3825+ if (found_any_selected_field)
3826+ {
3827+ fwrite (output_delimiter_string, sizeof (char),
3828+ output_delimiter_length, stdout);
3829+ }
3830+ found_any_selected_field = 1;
3831+ }
3832+
3833+ while (1)
3834+ {
3835+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3836+
3837+ GET_NEXT_WC_FROM_BUFFER
3838+ (wc, bufpos, buflen, mblength, state, convfail);
3839+
3840+ if (wc == WEOF)
3841+ break;
3842+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
3843+ {
3844+ buflen -= mblength;
3845+ bufpos += mblength;
3846+ break;
3847+ }
3848+
3849+ if (print_kth (field_idx, NULL)) 3800+ if (print_kth (field_idx, NULL))
3801+ {
3802+ if (found_any_selected_field)
3803+ {
3804+ fwrite (output_delimiter_string, sizeof (char),
3805+ output_delimiter_length, stdout);
3806+ }
3807+ found_any_selected_field = 1;
3808+ }
3809+
3810+ while (1)
3811+ {
3812+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3813+
3814+ GET_NEXT_WC_FROM_BUFFER
3815+ (wc, bufpos, buflen, mblength, state, convfail);
3816+
3817+ if (wc == WEOF)
3818+ break;
3819+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
3820+ {
3821+ buflen -= mblength;
3822+ bufpos += mblength;
3823+ break;
3824+ }
3825+
3826+ if (print_kth (field_idx, NULL))
3850+ fwrite (bufpos, mblength, sizeof(char), stdout); 3827+ fwrite (bufpos, mblength, sizeof(char), stdout);
3851+ 3828+
3852+ buflen -= mblength; 3829+ buflen -= mblength;
3853+ bufpos += mblength; 3830+ bufpos += mblength;
3854+ } 3831+ }
3855+ } 3832+ }
3856+ 3833+
3857+ if ((!convfail || wc == L'\n') && buflen < 1) 3834+ if ((!convfail || wc == L'\n') && buflen < 1)
3858+ wc = WEOF; 3835+ wc = WEOF;
3859+ 3836+
3860+ if (!convfail && wc == wcdelim) 3837+ if (!convfail && wc == wcdelim)
3861+ ++field_idx; 3838+ ++field_idx;
3862+ else if (wc == WEOF || (!convfail && wc == L'\n')) 3839+ else if (wc == WEOF || (!convfail && wc == L'\n'))
3863+ { 3840+ {
3864+ if (found_any_selected_field 3841+ if (found_any_selected_field
3865+ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 3842+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
3866+ putchar ('\n'); 3843+ putchar ('\n');
3867+ if (wc == WEOF) 3844+ if (wc == WEOF)
3868+ break; 3845+ break;
3869+ field_idx = 1; 3846+ field_idx = 1;
3870+ found_any_selected_field = 0; 3847+ found_any_selected_field = 0;
3871+ } 3848+ }
3872+ } 3849+ }
3873+} 3850+}
3874+#endif 3851+#endif
3875+ 3852+
3876 static void 3853 static void
3880- cut_bytes (stream); 3857- cut_bytes (stream);
3881+#if HAVE_MBRTOWC 3858+#if HAVE_MBRTOWC
3882+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 3859+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
3883+ { 3860+ {
3884+ switch (operating_mode) 3861+ switch (operating_mode)
3885+ { 3862+ {
3886+ case byte_mode: 3863+ case byte_mode:
3887+ if (byte_mode_character_aware) 3864+ if (byte_mode_character_aware)
3865+ cut_characters_or_cut_bytes_no_split (stream);
3866+ else
3867+ cut_bytes (stream);
3868+ break;
3869+
3870+ case character_mode:
3888+ cut_characters_or_cut_bytes_no_split (stream); 3871+ cut_characters_or_cut_bytes_no_split (stream);
3889+ else 3872+ break;
3890+ cut_bytes (stream);
3891+ break;
3892+ 3873+
3893+ case character_mode:
3894+ cut_characters_or_cut_bytes_no_split (stream);
3895+ break;
3896+
3897+ case field_mode: 3874+ case field_mode:
3898+ cut_fields_mb (stream); 3875+ cut_fields_mb (stream);
3899+ break; 3876+ break;
3900+ 3877+
3901+ default: 3878+ default:
3902+ abort (); 3879+ abort ();
3903+ } 3880+ }
3904+ } 3881+ }
3905 else 3882 else
3906- cut_fields (stream); 3883- cut_fields (stream);
3907+#endif 3884+#endif
3908+ { 3885+ {
3909+ if (operating_mode == field_mode) 3886+ if (operating_mode == field_mode)
3910+ cut_fields (stream); 3887+ cut_fields (stream);
3911+ else 3888+ else
3912+ cut_bytes (stream); 3889+ cut_bytes (stream);
3913+ } 3890+ }
3914 } 3891 }
3915 3892
3916 /* Process file FILE to standard output. 3893 /* Process file FILE to standard output.
3917@@ -748,6 +1066,8 @@ 3894@@ -748,6 +1066,8 @@
3923 3900
3924 initialize_main (&argc, &argv); 3901 initialize_main (&argc, &argv);
3925 set_program_name (argv[0]); 3902 set_program_name (argv[0]);
3926@@ -770,7 +1090,6 @@ 3903@@ -770,7 +1090,6 @@