Parent Directory
|
Revision Log
|
Patch
| Revision 1.38 | Revision 1.39 | ||
|---|---|---|---|
| … | … | ||
| 185 | +#endif | 185 | +#endif |
| 186 | + | 186 | + |
| 187 | /* The official name of this program (e.g., no `g' prefix). */ | 187 | /* The official name of this program (e.g., no `g' prefix). */ |
| 188 | #define PROGRAM_NAME "expand" | 188 | #define PROGRAM_NAME "expand" |
| 189 | 189 | ||
| 190 | @@ -183,6 +200,7 @@ | ||
| 191 | stops = num_start + len - 1; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | + | ||
| 195 | else | ||
| 196 | { | ||
| 197 | error (0, 0, _("tab size contains invalid character(s): %s"), | ||
| 198 | @@ -365,6 +383,142 @@ | 190 | @@ -365,6 +383,142 @@ |
| 199 | } | 191 | } |
| 200 | } | 192 | } |
| 201 | 193 | ||
| 202 | +#if HAVE_MBRTOWC | 194 | +#if HAVE_MBRTOWC |
| … | … | ||
| 412 | { | 404 | { |
| 413 | + unsigned char t = tab[0]; | 405 | + unsigned char t = tab[0]; |
| 414 | char *sep; | 406 | char *sep; |
| 415 | - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) | 407 | - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) |
| 416 | + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) | 408 | + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) |
| 417 | extract_field (line, ptr, sep - ptr); | 409 | extract_field (line, ptr, sep - ptr); |
| 418 | } | 410 | } |
| 419 | else | 411 | else |
| 420 | @@ -229,6 +248,148 @@ | 412 | @@ -229,6 +248,148 @@ |
| 421 | extract_field (line, ptr, lim - ptr); | 413 | extract_field (line, ptr, lim - ptr); |
| 422 | } | 414 | } |
| … | … | ||
| 584 | const struct outlist *outlist; | 576 | const struct outlist *outlist; |
| 585 | - char output_separator = tab < 0 ? ' ' : tab; | 577 | - char output_separator = tab < 0 ? ' ' : tab; |
| 586 | 578 | ||
| 587 | outlist = outlist_head.next; | 579 | outlist = outlist_head.next; |
| 588 | if (outlist) | 580 | if (outlist) |
| 589 | @@ -397,12 +628,12 @@ | ||
| 590 | if (o->file == 0) | ||
| 591 | { | ||
| 592 | if (line1 == &uni_blank) | ||
| 593 | - { | ||
| 594 | + { | ||
| 595 | line = line2; | ||
| 596 | field = join_field_2; | ||
| 597 | } | ||
| 598 | else | ||
| 599 | - { | ||
| 600 | + { | ||
| 601 | line = line1; | ||
| 602 | field = join_field_1; | ||
| 603 | } | ||
| 604 | @@ -416,7 +647,7 @@ | 581 | @@ -416,7 +647,7 @@ |
| 605 | o = o->next; | 582 | o = o->next; |
| 606 | if (o == NULL) | 583 | if (o == NULL) |
| 607 | break; | 584 | break; |
| 608 | - putchar (output_separator); | 585 | - putchar (output_separator); |
| 609 | + PUT_TAB_CHAR; | 586 | + PUT_TAB_CHAR; |
| 610 | } | 587 | } |
| 611 | putchar ('\n'); | 588 | putchar ('\n'); |
| 612 | } | 589 | } |
| 613 | @@ -434,23 +665,23 @@ | 590 | @@ -434,23 +665,23 @@ |
| 614 | prfield (join_field_1, line1); | 591 | prfield (join_field_1, line1); |
| 615 | for (i = 0; i < join_field_1 && i < line1->nfields; ++i) | 592 | for (i = 0; i < join_field_1 && i < line1->nfields; ++i) |
| 616 | { | 593 | { |
| 617 | - putchar (output_separator); | 594 | - putchar (output_separator); |
| 618 | + PUT_TAB_CHAR; | 595 | + PUT_TAB_CHAR; |
| 619 | prfield (i, line1); | 596 | prfield (i, line1); |
| 620 | } | 597 | } |
| 621 | for (i = join_field_1 + 1; i < line1->nfields; ++i) | 598 | for (i = join_field_1 + 1; i < line1->nfields; ++i) |
| 622 | { | 599 | { |
| 623 | - putchar (output_separator); | 600 | - putchar (output_separator); |
| 624 | + PUT_TAB_CHAR; | 601 | + PUT_TAB_CHAR; |
| 625 | prfield (i, line1); | 602 | prfield (i, line1); |
| 626 | } | 603 | } |
| 627 | 604 | ||
| 628 | for (i = 0; i < join_field_2 && i < line2->nfields; ++i) | 605 | for (i = 0; i < join_field_2 && i < line2->nfields; ++i) |
| 629 | { | 606 | { |
| 630 | - putchar (output_separator); | 607 | - putchar (output_separator); |
| 631 | + PUT_TAB_CHAR; | 608 | + PUT_TAB_CHAR; |
| 632 | prfield (i, line2); | 609 | prfield (i, line2); |
| 633 | } | 610 | } |
| 634 | for (i = join_field_2 + 1; i < line2->nfields; ++i) | 611 | for (i = join_field_2 + 1; i < line2->nfields; ++i) |
| 635 | { | 612 | { |
| 636 | - putchar (output_separator); | 613 | - putchar (output_separator); |
| 637 | + PUT_TAB_CHAR; | 614 | + PUT_TAB_CHAR; |
| 638 | prfield (i, line2); | 615 | prfield (i, line2); |
| 639 | } | 616 | } |
| 640 | putchar ('\n'); | 617 | putchar ('\n'); |
| 641 | @@ -859,20 +1090,41 @@ | 618 | @@ -859,20 +1090,41 @@ |
| 642 | 619 | ||
| 643 | case 't': | 620 | case 't': |
| 644 | { | 621 | { |
| 645 | - unsigned char newtab = optarg[0]; | 622 | - unsigned char newtab = optarg[0]; |
| 646 | - if (! newtab) | 623 | - if (! newtab) |
| 647 | + char *newtab; | 624 | + char *newtab; |
| 648 | + size_t newtablen; | 625 | + size_t newtablen; |
| 649 | + if (! optarg[0]) | 626 | + if (! optarg[0]) |
| 650 | error (EXIT_FAILURE, 0, _("empty tab")); | 627 | error (EXIT_FAILURE, 0, _("empty tab")); |
| 651 | - if (optarg[1]) | 628 | - if (optarg[1]) |
| 652 | + newtab = xstrdup (optarg); | 629 | + newtab = xstrdup (optarg); |
| 653 | +#if HAVE_MBRTOWC | 630 | +#if HAVE_MBRTOWC |
| 654 | + if (MB_CUR_MAX > 1) | 631 | + if (MB_CUR_MAX > 1) |
| 655 | + { | 632 | + { |
| 656 | + mbstate_t state; | 633 | + mbstate_t state; |
| 657 | + | 634 | + |
| 658 | + memset (&state, 0, sizeof (mbstate_t)); | 635 | + memset (&state, 0, sizeof (mbstate_t)); |
| 659 | + newtablen = mbrtowc (NULL, newtab, | 636 | + newtablen = mbrtowc (NULL, newtab, |
| 660 | + strnlen (newtab, MB_LEN_MAX), | 637 | + strnlen (newtab, MB_LEN_MAX), |
| 661 | + &state); | 638 | + &state); |
| 662 | + if (newtablen == (size_t) 0 | 639 | + if (newtablen == (size_t) 0 |
| 663 | + || newtablen == (size_t) -1 | 640 | + || newtablen == (size_t) -1 |
| 664 | + || newtablen == (size_t) -2) | 641 | + || newtablen == (size_t) -2) |
| 665 | + newtablen = 1; | 642 | + newtablen = 1; |
| 666 | + } | 643 | + } |
| 667 | + else | 644 | + else |
| 668 | +#endif | 645 | +#endif |
| 669 | + newtablen = 1; | 646 | + newtablen = 1; |
| 670 | + | 647 | + |
| 671 | + if (newtablen == 1 && newtab[1]) | 648 | + if (newtablen == 1 && newtab[1]) |
| 672 | + { | 649 | + { |
| 673 | + if (STREQ (newtab, "\\0")) | 650 | + if (STREQ (newtab, "\\0")) |
| 674 | + newtab[0] = '\0'; | 651 | + newtab[0] = '\0'; |
| 675 | + } | 652 | + } |
| 676 | + if (tab != NULL && strcmp (tab, newtab)) | 653 | + if (tab != NULL && strcmp (tab, newtab)) |
| 677 | { | 654 | { |
| 678 | - if (STREQ (optarg, "\\0")) | 655 | - if (STREQ (optarg, "\\0")) |
| 679 | - newtab = '\0'; | 656 | - newtab = '\0'; |
| 680 | - else | 657 | - else |
| 681 | - error (EXIT_FAILURE, 0, _("multi-character tab %s"), | 658 | - error (EXIT_FAILURE, 0, _("multi-character tab %s"), |
| 682 | - quote (optarg)); | 659 | - quote (optarg)); |
| 683 | + free (newtab); | 660 | + free (newtab); |
| 684 | + error (EXIT_FAILURE, 0, _("incompatible tabs")); | 661 | + error (EXIT_FAILURE, 0, _("incompatible tabs")); |
| 685 | } | 662 | } |
| 686 | - if (0 <= tab && tab != newtab) | 663 | - if (0 <= tab && tab != newtab) |
| 687 | - error (EXIT_FAILURE, 0, _("incompatible tabs")); | 664 | - error (EXIT_FAILURE, 0, _("incompatible tabs")); |
| 688 | tab = newtab; | 665 | tab = newtab; |
| 689 | + tablen = newtablen; | 666 | + tablen = newtablen; |
| 690 | } | 667 | } |
| 691 | break; | 668 | break; |
| 692 | 669 | ||
| 693 | diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c | 670 | diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c |
| 694 | --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 | 671 | --- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 |
| 695 | +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 | 672 | +++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 |
| 696 | @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct | 673 | @@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct |
| 697 | size_t jf_1, size_t jf_2) | 674 | size_t jf_1, size_t jf_2) |
| 698 | { | 675 | { |
| 699 | /* Start of field to compare in each file. */ | 676 | /* Start of field to compare in each file. */ |
| 700 | - char *beg1; | 677 | - char *beg1; |
| 701 | - char *beg2; | 678 | - char *beg2; |
| 702 | - | 679 | - |
| … | … | ||
| 812 | + } | 789 | + } |
| 813 | } | 790 | } |
| 814 | else | 791 | else |
| 815 | { | 792 | { |
| 816 | - if (hard_LC_COLLATE) | 793 | - if (hard_LC_COLLATE) |
| 817 | - return xmemcoll (beg1, len1, beg2, len2); | 794 | - return xmemcoll (beg1, len1, beg2, len2); |
| 818 | - diff = memcmp (beg1, beg2, MIN (len1, len2)); | 795 | - diff = memcmp (beg1, beg2, MIN (len1, len2)); |
| 819 | + copy[0] = (unsigned char *) beg[0]; | 796 | + copy[0] = (unsigned char *) beg[0]; |
| 820 | + copy[1] = (unsigned char *) beg[1]; | 797 | + copy[1] = (unsigned char *) beg[1]; |
| 821 | } | 798 | } |
| 822 | 799 | ||
| … | … | ||
| 897 | } | 874 | } |
| 898 | 875 | ||
| 899 | +#if HAVE_MBRTOWC | 876 | +#if HAVE_MBRTOWC |
| 900 | + | 877 | + |
| 901 | +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ | 878 | +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ |
| 902 | + do \ | 879 | + do \ |
| 903 | + { \ | 880 | + { \ |
| 904 | + mbstate_t state_bak; \ | 881 | + mbstate_t state_bak; \ |
| 905 | + \ | 882 | + \ |
| 906 | + CONVFAIL = 0; \ | 883 | + CONVFAIL = 0; \ |
| 907 | + state_bak = *STATEP; \ | 884 | + state_bak = *STATEP; \ |
| 908 | + \ | 885 | + \ |
| 909 | + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ | 886 | + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ |
| 910 | + \ | 887 | + \ |
| 911 | + switch (MBLENGTH) \ | 888 | + switch (MBLENGTH) \ |
| 912 | + { \ | 889 | + { \ |
| 913 | + case (size_t)-2: \ | 890 | + case (size_t)-2: \ |
| 914 | + case (size_t)-1: \ | 891 | + case (size_t)-1: \ |
| 915 | + *STATEP = state_bak; \ | 892 | + *STATEP = state_bak; \ |
| 916 | + CONVFAIL++; \ | 893 | + CONVFAIL++; \ |
| 917 | + /* Fall through */ \ | 894 | + /* Fall through */ \ |
| 918 | + case 0: \ | 895 | + case 0: \ |
| 919 | + MBLENGTH = 1; \ | 896 | + MBLENGTH = 1; \ |
| 920 | + } \ | 897 | + } \ |
| 921 | + } \ | 898 | + } \ |
| 922 | + while (0) | 899 | + while (0) |
| 923 | + | 900 | + |
| 924 | +static char * | 901 | +static char * |
| 925 | +find_field_multi (struct linebuffer *line) | 902 | +find_field_multi (struct linebuffer *line) |
| 926 | +{ | 903 | +{ |
| … | … | ||
| 938 | + | 915 | + |
| 939 | + /* skip fields. */ | 916 | + /* skip fields. */ |
| 940 | + for (count = 0; count < skip_fields && pos < size; count++) | 917 | + for (count = 0; count < skip_fields && pos < size; count++) |
| 941 | + { | 918 | + { |
| 942 | + while (pos < size) | 919 | + while (pos < size) |
| 943 | + { | 920 | + { |
| 944 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); | 921 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); |
| 945 | + | 922 | + |
| 946 | + if (convfail || !iswblank (wc)) | 923 | + if (convfail || !iswblank (wc)) |
| 947 | + { | 924 | + { |
| 925 | + pos += mblength; | ||
| 926 | + break; | ||
| 927 | + } | ||
| 948 | + pos += mblength; | 928 | + pos += mblength; |
| 949 | + break; | 929 | + } |
| 950 | + } | ||
| 951 | + pos += mblength; | ||
| 952 | + } | ||
| 953 | + | 930 | + |
| 954 | + while (pos < size) | 931 | + while (pos < size) |
| 955 | + { | 932 | + { |
| 956 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); | 933 | + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); |
| 957 | + | 934 | + |
| 958 | + if (!convfail && iswblank (wc)) | 935 | + if (!convfail && iswblank (wc)) |
| 959 | + break; | 936 | + break; |
| 960 | + | 937 | + |
| 961 | + pos += mblength; | 938 | + pos += mblength; |
| 962 | + } | 939 | + } |
| 963 | + } | 940 | + } |
| 964 | + | 941 | + |
| 965 | + /* skip fields. */ | 942 | + /* skip fields. */ |
| 966 | + for (count = 0; count < skip_chars && pos < size; count++) | 943 | + for (count = 0; count < skip_chars && pos < size; count++) |
| 967 | + { | 944 | + { |
| … | … | ||
| 995 | + | 972 | + |
| 996 | + copy_old = alloca (oldlen + 1); | 973 | + copy_old = alloca (oldlen + 1); |
| 997 | + copy_new = alloca (oldlen + 1); | 974 | + copy_new = alloca (oldlen + 1); |
| 998 | + | 975 | + |
| 999 | + for (i = 0; i < oldlen; i++) | 976 | + for (i = 0; i < oldlen; i++) |
| 1000 | + { | 977 | + { |
| 1001 | + copy_old[i] = toupper (old[i]); | 978 | + copy_old[i] = toupper (old[i]); |
| 1002 | + copy_new[i] = toupper (new[i]); | 979 | + copy_new[i] = toupper (new[i]); |
| 1003 | + } | 980 | + } |
| 1004 | } | 981 | } |
| 1005 | - else if (hard_LC_COLLATE) | 982 | - else if (hard_LC_COLLATE) |
| 1006 | - return xmemcoll (old, oldlen, new, newlen) != 0; | 983 | - return xmemcoll (old, oldlen, new, newlen) != 0; |
| 1007 | else | 984 | else |
| 1008 | - return oldlen != newlen || memcmp (old, new, oldlen); | 985 | - return oldlen != newlen || memcmp (old, new, oldlen); |
| … | … | ||
| 1037 | + for (i = 0; i < 2; i++) | 1014 | + for (i = 0; i < 2; i++) |
| 1038 | + { | 1015 | + { |
| 1039 | + copy[i] = alloca (len[i] + 1); | 1016 | + copy[i] = alloca (len[i] + 1); |
| 1040 | + | 1017 | + |
| 1041 | + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) | 1018 | + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) |
| 1042 | + { | 1019 | + { |
| 1043 | + state_bak = state[i]; | 1020 | + state_bak = state[i]; |
| 1044 | + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); | 1021 | + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); |
| 1045 | + | 1022 | + |
| 1046 | + switch (mblength) | 1023 | + switch (mblength) |
| 1047 | + { | 1024 | + { |
| 1048 | + case (size_t)-1: | 1025 | + case (size_t)-1: |
| 1049 | + case (size_t)-2: | 1026 | + case (size_t)-2: |
| 1050 | + state[i] = state_bak; | 1027 | + state[i] = state_bak; |
| 1051 | + /* Fall through */ | 1028 | + /* Fall through */ |
| 1052 | + case 0: | 1029 | + case 0: |
| 1053 | + mblength = 1; | 1030 | + mblength = 1; |
| 1054 | + break; | 1031 | + break; |
| 1055 | + | 1032 | + |
| 1056 | + default: | 1033 | + default: |
| 1057 | + if (ignore_case) | 1034 | + if (ignore_case) |
| 1058 | + { | 1035 | + { |
| 1059 | + uwc = towupper (wc); | 1036 | + uwc = towupper (wc); |
| 1060 | + | 1037 | + |
| 1061 | + if (uwc != wc) | 1038 | + if (uwc != wc) |
| 1062 | + { | 1039 | + { |
| 1063 | + mbstate_t state_wc; | 1040 | + mbstate_t state_wc; |
| 1064 | + | 1041 | + |
| 1065 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | 1042 | + memset (&state_wc, '\0', sizeof(mbstate_t)); |
| 1066 | + wcrtomb (copy[i] + j, uwc, &state_wc); | 1043 | + wcrtomb (copy[i] + j, uwc, &state_wc); |
| 1067 | + } | 1044 | + } |
| 1068 | + else | 1045 | + else |
| 1046 | + memcpy (copy[i] + j, str[i] + j, mblength); | ||
| 1047 | + } | ||
| 1048 | + else | ||
| 1069 | + memcpy (copy[i] + j, str[i] + j, mblength); | 1049 | + memcpy (copy[i] + j, str[i] + j, mblength); |
| 1070 | + } | 1050 | + } |
| 1071 | + else | ||
| 1072 | + memcpy (copy[i] + j, str[i] + j, mblength); | ||
| 1073 | + } | ||
| 1074 | + j += mblength; | 1051 | + j += mblength; |
| 1075 | + } | 1052 | + } |
| 1076 | + copy[i][j] = '\0'; | 1053 | + copy[i][j] = '\0'; |
| 1077 | + len[i] = j; | 1054 | + len[i] = j; |
| 1078 | + } | 1055 | + } |
| 1079 | + | 1056 | + |
| 1080 | + return xmemcoll (copy[0], len[0], copy[1], len[1]); | 1057 | + return xmemcoll (copy[0], len[0], copy[1], len[1]); |
| … | … | ||
| 1092 | + | 1069 | + |
| 1093 | + memset (&prevstate, '\0', sizeof (mbstate_t)); | 1070 | + memset (&prevstate, '\0', sizeof (mbstate_t)); |
| 1094 | +#endif | 1071 | +#endif |
| 1095 | 1072 | ||
| 1096 | while (!feof (stdin)) | 1073 | while (!feof (stdin)) |
| 1097 | { | 1074 | { |
| 1098 | char *thisfield; | 1075 | char *thisfield; |
| 1099 | size_t thislen; | 1076 | size_t thislen; |
| 1100 | +#if HAVE_MBRTOWC | 1077 | +#if HAVE_MBRTOWC |
| 1101 | + mbstate_t thisstate; | 1078 | + mbstate_t thisstate; |
| 1102 | +#endif | 1079 | +#endif |
| 1103 | + | 1080 | + |
| 1104 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) | 1081 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) |
| 1105 | break; | 1082 | break; |
| 1106 | thisfield = find_field (thisline); | 1083 | thisfield = find_field (thisline); |
| 1107 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); | 1084 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); |
| 1108 | +#if HAVE_MBRTOWC | 1085 | +#if HAVE_MBRTOWC |
| 1109 | + if (MB_CUR_MAX > 1) | 1086 | + if (MB_CUR_MAX > 1) |
| 1110 | + { | 1087 | + { |
| 1111 | + thisstate = thisline->state; | 1088 | + thisstate = thisline->state; |
| 1112 | + | 1089 | + |
| 1113 | + if (prevline->length == 0 || different_multi | 1090 | + if (prevline->length == 0 || different_multi |
| 1114 | + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) | 1091 | + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) |
| … | … | ||
| 1120 | + prevfield = thisfield; | 1097 | + prevfield = thisfield; |
| 1121 | + prevlen = thislen; | 1098 | + prevlen = thislen; |
| 1122 | + prevstate = thisstate; | 1099 | + prevstate = thisstate; |
| 1123 | + } | 1100 | + } |
| 1124 | + } | 1101 | + } |
| 1125 | + else | 1102 | + else |
| 1126 | +#endif | 1103 | +#endif |
| 1127 | if (prevline->length == 0 | 1104 | if (prevline->length == 0 |
| 1128 | || different (thisfield, prevfield, thislen, prevlen)) | 1105 | || different (thisfield, prevfield, thislen, prevlen)) |
| 1129 | { | 1106 | { |
| 1130 | @@ -322,17 +533,26 @@ | 1107 | @@ -322,17 +533,26 @@ |
| 1131 | size_t prevlen; | 1108 | size_t prevlen; |
| 1132 | uintmax_t match_count = 0; | 1109 | uintmax_t match_count = 0; |
| 1133 | bool first_delimiter = true; | 1110 | bool first_delimiter = true; |
| 1134 | +#if HAVE_MBRTOWC | 1111 | +#if HAVE_MBRTOWC |
| 1135 | + mbstate_t prevstate; | 1112 | + mbstate_t prevstate; |
| 1136 | +#endif | 1113 | +#endif |
| 1137 | 1114 | ||
| 1138 | if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) | 1115 | if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) |
| 1139 | goto closefiles; | 1116 | goto closefiles; |
| 1140 | prevfield = find_field (prevline); | 1117 | prevfield = find_field (prevline); |
| 1141 | prevlen = prevline->length - 1 - (prevfield - prevline->buffer); | 1118 | prevlen = prevline->length - 1 - (prevfield - prevline->buffer); |
| 1142 | +#if HAVE_MBRTOWC | 1119 | +#if HAVE_MBRTOWC |
| 1143 | + prevstate = prevline->state; | 1120 | + prevstate = prevline->state; |
| 1144 | +#endif | 1121 | +#endif |
| 1145 | 1122 | ||
| 1146 | while (!feof (stdin)) | 1123 | while (!feof (stdin)) |
| 1147 | { | 1124 | { |
| 1148 | bool match; | 1125 | bool match; |
| 1149 | char *thisfield; | 1126 | char *thisfield; |
| 1150 | size_t thislen; | 1127 | size_t thislen; |
| 1151 | +#if HAVE_MBRTOWC | 1128 | +#if HAVE_MBRTOWC |
| 1152 | + mbstate_t thisstate; | 1129 | + mbstate_t thisstate; |
| 1153 | +#endif | 1130 | +#endif |
| 1154 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) | 1131 | if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) |
| 1155 | { | 1132 | { |
| 1156 | if (ferror (stdin)) | 1133 | if (ferror (stdin)) |
| 1157 | @@ -341,6 +561,15 @@ | 1134 | @@ -341,6 +561,15 @@ |
| 1158 | } | 1135 | } |
| 1159 | thisfield = find_field (thisline); | 1136 | thisfield = find_field (thisline); |
| 1160 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); | 1137 | thislen = thisline->length - 1 - (thisfield - thisline->buffer); |
| 1161 | +#if HAVE_MBRTOWC | 1138 | +#if HAVE_MBRTOWC |
| 1162 | + if (MB_CUR_MAX > 1) | 1139 | + if (MB_CUR_MAX > 1) |
| 1163 | + { | 1140 | + { |
| 1164 | + thisstate = thisline->state; | 1141 | + thisstate = thisline->state; |
| 1165 | + match = !different_multi (thisfield, prevfield, | 1142 | + match = !different_multi (thisfield, prevfield, |
| 1166 | + thislen, prevlen, thisstate, prevstate); | 1143 | + thislen, prevlen, thisstate, prevstate); |
| 1167 | + } | 1144 | + } |
| 1168 | + else | 1145 | + else |
| 1169 | +#endif | 1146 | +#endif |
| 1170 | match = !different (thisfield, prevfield, thislen, prevlen); | 1147 | match = !different (thisfield, prevfield, thislen, prevlen); |
| 1171 | match_count += match; | 1148 | match_count += match; |
| 1172 | 1149 | ||
| 1173 | @@ -373,6 +602,9 @@ | 1150 | @@ -373,6 +602,9 @@ |
| 1174 | SWAP_LINES (prevline, thisline); | 1151 | SWAP_LINES (prevline, thisline); |
| 1175 | prevfield = thisfield; | 1152 | prevfield = thisfield; |
| 1176 | prevlen = thislen; | 1153 | prevlen = thislen; |
| 1177 | +#if HAVE_MBRTOWC | 1154 | +#if HAVE_MBRTOWC |
| 1178 | + prevstate = thisstate; | 1155 | + prevstate = thisstate; |
| 1179 | +#endif | 1156 | +#endif |
| 1180 | if (!match) | 1157 | if (!match) |
| 1181 | match_count = 0; | 1158 | match_count = 0; |
| 1182 | } | 1159 | } |
| 1183 | @@ -417,6 +649,19 @@ | 1160 | @@ -417,6 +649,19 @@ |
| 1184 | 1161 | ||
| 1185 | atexit (close_stdout); | 1162 | atexit (close_stdout); |
| 1186 | 1163 | ||
| 1187 | +#if HAVE_MBRTOWC | 1164 | +#if HAVE_MBRTOWC |
| … | … | ||
| 1296 | { | 1273 | { |
| 1297 | - if (!count_bytes) | 1274 | - if (!count_bytes) |
| 1298 | + if (operating_mode != byte_mode) | 1275 | + if (operating_mode != byte_mode) |
| 1299 | { | 1276 | { |
| 1300 | if (c == '\b') | 1277 | if (c == '\b') |
| 1301 | { | 1278 | { |
| 1302 | @@ -121,30 +165,14 @@ | 1279 | @@ -121,30 +165,14 @@ |
| 1303 | to stdout, with maximum line length WIDTH. | 1280 | to stdout, with maximum line length WIDTH. |
| 1304 | Return true if successful. */ | 1281 | Return true if successful. */ |
| 1305 | 1282 | ||
| 1306 | -static bool | 1283 | -static bool |
| … | … | ||
| 1331 | - } | 1308 | - } |
| 1332 | 1309 | ||
| 1333 | while ((c = getc (istream)) != EOF) | 1310 | while ((c = getc (istream)) != EOF) |
| 1334 | { | 1311 | { |
| 1335 | @@ -172,6 +200,15 @@ | 1312 | @@ -172,6 +200,15 @@ |
| 1336 | bool found_blank = false; | 1313 | bool found_blank = false; |
| 1337 | size_t logical_end = offset_out; | 1314 | size_t logical_end = offset_out; |
| 1338 | 1315 | ||
| 1339 | + /* If LINE_OUT has no wide character, | 1316 | + /* If LINE_OUT has no wide character, |
| 1340 | + put a new wide character in LINE_OUT | 1317 | + put a new wide character in LINE_OUT |
| 1341 | + if column is bigger than width. */ | 1318 | + if column is bigger than width. */ |
| 1342 | + if (offset_out == 0) | 1319 | + if (offset_out == 0) |
| 1343 | + { | 1320 | + { |
| 1344 | + line_out[offset_out++] = c; | 1321 | + line_out[offset_out++] = c; |
| 1345 | + continue; | 1322 | + continue; |
| 1346 | + } | 1323 | + } |
| 1347 | + | 1324 | + |
| 1348 | /* Look for the last blank. */ | 1325 | /* Look for the last blank. */ |
| 1349 | while (logical_end) | 1326 | while (logical_end) |
| 1350 | { | 1327 | { |
| 1351 | @@ -218,11 +255,222 @@ | 1328 | @@ -218,11 +255,222 @@ |
| 1352 | line_out[offset_out++] = c; | 1329 | line_out[offset_out++] = c; |
| 1353 | } | 1330 | } |
| 1354 | 1331 | ||
| 1355 | - saved_errno = errno; | 1332 | - saved_errno = errno; |
| … | … | ||
| 1363 | +#if HAVE_MBRTOWC | 1340 | +#if HAVE_MBRTOWC |
| 1364 | +static void | 1341 | +static void |
| 1365 | +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) | 1342 | +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) |
| 1366 | +{ | 1343 | +{ |
| 1367 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ | 1344 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ |
| 1368 | + size_t buflen = 0; /* The length of the byte sequence in buf. */ | 1345 | + size_t buflen = 0; /* The length of the byte sequence in buf. */ |
| 1369 | + char *bufpos = NULL; /* Next read position of BUF. */ | 1346 | + char *bufpos = NULL; /* Next read position of BUF. */ |
| 1370 | + wint_t wc; /* A gotten wide character. */ | 1347 | + wint_t wc; /* A gotten wide character. */ |
| 1371 | + size_t mblength; /* The byte size of a multibyte character which shows | 1348 | + size_t mblength; /* The byte size of a multibyte character which shows |
| 1372 | + as same character as WC. */ | 1349 | + as same character as WC. */ |
| 1373 | + mbstate_t state, state_bak; /* State of the stream. */ | 1350 | + mbstate_t state, state_bak; /* State of the stream. */ |
| 1374 | + int convfail; /* 1, when conversion is failed. Otherwise 0. */ | 1351 | + int convfail; /* 1, when conversion is failed. Otherwise 0. */ |
| 1375 | + | 1352 | + |
| 1376 | + static char *line_out = NULL; | 1353 | + static char *line_out = NULL; |
| 1377 | + size_t offset_out = 0; /* Index in `line_out' for next char. */ | 1354 | + size_t offset_out = 0; /* Index in `line_out' for next char. */ |
| 1378 | + static size_t allocated_out = 0; | 1355 | + static size_t allocated_out = 0; |
| 1379 | + | 1356 | + |
| 1380 | + int increment; | 1357 | + int increment; |
| 1381 | + size_t column = 0; | 1358 | + size_t column = 0; |
| 1382 | + | 1359 | + |
| … | … | ||
| 1386 | + int last_blank_increment = 0; | 1363 | + int last_blank_increment = 0; |
| 1387 | + int is_bs_following_last_blank; | 1364 | + int is_bs_following_last_blank; |
| 1388 | + size_t bs_following_last_blank_num; | 1365 | + size_t bs_following_last_blank_num; |
| 1389 | + int is_cr_after_last_blank; | 1366 | + int is_cr_after_last_blank; |
| 1390 | + | 1367 | + |
| 1391 | +#define CLEAR_FLAGS \ | 1368 | +#define CLEAR_FLAGS \ |
| 1392 | + do \ | 1369 | + do \ |
| 1393 | + { \ | 1370 | + { \ |
| 1394 | + last_blank_pos = 0; \ | 1371 | + last_blank_pos = 0; \ |
| 1395 | + last_blank_column = 0; \ | 1372 | + last_blank_column = 0; \ |
| 1396 | + is_blank_seen = 0; \ | 1373 | + is_blank_seen = 0; \ |
| 1397 | + is_bs_following_last_blank = 0; \ | 1374 | + is_bs_following_last_blank = 0; \ |
| 1398 | + bs_following_last_blank_num = 0; \ | 1375 | + bs_following_last_blank_num = 0; \ |
| 1399 | + is_cr_after_last_blank = 0; \ | 1376 | + is_cr_after_last_blank = 0; \ |
| 1400 | + } \ | 1377 | + } \ |
| 1401 | + while (0) | 1378 | + while (0) |
| 1402 | + | 1379 | + |
| 1403 | +#define START_NEW_LINE \ | 1380 | +#define START_NEW_LINE \ |
| 1404 | + do \ | 1381 | + do \ |
| 1405 | + { \ | 1382 | + { \ |
| 1406 | + putchar ('\n'); \ | 1383 | + putchar ('\n'); \ |
| 1407 | + column = 0; \ | 1384 | + column = 0; \ |
| 1408 | + offset_out = 0; \ | 1385 | + offset_out = 0; \ |
| 1409 | + CLEAR_FLAGS; \ | 1386 | + CLEAR_FLAGS; \ |
| 1410 | + } \ | 1387 | + } \ |
| 1411 | + while (0) | 1388 | + while (0) |
| 1412 | + | 1389 | + |
| 1413 | + CLEAR_FLAGS; | 1390 | + CLEAR_FLAGS; |
| 1414 | + memset (&state, '\0', sizeof(mbstate_t)); | 1391 | + memset (&state, '\0', sizeof(mbstate_t)); |
| 1415 | + | 1392 | + |
| 1416 | + for (;; bufpos += mblength, buflen -= mblength) | 1393 | + for (;; bufpos += mblength, buflen -= mblength) |
| 1417 | + { | 1394 | + { |
| 1418 | + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) | 1395 | + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) |
| 1419 | + { | 1396 | + { |
| 1420 | + memmove (buf, bufpos, buflen); | 1397 | + memmove (buf, bufpos, buflen); |
| 1421 | + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); | 1398 | + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); |
| 1422 | + bufpos = buf; | 1399 | + bufpos = buf; |
| 1423 | + } | 1400 | + } |
| 1424 | + | 1401 | + |
| 1425 | + if (buflen < 1) | 1402 | + if (buflen < 1) |
| 1426 | + break; | 1403 | + break; |
| 1427 | + | 1404 | + |
| 1428 | + /* Get a wide character. */ | 1405 | + /* Get a wide character. */ |
| 1429 | + convfail = 0; | 1406 | + convfail = 0; |
| 1430 | + state_bak = state; | 1407 | + state_bak = state; |
| 1431 | + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); | 1408 | + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); |
| 1432 | + | 1409 | + |
| 1433 | + switch (mblength) | 1410 | + switch (mblength) |
| 1434 | + { | 1411 | + { |
| 1435 | + case (size_t)-1: | 1412 | + case (size_t)-1: |
| 1436 | + case (size_t)-2: | 1413 | + case (size_t)-2: |
| 1437 | + convfail++; | 1414 | + convfail++; |
| 1438 | + state = state_bak; | 1415 | + state = state_bak; |
| 1439 | + /* Fall through. */ | 1416 | + /* Fall through. */ |
| 1440 | + | 1417 | + |
| 1441 | + case 0: | 1418 | + case 0: |
| 1442 | + mblength = 1; | 1419 | + mblength = 1; |
| 1443 | + break; | 1420 | + break; |
| 1444 | + } | 1421 | + } |
| 1445 | + | 1422 | + |
| 1446 | +rescan: | 1423 | +rescan: |
| 1447 | + if (operating_mode == byte_mode) /* byte mode */ | 1424 | + if (operating_mode == byte_mode) /* byte mode */ |
| 1448 | + increment = mblength; | 1425 | + increment = mblength; |
| 1449 | + else if (operating_mode == character_mode) /* character mode */ | 1426 | + else if (operating_mode == character_mode) /* character mode */ |
| 1450 | + increment = 1; | ||
| 1451 | + else /* column mode */ | ||
| 1452 | + { | ||
| 1453 | + if (convfail) | ||
| 1454 | + increment = 1; | 1427 | + increment = 1; |
| 1455 | + else | 1428 | + else /* column mode */ |
| 1456 | + { | 1429 | + { |
| 1430 | + if (convfail) | ||
| 1431 | + increment = 1; | ||
| 1432 | + else | ||
| 1433 | + { | ||
| 1457 | + switch (wc) | 1434 | + switch (wc) |
| 1458 | + { | 1435 | + { |
| 1459 | + case L'\n': | 1436 | + case L'\n': |
| 1460 | + fwrite (line_out, sizeof(char), offset_out, stdout); | 1437 | + fwrite (line_out, sizeof(char), offset_out, stdout); |
| 1461 | + START_NEW_LINE; | 1438 | + START_NEW_LINE; |
| 1462 | + continue; | 1439 | + continue; |
| 1463 | + | 1440 | + |
| 1464 | + case L'\b': | 1441 | + case L'\b': |
| 1465 | + increment = (column > 0) ? -1 : 0; | 1442 | + increment = (column > 0) ? -1 : 0; |
| 1466 | + break; | 1443 | + break; |
| 1467 | + | 1444 | + |
| 1468 | + case L'\r': | 1445 | + case L'\r': |
| 1469 | + increment = -1 * column; | 1446 | + increment = -1 * column; |
| 1470 | + break; | 1447 | + break; |
| 1471 | + | 1448 | + |
| 1472 | + case L'\t': | 1449 | + case L'\t': |
| 1473 | + increment = 8 - column % 8; | 1450 | + increment = 8 - column % 8; |
| 1474 | + break; | 1451 | + break; |
| 1475 | + | 1452 | + |
| 1476 | + default: | 1453 | + default: |
| 1477 | + increment = wcwidth (wc); | 1454 | + increment = wcwidth (wc); |
| 1478 | + increment = (increment < 0) ? 0 : increment; | 1455 | + increment = (increment < 0) ? 0 : increment; |
| 1479 | + } | 1456 | + } |
| 1480 | + } | 1457 | + } |
| 1481 | + } | 1458 | + } |
| 1482 | + | 1459 | + |
| 1483 | + if (column + increment > width && break_spaces && last_blank_pos) | 1460 | + if (column + increment > width && break_spaces && last_blank_pos) |
| 1484 | + { | 1461 | + { |
| 1485 | + fwrite (line_out, sizeof(char), last_blank_pos, stdout); | 1462 | + fwrite (line_out, sizeof(char), last_blank_pos, stdout); |
| 1486 | + putchar ('\n'); | 1463 | + putchar ('\n'); |
| 1487 | + | 1464 | + |
| 1488 | + offset_out = offset_out - last_blank_pos; | 1465 | + offset_out = offset_out - last_blank_pos; |
| 1489 | + column = column - last_blank_column + ((is_cr_after_last_blank) | 1466 | + column = column - last_blank_column + ((is_cr_after_last_blank) |
| 1490 | + ? last_blank_increment : bs_following_last_blank_num); | 1467 | + ? last_blank_increment : bs_following_last_blank_num); |
| 1491 | + memmove (line_out, line_out + last_blank_pos, offset_out); | 1468 | + memmove (line_out, line_out + last_blank_pos, offset_out); |
| 1492 | + CLEAR_FLAGS; | 1469 | + CLEAR_FLAGS; |
| 1493 | + goto rescan; | 1470 | + goto rescan; |
| 1494 | + } | 1471 | + } |
| 1495 | + | 1472 | + |
| 1496 | + if (column + increment > width && column != 0) | 1473 | + if (column + increment > width && column != 0) |
| 1497 | + { | 1474 | + { |
| 1498 | + fwrite (line_out, sizeof(char), offset_out, stdout); | 1475 | + fwrite (line_out, sizeof(char), offset_out, stdout); |
| 1499 | + START_NEW_LINE; | 1476 | + START_NEW_LINE; |
| 1500 | + goto rescan; | 1477 | + goto rescan; |
| 1501 | + } | 1478 | + } |
| 1502 | + | 1479 | + |
| 1503 | + if (allocated_out < offset_out + mblength) | 1480 | + if (allocated_out < offset_out + mblength) |
| 1504 | + { | 1481 | + { |
| 1505 | + line_out = X2REALLOC (line_out, &allocated_out); | 1482 | + line_out = X2REALLOC (line_out, &allocated_out); |
| 1506 | + } | 1483 | + } |
| 1507 | + | 1484 | + |
| 1508 | + memcpy (line_out + offset_out, bufpos, mblength); | 1485 | + memcpy (line_out + offset_out, bufpos, mblength); |
| 1509 | + offset_out += mblength; | 1486 | + offset_out += mblength; |
| 1510 | + column += increment; | 1487 | + column += increment; |
| 1511 | + | 1488 | + |
| 1512 | + if (is_blank_seen && !convfail && wc == L'\r') | 1489 | + if (is_blank_seen && !convfail && wc == L'\r') |
| 1513 | + is_cr_after_last_blank = 1; | 1490 | + is_cr_after_last_blank = 1; |
| 1514 | + | 1491 | + |
| 1515 | + if (is_bs_following_last_blank && !convfail && wc == L'\b') | 1492 | + if (is_bs_following_last_blank && !convfail && wc == L'\b') |
| 1516 | + ++bs_following_last_blank_num; | 1493 | + ++bs_following_last_blank_num; |
| 1517 | + else | 1494 | + else |
| 1518 | + is_bs_following_last_blank = 0; | 1495 | + is_bs_following_last_blank = 0; |
| 1519 | + | 1496 | + |
| 1520 | + if (break_spaces && !convfail && iswblank (wc)) | 1497 | + if (break_spaces && !convfail && iswblank (wc)) |
| 1521 | + { | 1498 | + { |
| 1522 | + last_blank_pos = offset_out; | 1499 | + last_blank_pos = offset_out; |
| 1523 | + last_blank_column = column; | 1500 | + last_blank_column = column; |
| 1524 | + is_blank_seen = 1; | 1501 | + is_blank_seen = 1; |
| 1525 | + last_blank_increment = increment; | 1502 | + last_blank_increment = increment; |
| 1526 | + is_bs_following_last_blank = 1; | 1503 | + is_bs_following_last_blank = 1; |
| 1527 | + bs_following_last_blank_num = 0; | 1504 | + bs_following_last_blank_num = 0; |
| 1528 | + is_cr_after_last_blank = 0; | 1505 | + is_cr_after_last_blank = 0; |
| 1529 | + } | 1506 | + } |
| 1530 | + } | 1507 | + } |
| 1531 | + | 1508 | + |
| 1532 | + *saved_errno = errno; | 1509 | + *saved_errno = errno; |
| 1533 | 1510 | ||
| 1534 | if (offset_out) | 1511 | if (offset_out) |
| … | … | ||
| 1582 | 1559 | ||
| 1583 | while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) | 1560 | while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) |
| 1584 | { | 1561 | { |
| 1585 | @@ -264,7 +516,15 @@ | 1562 | @@ -264,7 +516,15 @@ |
| 1586 | switch (optc) | 1563 | switch (optc) |
| 1587 | { | 1564 | { |
| 1588 | case 'b': /* Count bytes rather than columns. */ | 1565 | case 'b': /* Count bytes rather than columns. */ |
| 1589 | - count_bytes = true; | 1566 | - count_bytes = true; |
| 1590 | + if (operating_mode != column_mode) | 1567 | + if (operating_mode != column_mode) |
| 1591 | + FATAL_ERROR (_("only one way of folding may be specified")); | 1568 | + FATAL_ERROR (_("only one way of folding may be specified")); |
| 1592 | + operating_mode = byte_mode; | 1569 | + operating_mode = byte_mode; |
| 1593 | + break; | 1570 | + break; |
| 1594 | + | 1571 | + |
| 1595 | + case 'c': | 1572 | + case 'c': |
| 1596 | + if (operating_mode != column_mode) | 1573 | + if (operating_mode != column_mode) |
| 1597 | + FATAL_ERROR (_("only one way of folding may be specified")); | 1574 | + FATAL_ERROR (_("only one way of folding may be specified")); |
| 1598 | + operating_mode = character_mode; | 1575 | + operating_mode = character_mode; |
| 1599 | break; | 1576 | break; |
| 1600 | 1577 | ||
| 1601 | case 's': /* Break at word boundaries. */ | 1578 | case 's': /* Break at word boundaries. */ |
| 1602 | --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000 | 1579 | --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000 |
| 1603 | +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000 | 1580 | +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000 |
| 1604 | @@ -23,10 +23,19 @@ | 1581 | @@ -23,10 +23,19 @@ |
| 1605 | 1582 | ||
| 1606 | #include <config.h> | 1583 | #include <config.h> |
| … | … | ||
| 1635 | #endif | 1612 | #endif |
| 1636 | 1613 | ||
| 1637 | #define NONZERO(x) ((x) != 0) | 1614 | #define NONZERO(x) ((x) != 0) |
| 1638 | 1615 | ||
| 1639 | +/* get a multibyte character's byte length. */ | 1616 | +/* get a multibyte character's byte length. */ |
| 1640 | +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ | 1617 | +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ |
| 1641 | + do \ | 1618 | + do \ |
| 1642 | + { \ | 1619 | + { \ |
| 1643 | + wchar_t wc; \ | 1620 | + wchar_t wc; \ |
| 1644 | + mbstate_t state_bak; \ | 1621 | + mbstate_t state_bak; \ |
| 1645 | + \ | 1622 | + \ |
| 1646 | + state_bak = STATE; \ | 1623 | + state_bak = STATE; \ |
| 1647 | + mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ | 1624 | + mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ |
| 1648 | + \ | 1625 | + \ |
| 1649 | + switch (MBLENGTH) \ | 1626 | + switch (MBLENGTH) \ |
| 1650 | + { \ | 1627 | + { \ |
| 1651 | + case (size_t)-1: \ | 1628 | + case (size_t)-1: \ |
| 1652 | + case (size_t)-2: \ | 1629 | + case (size_t)-2: \ |
| 1653 | + STATE = state_bak; \ | 1630 | + STATE = state_bak; \ |
| 1654 | + /* Fall through. */ \ | 1631 | + /* Fall through. */ \ |
| 1655 | + case 0: \ | 1632 | + case 0: \ |
| 1656 | + MBLENGTH = 1; \ | 1633 | + MBLENGTH = 1; \ |
| 1657 | + } \ | 1634 | + } \ |
| 1658 | + } \ | 1635 | + } \ |
| 1659 | + while (0) | 1636 | + while (0) |
| 1660 | + | 1637 | + |
| 1661 | /* The kind of blanks for '-b' to skip in various options. */ | 1638 | /* The kind of blanks for '-b' to skip in various options. */ |
| 1662 | enum blanktype { bl_start, bl_end, bl_both }; | 1639 | enum blanktype { bl_start, bl_end, bl_both }; |
| 1663 | 1640 | ||
| … | … | ||
| 1774 | + | 1751 | + |
| 1775 | + memset (&state_mb, '\0', sizeof (mbstate_t)); | 1752 | + memset (&state_mb, '\0', sizeof (mbstate_t)); |
| 1776 | + memset (&state_wc, '\0', sizeof (mbstate_t)); | 1753 | + memset (&state_wc, '\0', sizeof (mbstate_t)); |
| 1777 | + | 1754 | + |
| 1778 | + for (j = 0; j < s_len;) | 1755 | + for (j = 0; j < s_len;) |
| 1779 | + { | 1756 | + { |
| 1780 | + if (!ismbblank (s + j, s_len - j, &mblength)) | 1757 | + if (!ismbblank (s + j, s_len - j, &mblength)) |
| 1781 | + break; | 1758 | + break; |
| 1782 | + j += mblength; | 1759 | + j += mblength; |
| 1783 | + } | 1760 | + } |
| 1784 | + | 1761 | + |
| 1785 | + for (k = 0; j < s_len;) | 1762 | + for (k = 0; j < s_len;) |
| 1786 | + { | 1763 | + { |
| 1787 | + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); | 1764 | + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); |
| 1788 | + assert (mblength != (size_t)-1 && mblength != (size_t)-2); | 1765 | + assert (mblength != (size_t)-1 && mblength != (size_t)-2); |
| 1789 | + if (mblength == 0) | 1766 | + if (mblength == 0) |
| 1790 | + break; | 1767 | + break; |
| 1791 | + | 1768 | + |
| 1792 | + pwc = towupper (wc); | 1769 | + pwc = towupper (wc); |
| 1793 | + if (pwc == wc) | 1770 | + if (pwc == wc) |
| 1794 | + { | 1771 | + { |
| 1795 | + memcpy (mbc, s + j, mblength); | 1772 | + memcpy (mbc, s + j, mblength); |
| 1796 | + j += mblength; | 1773 | + j += mblength; |
| 1797 | + } | 1774 | + } |
| 1798 | + else | 1775 | + else |
| 1799 | + { | 1776 | + { |
| 1800 | + j += mblength; | 1777 | + j += mblength; |
| 1801 | + mblength = wcrtomb (mbc, pwc, &state_wc); | 1778 | + mblength = wcrtomb (mbc, pwc, &state_wc); |
| 1802 | + assert (mblength != (size_t)0 && mblength != (size_t)-1); | 1779 | + assert (mblength != (size_t)0 && mblength != (size_t)-1); |
| 1803 | + } | 1780 | + } |
| 1804 | + | 1781 | + |
| 1805 | + for (l = 0; l < mblength; l++) | 1782 | + for (l = 0; l < mblength; l++) |
| 1806 | + name[k++] = mbc[l]; | 1783 | + name[k++] = mbc[l]; |
| 1807 | + } | 1784 | + } |
| 1808 | + name[k] = '\0'; | 1785 | + name[k] = '\0'; |
| 1809 | + } | 1786 | + } |
| 1810 | + qsort ((void *) monthtab, MONTHS_PER_YEAR, | 1787 | + qsort ((void *) monthtab, MONTHS_PER_YEAR, |
| 1811 | + sizeof (struct month), struct_month_cmp); | 1788 | + sizeof (struct month), struct_month_cmp); |
| 1812 | +} | 1789 | +} |
| … | … | ||
| 1830 | 1807 | ||
| 1831 | - if (tab != TAB_DEFAULT) | 1808 | - if (tab != TAB_DEFAULT) |
| 1832 | + if (tab_length) | 1809 | + if (tab_length) |
| 1833 | while (ptr < lim && sword--) | 1810 | while (ptr < lim && sword--) |
| 1834 | { | 1811 | { |
| 1835 | - while (ptr < lim && *ptr != tab) | 1812 | - while (ptr < lim && *ptr != tab) |
| 1836 | + while (ptr < lim && *ptr != tab[0]) | 1813 | + while (ptr < lim && *ptr != tab[0]) |
| 1837 | ++ptr; | 1814 | ++ptr; |
| 1838 | if (ptr < lim) | 1815 | if (ptr < lim) |
| 1839 | ++ptr; | 1816 | ++ptr; |
| 1840 | @@ -1282,11 +1409,70 @@ | 1817 | @@ -1282,11 +1409,70 @@ |
| 1841 | return ptr; | 1818 | return ptr; |
| 1842 | } | 1819 | } |
| 1843 | 1820 | ||
| 1844 | +#if HAVE_MBRTOWC | 1821 | +#if HAVE_MBRTOWC |
| … | … | ||
| 1855 | + memset (&state, '\0', sizeof(mbstate_t)); | 1832 | + memset (&state, '\0', sizeof(mbstate_t)); |
| 1856 | + | 1833 | + |
| 1857 | + if (tab_length) | 1834 | + if (tab_length) |
| 1858 | + while (ptr < lim && sword--) | 1835 | + while (ptr < lim && sword--) |
| 1859 | + { | 1836 | + { |
| 1860 | + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) | 1837 | + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) |
| 1861 | + { | 1838 | + { |
| 1862 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1839 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1863 | + ptr += mblength; | 1840 | + ptr += mblength; |
| 1864 | + } | 1841 | + } |
| 1865 | + if (ptr < lim) | 1842 | + if (ptr < lim) |
| 1866 | + { | 1843 | + { |
| 1867 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1844 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1868 | + ptr += mblength; | 1845 | + ptr += mblength; |
| 1869 | + } | 1846 | + } |
| 1870 | + } | 1847 | + } |
| 1871 | + else | 1848 | + else |
| 1872 | + while (ptr < lim && sword--) | 1849 | + while (ptr < lim && sword--) |
| 1873 | + { | 1850 | + { |
| 1874 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) | 1851 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) |
| 1875 | + ptr += mblength; | 1852 | + ptr += mblength; |
| 1876 | + if (ptr < lim) | 1853 | + if (ptr < lim) |
| 1877 | + { | 1854 | + { |
| 1878 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1855 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1879 | + ptr += mblength; | 1856 | + ptr += mblength; |
| 1880 | + } | 1857 | + } |
| 1881 | + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) | 1858 | + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) |
| 1882 | + ptr += mblength; | 1859 | + ptr += mblength; |
| 1883 | + } | 1860 | + } |
| 1884 | + | 1861 | + |
| 1885 | + if (key->skipsblanks) | 1862 | + if (key->skipsblanks) |
| 1886 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) | 1863 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) |
| 1887 | + ptr += mblength; | 1864 | + ptr += mblength; |
| … | … | ||
| 1889 | + for (i = 0; i < schar; i++) | 1866 | + for (i = 0; i < schar; i++) |
| 1890 | + { | 1867 | + { |
| 1891 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1868 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1892 | + | 1869 | + |
| 1893 | + if (ptr + mblength > lim) | 1870 | + if (ptr + mblength > lim) |
| 1894 | + break; | 1871 | + break; |
| 1895 | + else | 1872 | + else |
| 1896 | + ptr += mblength; | 1873 | + ptr += mblength; |
| 1897 | + } | 1874 | + } |
| 1898 | + | 1875 | + |
| 1899 | + return ptr; | 1876 | + return ptr; |
| 1900 | +} | 1877 | +} |
| 1901 | +#endif | 1878 | +#endif |
| … | … | ||
| 1915 | the preceding field. */ | 1892 | the preceding field. */ |
| 1916 | - if (tab != TAB_DEFAULT) | 1893 | - if (tab != TAB_DEFAULT) |
| 1917 | + if (tab_length) | 1894 | + if (tab_length) |
| 1918 | while (ptr < lim && eword--) | 1895 | while (ptr < lim && eword--) |
| 1919 | { | 1896 | { |
| 1920 | - while (ptr < lim && *ptr != tab) | 1897 | - while (ptr < lim && *ptr != tab) |
| 1921 | + while (ptr < lim && *ptr != tab[0]) | 1898 | + while (ptr < lim && *ptr != tab[0]) |
| 1922 | ++ptr; | 1899 | ++ptr; |
| 1923 | if (ptr < lim && (eword | echar)) | 1900 | if (ptr < lim && (eword | echar)) |
| 1924 | ++ptr; | 1901 | ++ptr; |
| 1925 | @@ -1348,10 +1534,10 @@ | 1902 | @@ -1348,10 +1534,10 @@ |
| 1926 | */ | 1903 | */ |
| 1927 | 1904 | ||
| 1928 | /* Make LIM point to the end of (one byte past) the current field. */ | 1905 | /* Make LIM point to the end of (one byte past) the current field. */ |
| 1929 | - if (tab != TAB_DEFAULT) | 1906 | - if (tab != TAB_DEFAULT) |
| … | … | ||
| 1931 | { | 1908 | { |
| 1932 | char *newlim; | 1909 | char *newlim; |
| 1933 | - newlim = memchr (ptr, tab, lim - ptr); | 1910 | - newlim = memchr (ptr, tab, lim - ptr); |
| 1934 | + newlim = memchr (ptr, tab[0], lim - ptr); | 1911 | + newlim = memchr (ptr, tab[0], lim - ptr); |
| 1935 | if (newlim) | 1912 | if (newlim) |
| 1936 | lim = newlim; | 1913 | lim = newlim; |
| 1937 | } | 1914 | } |
| 1938 | @@ -1384,6 +1570,113 @@ | 1915 | @@ -1384,6 +1570,113 @@ |
| 1939 | return ptr; | 1916 | return ptr; |
| 1940 | } | 1917 | } |
| 1941 | 1918 | ||
| … | … | ||
| 1955 | + memset (&state, '\0', sizeof(mbstate_t)); | 1932 | + memset (&state, '\0', sizeof(mbstate_t)); |
| 1956 | + | 1933 | + |
| 1957 | + if (tab_length) | 1934 | + if (tab_length) |
| 1958 | + while (ptr < lim && eword--) | 1935 | + while (ptr < lim && eword--) |
| 1959 | + { | 1936 | + { |
| 1960 | + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) | 1937 | + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) |
| 1961 | + { | 1938 | + { |
| 1962 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1939 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1963 | + ptr += mblength; | 1940 | + ptr += mblength; |
| 1964 | + } | 1941 | + } |
| 1965 | + if (ptr < lim && (eword | echar)) | 1942 | + if (ptr < lim && (eword | echar)) |
| 1966 | + { | 1943 | + { |
| 1967 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1944 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1968 | + ptr += mblength; | 1945 | + ptr += mblength; |
| 1969 | + } | 1946 | + } |
| 1970 | + } | 1947 | + } |
| 1971 | + else | 1948 | + else |
| 1972 | + while (ptr < lim && eword--) | 1949 | + while (ptr < lim && eword--) |
| 1973 | + { | 1950 | + { |
| 1974 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) | 1951 | + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) |
| 1975 | + ptr += mblength; | 1952 | + ptr += mblength; |
| 1976 | + if (ptr < lim) | 1953 | + if (ptr < lim) |
| 1977 | + { | 1954 | + { |
| 1978 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1955 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 1979 | + ptr += mblength; | 1956 | + ptr += mblength; |
| 1980 | + } | 1957 | + } |
| 1981 | + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) | 1958 | + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) |
| 1982 | + ptr += mblength; | 1959 | + ptr += mblength; |
| 1983 | + } | 1960 | + } |
| 1984 | + | 1961 | + |
| 1985 | + | 1962 | + |
| 1986 | +# ifdef POSIX_UNSPECIFIED | 1963 | +# ifdef POSIX_UNSPECIFIED |
| 1987 | + /* Make LIM point to the end of (one byte past) the current field. */ | 1964 | + /* Make LIM point to the end of (one byte past) the current field. */ |
| … | … | ||
| 1989 | + { | 1966 | + { |
| 1990 | + char *newlim, *p; | 1967 | + char *newlim, *p; |
| 1991 | + | 1968 | + |
| 1992 | + newlim = NULL; | 1969 | + newlim = NULL; |
| 1993 | + for (p = ptr; p < lim;) | 1970 | + for (p = ptr; p < lim;) |
| 1994 | + { | 1971 | + { |
| 1995 | + if (memcmp (p, tab, tab_length) == 0) | 1972 | + if (memcmp (p, tab, tab_length) == 0) |
| 1996 | + { | 1973 | + { |
| 1997 | + newlim = p; | 1974 | + newlim = p; |
| 1998 | + break; | 1975 | + break; |
| 1999 | + } | 1976 | + } |
| 2000 | + | 1977 | + |
| 2001 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1978 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 2002 | + p += mblength; | 1979 | + p += mblength; |
| 2003 | + } | 1980 | + } |
| 2004 | + } | 1981 | + } |
| 2005 | + else | 1982 | + else |
| 2006 | + { | 1983 | + { |
| 2007 | + char *newlim; | 1984 | + char *newlim; |
| 2008 | + newlim = ptr; | 1985 | + newlim = ptr; |
| 2009 | + | 1986 | + |
| 2010 | + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) | 1987 | + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) |
| 2011 | + newlim += mblength; | 1988 | + newlim += mblength; |
| 2012 | + if (ptr < lim) | 1989 | + if (ptr < lim) |
| 2013 | + { | 1990 | + { |
| 2014 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 1991 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 2015 | + ptr += mblength; | 1992 | + ptr += mblength; |
| 2016 | + } | 1993 | + } |
| 2017 | + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) | 1994 | + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) |
| 2018 | + newlim += mblength; | 1995 | + newlim += mblength; |
| 2019 | + lim = newlim; | 1996 | + lim = newlim; |
| 2020 | + } | 1997 | + } |
| 2021 | +# endif | 1998 | +# endif |
| 2022 | + | 1999 | + |
| 2023 | + if (echar != 0) | 2000 | + if (echar != 0) |
| … | … | ||
| 2034 | + for (i = 0; i < echar; i++) | 2011 | + for (i = 0; i < echar; i++) |
| 2035 | + { | 2012 | + { |
| 2036 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); | 2013 | + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); |
| 2037 | + | 2014 | + |
| 2038 | + if (ptr + mblength > lim) | 2015 | + if (ptr + mblength > lim) |
| 2039 | + break; | 2016 | + break; |
| 2040 | + else | 2017 | + else |
| 2041 | + ptr += mblength; | 2018 | + ptr += mblength; |
| 2042 | + } | 2019 | + } |
| 2043 | + } | 2020 | + } |
| 2044 | + | 2021 | + |
| 2045 | + return ptr; | 2022 | + return ptr; |
| 2046 | +} | 2023 | +} |
| … | … | ||
| 2048 | + | 2025 | + |
| 2049 | /* Fill BUF reading from FP, moving buf->left bytes from the end | 2026 | /* Fill BUF reading from FP, moving buf->left bytes from the end |
| 2050 | of buf->buf to the beginning first. If EOF is reached and the | 2027 | of buf->buf to the beginning first. If EOF is reached and the |
| 2051 | file wasn't terminated by a newline, supply one. Set up BUF's line | 2028 | file wasn't terminated by a newline, supply one. Set up BUF's line |
| 2052 | @@ -1466,8 +1753,24 @@ | 2029 | @@ -1466,8 +1753,24 @@ |
| 2053 | else | 2030 | else |
| 2054 | { | 2031 | { |
| 2055 | if (key->skipsblanks) | 2032 | if (key->skipsblanks) |
| 2056 | - while (blanks[to_uchar (*line_start)]) | 2033 | - while (blanks[to_uchar (*line_start)]) |
| 2057 | - line_start++; | 2034 | - line_start++; |
| 2058 | + { | 2035 | + { |
| 2059 | +#if HAVE_MBRTOWC | 2036 | +#if HAVE_MBRTOWC |
| 2060 | + if (MB_CUR_MAX > 1) | 2037 | + if (MB_CUR_MAX > 1) |
| 2061 | + { | 2038 | + { |
| 2062 | + size_t mblength; | 2039 | + size_t mblength; |
| 2063 | + mbstate_t state; | 2040 | + mbstate_t state; |
| 2064 | + memset (&state, '\0', sizeof(mbstate_t)); | 2041 | + memset (&state, '\0', sizeof(mbstate_t)); |
| 2065 | + while (line_start < line->keylim && | 2042 | + while (line_start < line->keylim && |
| 2066 | + ismbblank (line_start, | 2043 | + ismbblank (line_start, |
| 2067 | + line->keylim - line_start, | 2044 | + line->keylim - line_start, |
| 2068 | + &mblength)) | 2045 | + &mblength)) |
| 2069 | + line_start += mblength; | 2046 | + line_start += mblength; |
| 2070 | + } | 2047 | + } |
| 2071 | + else | 2048 | + else |
| 2072 | +#endif | 2049 | +#endif |
| 2073 | + while (blanks[to_uchar (*line_start)]) | 2050 | + while (blanks[to_uchar (*line_start)]) |
| 2074 | + line_start++; | 2051 | + line_start++; |
| 2075 | + } | 2052 | + } |
| 2076 | line->keybeg = line_start; | 2053 | line->keybeg = line_start; |
| 2077 | } | 2054 | } |
| 2078 | } | 2055 | } |
| 2079 | @@ -1500,7 +1803,7 @@ | 2056 | @@ -1500,7 +1803,7 @@ |
| 2080 | hideously fast. */ | 2057 | hideously fast. */ |
| 2081 | 2058 | ||
| 2082 | static int | 2059 | static int |
| 2083 | -numcompare (const char *a, const char *b) | 2060 | -numcompare (const char *a, const char *b) |
| 2084 | +numcompare_uni (const char *a, const char *b) | 2061 | +numcompare_uni (const char *a, const char *b) |
| 2085 | { | 2062 | { |
| 2086 | while (blanks[to_uchar (*a)]) | 2063 | while (blanks[to_uchar (*a)]) |
| 2087 | a++; | 2064 | a++; |
| 2088 | @@ -1510,6 +1813,25 @@ | 2065 | @@ -1510,6 +1813,25 @@ |
| 2089 | : strnumcmp (a, b, decimal_point, thousands_sep)); | 2066 | : strnumcmp (a, b, decimal_point, thousands_sep)); |
| 2090 | } | 2067 | } |
| 2091 | 2068 | ||
| 2092 | +#if HAVE_MBRTOWC | 2069 | +#if HAVE_MBRTOWC |
| 2093 | +static int | 2070 | +static int |
| 2094 | +numcompare_mb (const char *a, const char *b) | 2071 | +numcompare_mb (const char *a, const char *b) |
| … | … | ||
| 2161 | + | 2138 | + |
| 2162 | + for (i = 0; i < wclength; i++) | 2139 | + for (i = 0; i < wclength; i++) |
| 2163 | + { | 2140 | + { |
| 2164 | + month_wcs[i] = towupper(month_wcs[i]); | 2141 | + month_wcs[i] = towupper(month_wcs[i]); |
| 2165 | + if (iswblank (month_wcs[i])) | 2142 | + if (iswblank (month_wcs[i])) |
| 2166 | + { | 2143 | + { |
| 2167 | + month_wcs[i] = L'\0'; | 2144 | + month_wcs[i] = L'\0'; |
| 2168 | + break; | 2145 | + break; |
| 2169 | + } | 2146 | + } |
| 2170 | + } | 2147 | + } |
| 2171 | + | 2148 | + |
| 2172 | + wpp = (const wchar_t **)&month_wcs; | 2149 | + wpp = (const wchar_t **)&month_wcs; |
| 2173 | + | 2150 | + |
| 2174 | + mblength = wcsrtombs (month, wpp, len + 1, &state); | 2151 | + mblength = wcsrtombs (month, wpp, len + 1, &state); |
| … | … | ||
| 2177 | + do | 2154 | + do |
| 2178 | + { | 2155 | + { |
| 2179 | + int ix = (lo + hi) / 2; | 2156 | + int ix = (lo + hi) / 2; |
| 2180 | + | 2157 | + |
| 2181 | + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) | 2158 | + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) |
| 2182 | + hi = ix; | 2159 | + hi = ix; |
| 2183 | + else | 2160 | + else |
| 2184 | + lo = ix; | 2161 | + lo = ix; |
| 2185 | + } | 2162 | + } |
| 2186 | + while (hi - lo > 1); | 2163 | + while (hi - lo > 1); |
| 2187 | + | 2164 | + |
| 2188 | + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) | 2165 | + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) |
| 2189 | + ? monthtab[lo].val : 0); | 2166 | + ? monthtab[lo].val : 0); |
| … | … | ||
| 2238 | + | 2215 | + |
| 2239 | + /* Actually compare the fields. */ | 2216 | + /* Actually compare the fields. */ |
| 2240 | + if (key->random) | 2217 | + if (key->random) |
| 2241 | + diff = compare_random (texta, lena, textb, lenb); | 2218 | + diff = compare_random (texta, lena, textb, lenb); |
| 2242 | + else if (key->numeric | key->general_numeric | key->human_numeric) | 2219 | + else if (key->numeric | key->general_numeric | key->human_numeric) |
| 2243 | + { | 2220 | + { |
| 2244 | + char savea = *lima, saveb = *limb; | 2221 | + char savea = *lima, saveb = *limb; |
| 2245 | + | 2222 | + |
| 2246 | + *lima = *limb = '\0'; | 2223 | + *lima = *limb = '\0'; |
| 2247 | + diff = (key->numeric ? numcompare (texta, textb) | 2224 | + diff = (key->numeric ? numcompare (texta, textb) |
| 2248 | + : key->general_numeric ? general_numcompare (texta, textb) | 2225 | + : key->general_numeric ? general_numcompare (texta, textb) |
| 2249 | + : human_numcompare (texta, textb, key)); | 2226 | + : human_numcompare (texta, textb, key)); |
| 2250 | + *lima = savea, *limb = saveb; | 2227 | + *lima = savea, *limb = saveb; |
| 2251 | + } | 2228 | + } |
| 2252 | + else if (key->version) | 2229 | + else if (key->version) |
| 2253 | + diff = compare_version (texta, lena, textb, lenb); | 2230 | + diff = compare_version (texta, lena, textb, lenb); |
| 2254 | + else if (key->month) | 2231 | + else if (key->month) |
| 2255 | + diff = getmonth (texta, lena) - getmonth (textb, lenb); | 2232 | + diff = getmonth (texta, lena) - getmonth (textb, lenb); |
| 2256 | + else | 2233 | + else |
| 2257 | + { | 2234 | + { |
| 2258 | + if (ignore || translate) | 2235 | + if (ignore || translate) |
| 2259 | + { | 2236 | + { |
| 2260 | + char *copy_a = (char *) alloca (lena + 1 + lenb + 1); | 2237 | + char *copy_a = (char *) alloca (lena + 1 + lenb + 1); |
| 2261 | + char *copy_b = copy_a + lena + 1; | 2238 | + char *copy_b = copy_a + lena + 1; |
| 2262 | + size_t new_len_a, new_len_b; | 2239 | + size_t new_len_a, new_len_b; |
| 2263 | + size_t i, j; | 2240 | + size_t i, j; |
| 2264 | + | 2241 | + |
| 2265 | + /* Ignore and/or translate chars before comparing. */ | 2242 | + /* Ignore and/or translate chars before comparing. */ |
| 2266 | +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ | 2243 | +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ |
| 2267 | + do \ | 2244 | + do \ |
| 2268 | + { \ | 2245 | + { \ |
| 2269 | + wchar_t uwc; \ | 2246 | + wchar_t uwc; \ |
| 2270 | + char mbc[MB_LEN_MAX]; \ | 2247 | + char mbc[MB_LEN_MAX]; \ |
| 2271 | + mbstate_t state_wc; \ | 2248 | + mbstate_t state_wc; \ |
| 2272 | + \ | 2249 | + \ |
| 2273 | + for (NEW_LEN = i = 0; i < LEN;) \ | 2250 | + for (NEW_LEN = i = 0; i < LEN;) \ |
| 2274 | + { \ | 2251 | + { \ |
| 2275 | + mbstate_t state_bak; \ | 2252 | + mbstate_t state_bak; \ |
| 2276 | + \ | 2253 | + \ |
| 2277 | + state_bak = STATE; \ | 2254 | + state_bak = STATE; \ |
| 2278 | + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ | 2255 | + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ |
| 2279 | + \ | 2256 | + \ |
| 2280 | + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ | 2257 | + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ |
| 2281 | + || MBLENGTH == 0) \ | 2258 | + || MBLENGTH == 0) \ |
| 2282 | + { \ | 2259 | + { \ |
| 2283 | + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ | 2260 | + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ |
| 2284 | + STATE = state_bak; \ | 2261 | + STATE = state_bak; \ |
| 2285 | + if (!ignore) \ | 2262 | + if (!ignore) \ |
| 2286 | + COPY[NEW_LEN++] = TEXT[i++]; \ | 2263 | + COPY[NEW_LEN++] = TEXT[i++]; \ |
| 2287 | + continue; \ | 2264 | + continue; \ |
| 2288 | + } \ | 2265 | + } \ |
| 2289 | + \ | 2266 | + \ |
| 2290 | + if (ignore) \ | 2267 | + if (ignore) \ |
| 2291 | + { \ | 2268 | + { \ |
| 2292 | + if ((ignore == nonprinting && !iswprint (WC)) \ | 2269 | + if ((ignore == nonprinting && !iswprint (WC)) \ |
| 2293 | + || (ignore == nondictionary \ | 2270 | + || (ignore == nondictionary \ |
| 2294 | + && !iswalnum (WC) && !iswblank (WC))) \ | 2271 | + && !iswalnum (WC) && !iswblank (WC))) \ |
| 2295 | + { \ | 2272 | + { \ |
| 2296 | + i += MBLENGTH; \ | 2273 | + i += MBLENGTH; \ |
| 2297 | + continue; \ | 2274 | + continue; \ |
| 2298 | + } \ | 2275 | + } \ |
| 2299 | + } \ | 2276 | + } \ |
| 2300 | + \ | 2277 | + \ |
| 2301 | + if (translate) \ | 2278 | + if (translate) \ |
| 2302 | + { \ | 2279 | + { \ |
| 2303 | + \ | 2280 | + \ |
| 2304 | + uwc = towupper(WC); \ | 2281 | + uwc = towupper(WC); \ |
| 2305 | + if (WC == uwc) \ | 2282 | + if (WC == uwc) \ |
| 2306 | + { \ | 2283 | + { \ |
| 2307 | + memcpy (mbc, TEXT + i, MBLENGTH); \ | 2284 | + memcpy (mbc, TEXT + i, MBLENGTH); \ |
| 2308 | + i += MBLENGTH; \ | 2285 | + i += MBLENGTH; \ |
| 2309 | + } \ | 2286 | + } \ |
| 2310 | + else \ | 2287 | + else \ |
| 2311 | + { \ | 2288 | + { \ |
| 2312 | + i += MBLENGTH; \ | 2289 | + i += MBLENGTH; \ |
| 2313 | + WC = uwc; \ | 2290 | + WC = uwc; \ |
| 2314 | + memset (&state_wc, '\0', sizeof (mbstate_t)); \ | 2291 | + memset (&state_wc, '\0', sizeof (mbstate_t)); \ |
| 2315 | + \ | 2292 | + \ |
| 2316 | + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ | 2293 | + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ |
| 2317 | + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ | 2294 | + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ |
| 2318 | + } \ | 2295 | + } \ |
| 2319 | + \ | 2296 | + \ |
| 2320 | + for (j = 0; j < MBLENGTH; j++) \ | 2297 | + for (j = 0; j < MBLENGTH; j++) \ |
| 2321 | + COPY[NEW_LEN++] = mbc[j]; \ | 2298 | + COPY[NEW_LEN++] = mbc[j]; \ |
| 2322 | + } \ | 2299 | + } \ |
| 2323 | + else \ | 2300 | + else \ |
| 2324 | + for (j = 0; j < MBLENGTH; j++) \ | 2301 | + for (j = 0; j < MBLENGTH; j++) \ |
| 2325 | + COPY[NEW_LEN++] = TEXT[i++]; \ | 2302 | + COPY[NEW_LEN++] = TEXT[i++]; \ |
| 2326 | + } \ | 2303 | + } \ |
| 2327 | + COPY[NEW_LEN] = '\0'; \ | 2304 | + COPY[NEW_LEN] = '\0'; \ |
| 2328 | + } \ | 2305 | + } \ |
| 2329 | + while (0) | 2306 | + while (0) |
| 2330 | + IGNORE_CHARS (new_len_a, lena, texta, copy_a, | 2307 | + IGNORE_CHARS (new_len_a, lena, texta, copy_a, |
| 2331 | + wc_a, mblength_a, state_a); | 2308 | + wc_a, mblength_a, state_a); |
| 2332 | + IGNORE_CHARS (new_len_b, lenb, textb, copy_b, | 2309 | + IGNORE_CHARS (new_len_b, lenb, textb, copy_b, |
| 2333 | + wc_b, mblength_b, state_b); | 2310 | + wc_b, mblength_b, state_b); |
| 2334 | + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); | 2311 | + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); |
| 2335 | + } | 2312 | + } |
| 2336 | + else if (lena == 0) | 2313 | + else if (lena == 0) |
| 2337 | + diff = - NONZERO (lenb); | 2314 | + diff = - NONZERO (lenb); |
| 2338 | + else if (lenb == 0) | 2315 | + else if (lenb == 0) |
| 2339 | + goto greater; | 2316 | + goto greater; |
| 2340 | + else | 2317 | + else |
| 2341 | + diff = xmemcoll (texta, lena, textb, lenb); | 2318 | + diff = xmemcoll (texta, lena, textb, lenb); |
| 2342 | + } | 2319 | + } |
| 2343 | + | 2320 | + |
| 2344 | + if (diff) | 2321 | + if (diff) |
| 2345 | + goto not_equal; | 2322 | + goto not_equal; |
| 2346 | + | 2323 | + |
| 2347 | + key = key->next; | 2324 | + key = key->next; |
| 2348 | + if (! key) | 2325 | + if (! key) |
| 2349 | + break; | 2326 | + break; |
| 2350 | + | 2327 | + |
| 2351 | + /* Find the beginning and limit of the next field. */ | 2328 | + /* Find the beginning and limit of the next field. */ |
| 2352 | + if (key->eword != -1) | 2329 | + if (key->eword != -1) |
| 2353 | + lima = limfield (a, key), limb = limfield (b, key); | 2330 | + lima = limfield (a, key), limb = limfield (b, key); |
| 2354 | + else | 2331 | + else |
| 2355 | + lima = a->text + a->length - 1, limb = b->text + b->length - 1; | 2332 | + lima = a->text + a->length - 1, limb = b->text + b->length - 1; |
| 2356 | + | 2333 | + |
| 2357 | + if (key->sword != -1) | 2334 | + if (key->sword != -1) |
| 2358 | + texta = begfield (a, key), textb = begfield (b, key); | 2335 | + texta = begfield (a, key), textb = begfield (b, key); |
| 2359 | + else | 2336 | + else |
| 2360 | + { | 2337 | + { |
| 2361 | + texta = a->text, textb = b->text; | 2338 | + texta = a->text, textb = b->text; |
| 2362 | + if (key->skipsblanks) | 2339 | + if (key->skipsblanks) |
| 2363 | + { | 2340 | + { |
| 2364 | + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) | 2341 | + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) |
| 2365 | + texta += mblength_a; | 2342 | + texta += mblength_a; |
| 2366 | + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) | 2343 | + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) |
| 2367 | + textb += mblength_b; | 2344 | + textb += mblength_b; |
| 2368 | + } | 2345 | + } |
| 2369 | + } | 2346 | + } |
| 2370 | + } | 2347 | + } |
| 2371 | + | 2348 | + |
| 2372 | + return 0; | 2349 | + return 0; |
| 2373 | + | 2350 | + |
| 2374 | +greater: | 2351 | +greater: |
| … | … | ||
| 2418 | have_read_stdin = false; | 2395 | have_read_stdin = false; |
| 2419 | inittables (); | 2396 | inittables (); |
| 2420 | 2397 | ||
| 2421 | @@ -3015,13 +3599,35 @@ | 2398 | @@ -3015,13 +3599,35 @@ |
| 2422 | 2399 | ||
| 2423 | case 't': | 2400 | case 't': |
| 2424 | { | 2401 | { |
| 2425 | - char newtab = optarg[0]; | 2402 | - char newtab = optarg[0]; |
| 2426 | - if (! newtab) | 2403 | - if (! newtab) |
| 2427 | + char newtab[MB_LEN_MAX + 1]; | 2404 | + char newtab[MB_LEN_MAX + 1]; |
| 2428 | + size_t newtab_length = 1; | 2405 | + size_t newtab_length = 1; |
| 2429 | + strncpy (newtab, optarg, MB_LEN_MAX); | 2406 | + strncpy (newtab, optarg, MB_LEN_MAX); |
| 2430 | + if (! newtab[0]) | 2407 | + if (! newtab[0]) |
| 2431 | error (SORT_FAILURE, 0, _("empty tab")); | 2408 | error (SORT_FAILURE, 0, _("empty tab")); |
| 2432 | - if (optarg[1]) | 2409 | - if (optarg[1]) |
| 2433 | +#if HAVE_MBRTOWC | 2410 | +#if HAVE_MBRTOWC |
| 2434 | + if (MB_CUR_MAX > 1) | 2411 | + if (MB_CUR_MAX > 1) |
| 2435 | + { | 2412 | + { |
| 2436 | + wchar_t wc; | 2413 | + wchar_t wc; |
| 2437 | + mbstate_t state; | 2414 | + mbstate_t state; |
| 2438 | + size_t i; | 2415 | + size_t i; |
| 2439 | + | 2416 | + |
| 2440 | + memset (&state, '\0', sizeof (mbstate_t)); | 2417 | + memset (&state, '\0', sizeof (mbstate_t)); |
| 2441 | + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, | 2418 | + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, |
| 2442 | + MB_LEN_MAX), | 2419 | + MB_LEN_MAX), |
| 2443 | + &state); | 2420 | + &state); |
| 2444 | + switch (newtab_length) | 2421 | + switch (newtab_length) |
| 2445 | + { | 2422 | + { |
| 2446 | + case (size_t) -1: | 2423 | + case (size_t) -1: |
| 2447 | + case (size_t) -2: | 2424 | + case (size_t) -2: |
| 2448 | + case 0: | 2425 | + case 0: |
| 2449 | + newtab_length = 1; | 2426 | + newtab_length = 1; |
| 2450 | + } | 2427 | + } |
| 2451 | + } | 2428 | + } |
| 2452 | +#endif | 2429 | +#endif |
| 2453 | + if (newtab_length == 1 && optarg[1]) | 2430 | + if (newtab_length == 1 && optarg[1]) |
| 2454 | { | 2431 | { |
| 2455 | if (STREQ (optarg, "\\0")) | 2432 | if (STREQ (optarg, "\\0")) |
| 2456 | - newtab = '\0'; | 2433 | - newtab = '\0'; |
| 2457 | + newtab[0] = '\0'; | 2434 | + newtab[0] = '\0'; |
| 2458 | else | 2435 | else |
| 2459 | { | 2436 | { |
| 2460 | /* Provoke with `sort -txx'. Complain about | 2437 | /* Provoke with `sort -txx'. Complain about |
| 2461 | @@ -3032,9 +3638,12 @@ | 2438 | @@ -3032,9 +3638,12 @@ |
| 2462 | quote (optarg)); | 2439 | quote (optarg)); |
| 2463 | } | 2440 | } |
| 2464 | } | 2441 | } |
| 2465 | - if (tab != TAB_DEFAULT && tab != newtab) | 2442 | - if (tab != TAB_DEFAULT && tab != newtab) |
| 2466 | + if (tab_length | 2443 | + if (tab_length |
| 2467 | + && (tab_length != newtab_length | 2444 | + && (tab_length != newtab_length |
| 2468 | + || memcmp (tab, newtab, tab_length) != 0)) | 2445 | + || memcmp (tab, newtab, tab_length) != 0)) |
| 2469 | error (SORT_FAILURE, 0, _("incompatible tabs")); | 2446 | error (SORT_FAILURE, 0, _("incompatible tabs")); |
| 2470 | - tab = newtab; | 2447 | - tab = newtab; |
| 2471 | + memcpy (tab, newtab, newtab_length); | 2448 | + memcpy (tab, newtab, newtab_length); |
| 2472 | + tab_length = newtab_length; | 2449 | + tab_length = newtab_length; |
| 2473 | } | 2450 | } |
| 2474 | break; | 2451 | break; |
| 2475 | 2452 | ||
| 2476 | --- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000 | 2453 | --- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000 |
| 2477 | +++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000 | 2454 | +++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000 |
| 2478 | @@ -39,11 +39,28 @@ | 2455 | @@ -39,11 +39,28 @@ |
| 2479 | #include <stdio.h> | 2456 | #include <stdio.h> |
| … | … | ||
| 2805 | static bool print_stored (COLUMN *p); | 2782 | static bool print_stored (COLUMN *p); |
| 2806 | @@ -426,6 +477,7 @@ | 2783 | @@ -426,6 +477,7 @@ |
| 2807 | static void pad_across_to (int position); | 2784 | static void pad_across_to (int position); |
| 2808 | static void add_line_number (COLUMN *p); | 2785 | static void add_line_number (COLUMN *p); |
| 2809 | static void getoptarg (char *arg, char switch_char, char *character, | 2786 | static void getoptarg (char *arg, char switch_char, char *character, |
| 2810 | + int *character_length, int *character_width, | 2787 | + int *character_length, int *character_width, |
| 2811 | int *number); | 2788 | int *number); |
| 2812 | void usage (int status); | 2789 | void usage (int status); |
| 2813 | static void print_files (int number_of_files, char **av); | 2790 | static void print_files (int number_of_files, char **av); |
| 2814 | @@ -440,7 +492,6 @@ | 2791 | @@ -440,7 +492,6 @@ |
| 2815 | static void pad_down (int lines); | 2792 | static void pad_down (int lines); |
| 2816 | static void read_rest_of_line (COLUMN *p); | 2793 | static void read_rest_of_line (COLUMN *p); |
| … | … | ||
| 2905 | + char_to_clump = char_to_clump_single; | 2882 | + char_to_clump = char_to_clump_single; |
| 2906 | + } | 2883 | + } |
| 2907 | + | 2884 | + |
| 2908 | n_files = 0; | 2885 | n_files = 0; |
| 2909 | file_names = (argc > 1 | 2886 | file_names = (argc > 1 |
| 2910 | ? xmalloc ((argc - 1) * sizeof (char *)) | 2887 | ? xmalloc ((argc - 1) * sizeof (char *)) |
| 2911 | @@ -949,8 +1032,12 @@ | 2888 | @@ -949,8 +1032,12 @@ |
| 2912 | break; | 2889 | break; |
| 2913 | case 'e': | 2890 | case 'e': |
| 2914 | if (optarg) | 2891 | if (optarg) |
| 2915 | - getoptarg (optarg, 'e', &input_tab_char, | 2892 | - getoptarg (optarg, 'e', &input_tab_char, |
| 2916 | - &chars_per_input_tab); | 2893 | - &chars_per_input_tab); |
| 2917 | + { | 2894 | + { |
| 2918 | + int dummy_length, dummy_width; | 2895 | + int dummy_length, dummy_width; |
| 2919 | + | 2896 | + |
| 2920 | + getoptarg (optarg, 'e', input_tab_char, &dummy_length, | 2897 | + getoptarg (optarg, 'e', input_tab_char, &dummy_length, |
| 2921 | + &dummy_width, &chars_per_input_tab); | 2898 | + &dummy_width, &chars_per_input_tab); |
| 2922 | + } | 2899 | + } |
| 2923 | /* Could check tab width > 0. */ | 2900 | /* Could check tab width > 0. */ |
| 2924 | untabify_input = true; | 2901 | untabify_input = true; |
| 2925 | break; | 2902 | break; |
| 2926 | @@ -963,8 +1050,12 @@ | 2903 | @@ -963,8 +1050,12 @@ |
| 2927 | break; | 2904 | break; |
| 2928 | case 'i': | 2905 | case 'i': |
| 2929 | if (optarg) | 2906 | if (optarg) |
| 2930 | - getoptarg (optarg, 'i', &output_tab_char, | 2907 | - getoptarg (optarg, 'i', &output_tab_char, |
| 2931 | - &chars_per_output_tab); | 2908 | - &chars_per_output_tab); |
| 2932 | + { | 2909 | + { |
| 2933 | + int dummy_width; | 2910 | + int dummy_width; |
| 2934 | + | 2911 | + |
| 2935 | + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, | 2912 | + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, |
| 2936 | + &dummy_width, &chars_per_output_tab); | 2913 | + &dummy_width, &chars_per_output_tab); |
| 2937 | + } | 2914 | + } |
| 2938 | /* Could check tab width > 0. */ | 2915 | /* Could check tab width > 0. */ |
| 2939 | tabify_output = true; | 2916 | tabify_output = true; |
| 2940 | break; | 2917 | break; |
| 2941 | @@ -991,8 +1082,8 @@ | 2918 | @@ -991,8 +1082,8 @@ |
| 2942 | case 'n': | 2919 | case 'n': |
| 2943 | numbered_lines = true; | 2920 | numbered_lines = true; |
| 2944 | if (optarg) | 2921 | if (optarg) |
| 2945 | - getoptarg (optarg, 'n', &number_separator, | 2922 | - getoptarg (optarg, 'n', &number_separator, |
| 2946 | - &chars_per_number); | 2923 | - &chars_per_number); |
| 2947 | + getoptarg (optarg, 'n', number_separator, &number_separator_length, | 2924 | + getoptarg (optarg, 'n', number_separator, &number_separator_length, |
| 2948 | + &number_separator_width, &chars_per_number); | 2925 | + &number_separator_width, &chars_per_number); |
| 2949 | break; | 2926 | break; |
| 2950 | case 'N': | 2927 | case 'N': |
| 2951 | skip_count = false; | 2928 | skip_count = false; |
| 2952 | @@ -1031,7 +1122,7 @@ | 2929 | @@ -1031,7 +1122,7 @@ |
| 2953 | old_s = false; | 2930 | old_s = false; |
| 2954 | /* Reset an additional input of -s, -S dominates -s */ | 2931 | /* Reset an additional input of -s, -S dominates -s */ |
| 2955 | col_sep_string = bad_cast (""); | 2932 | col_sep_string = bad_cast (""); |
| 2956 | - col_sep_length = 0; | 2933 | - col_sep_length = 0; |
| 2957 | + col_sep_length = col_sep_width = 0; | 2934 | + col_sep_length = col_sep_width = 0; |
| 2958 | use_col_separator = true; | 2935 | use_col_separator = true; |
| 2959 | if (optarg) | 2936 | if (optarg) |
| 2960 | separator_string (optarg); | 2937 | separator_string (optarg); |
| 2961 | @@ -1188,10 +1279,45 @@ | 2938 | @@ -1188,10 +1279,45 @@ |
| 2962 | a number. */ | 2939 | a number. */ |
| 2963 | 2940 | ||
| 2964 | static void | 2941 | static void |
| 2965 | -getoptarg (char *arg, char switch_char, char *character, int *number) | 2942 | -getoptarg (char *arg, char switch_char, char *character, int *number) |
| 2966 | +getoptarg (char *arg, char switch_char, char *character, int *character_length, | 2943 | +getoptarg (char *arg, char switch_char, char *character, int *character_length, |
| 2967 | + int *character_width, int *number) | 2944 | + int *character_width, int *number) |
| 2968 | { | 2945 | { |
| 2969 | if (!ISDIGIT (*arg)) | 2946 | if (!ISDIGIT (*arg)) |
| 2970 | - *character = *arg++; | 2947 | - *character = *arg++; |
| 2971 | + { | 2948 | + { |
| 2972 | +#ifdef HAVE_MBRTOWC | 2949 | +#ifdef HAVE_MBRTOWC |
| 2973 | + if (MB_CUR_MAX > 1) /* for multibyte locale. */ | 2950 | + if (MB_CUR_MAX > 1) /* for multibyte locale. */ |
| 2974 | + { | 2951 | + { |
| 2975 | + wchar_t wc; | 2952 | + wchar_t wc; |
| 2976 | + size_t mblength; | 2953 | + size_t mblength; |
| 2977 | + int width; | 2954 | + int width; |
| 2978 | + mbstate_t state = {'\0'}; | 2955 | + mbstate_t state = {'\0'}; |
| 2979 | + | 2956 | + |
| 2980 | + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); | 2957 | + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); |
| 2981 | + | 2958 | + |
| 2982 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) | 2959 | + if (mblength == (size_t)-1 || mblength == (size_t)-2) |
| 2983 | + { | 2960 | + { |
| 2984 | + *character_length = 1; | 2961 | + *character_length = 1; |
| 2985 | + *character_width = 1; | 2962 | + *character_width = 1; |
| 2986 | + } | 2963 | + } |
| 2987 | + else | 2964 | + else |
| 2988 | + { | 2965 | + { |
| 2989 | + *character_length = (mblength < 1) ? 1 : mblength; | 2966 | + *character_length = (mblength < 1) ? 1 : mblength; |
| 2990 | + width = wcwidth (wc); | 2967 | + width = wcwidth (wc); |
| 2991 | + *character_width = (width < 0) ? 0 : width; | 2968 | + *character_width = (width < 0) ? 0 : width; |
| 2992 | + } | 2969 | + } |
| 2993 | + | 2970 | + |
| 2994 | + strncpy (character, arg, *character_length); | 2971 | + strncpy (character, arg, *character_length); |
| 2995 | + arg += *character_length; | 2972 | + arg += *character_length; |
| 2996 | + } | 2973 | + } |
| 2997 | + else /* for single byte locale. */ | 2974 | + else /* for single byte locale. */ |
| 2998 | +#endif | 2975 | +#endif |
| 2999 | + { | 2976 | + { |
| 3000 | + *character = *arg++; | 2977 | + *character = *arg++; |
| 3001 | + *character_length = 1; | 2978 | + *character_length = 1; |
| 3002 | + *character_width = 1; | 2979 | + *character_width = 1; |
| 3003 | + } | 2980 | + } |
| 3004 | + } | 2981 | + } |
| 3005 | + | 2982 | + |
| 3006 | if (*arg) | 2983 | if (*arg) |
| 3007 | { | 2984 | { |
| 3008 | long int tmp_long; | 2985 | long int tmp_long; |
| 3009 | @@ -1256,7 +1382,7 @@ | 2986 | @@ -1256,7 +1382,7 @@ |
| 3010 | else | 2987 | else |
| 3011 | col_sep_string = column_separator; | 2988 | col_sep_string = column_separator; |
| 3012 | 2989 | ||
| 3013 | - col_sep_length = 1; | 2990 | - col_sep_length = 1; |
| 3014 | + col_sep_length = col_sep_width = 1; | 2991 | + col_sep_length = col_sep_width = 1; |
| 3015 | use_col_separator = true; | 2992 | use_col_separator = true; |
| 3016 | } | 2993 | } |
| 3017 | /* It's rather pointless to define a TAB separator with column | 2994 | /* It's rather pointless to define a TAB separator with column |
| 3018 | @@ -1288,11 +1414,11 @@ | 2995 | @@ -1288,11 +1414,11 @@ |
| 3019 | TAB_WIDTH (chars_per_input_tab, chars_per_number); */ | 2996 | TAB_WIDTH (chars_per_input_tab, chars_per_number); */ |
| 3020 | 2997 | ||
| 3021 | /* Estimate chars_per_text without any margin and keep it constant. */ | 2998 | /* Estimate chars_per_text without any margin and keep it constant. */ |
| 3022 | - if (number_separator == '\t') | 2999 | - if (number_separator == '\t') |
| 3023 | + if (number_separator[0] == '\t') | 3000 | + if (number_separator[0] == '\t') |
| 3024 | number_width = chars_per_number + | 3001 | number_width = chars_per_number + |
| 3025 | TAB_WIDTH (chars_per_default_tab, chars_per_number); | 3002 | TAB_WIDTH (chars_per_default_tab, chars_per_number); |
| 3026 | else | 3003 | else |
| 3027 | - number_width = chars_per_number + 1; | 3004 | - number_width = chars_per_number + 1; |
| 3028 | + number_width = chars_per_number + number_separator_width; | 3005 | + number_width = chars_per_number + number_separator_width; |
| 3029 | 3006 | ||
| 3030 | /* The number is part of the column width unless we are | 3007 | /* The number is part of the column width unless we are |
| 3031 | printing files in parallel. */ | 3008 | printing files in parallel. */ |
| 3032 | @@ -1307,7 +1433,7 @@ | 3009 | @@ -1307,7 +1433,7 @@ |
| 3033 | } | 3010 | } |
| 3034 | 3011 | ||
| 3035 | chars_per_column = (chars_per_line - chars_used_by_number - | 3012 | chars_per_column = (chars_per_line - chars_used_by_number - |
| 3036 | - (columns - 1) * col_sep_length) / columns; | 3013 | - (columns - 1) * col_sep_length) / columns; |
| 3037 | + (columns - 1) * col_sep_width) / columns; | 3014 | + (columns - 1) * col_sep_width) / columns; |
| 3038 | 3015 | ||
| 3039 | if (chars_per_column < 1) | 3016 | if (chars_per_column < 1) |
| 3040 | error (EXIT_FAILURE, 0, _("page width too narrow")); | 3017 | error (EXIT_FAILURE, 0, _("page width too narrow")); |
| 3041 | @@ -1432,7 +1558,7 @@ | 3018 | @@ -1432,7 +1558,7 @@ |
| 3042 | 3019 | ||
| … | … | ||
| 3046 | + h = h + col_sep_width; | 3023 | + h = h + col_sep_width; |
| 3047 | 3024 | ||
| 3048 | /* This loop takes care of all but the rightmost column. */ | 3025 | /* This loop takes care of all but the rightmost column. */ |
| 3049 | 3026 | ||
| 3050 | @@ -1466,7 +1592,7 @@ | 3027 | @@ -1466,7 +1592,7 @@ |
| 3051 | } | 3028 | } |
| 3052 | else | 3029 | else |
| 3053 | { | 3030 | { |
| 3054 | - h = h_next + col_sep_length; | 3031 | - h = h_next + col_sep_length; |
| 3055 | + h = h_next + col_sep_width; | 3032 | + h = h_next + col_sep_width; |
| 3056 | h_next = h + chars_per_column; | 3033 | h_next = h + chars_per_column; |
| 3057 | } | 3034 | } |
| 3058 | } | 3035 | } |
| 3059 | @@ -1756,9 +1882,9 @@ | 3036 | @@ -1756,9 +1882,9 @@ |
| 3060 | align_column (COLUMN *p) | 3037 | align_column (COLUMN *p) |
| 3061 | { | 3038 | { |
| 3062 | padding_not_printed = p->start_position; | 3039 | padding_not_printed = p->start_position; |
| … | … | ||
| 3084 | char *s; | 3061 | char *s; |
| 3085 | int left_cut; | 3062 | int left_cut; |
| 3086 | 3063 | ||
| 3087 | @@ -2058,22 +2184,24 @@ | 3064 | @@ -2058,22 +2184,24 @@ |
| 3088 | /* Tabification is assumed for multiple columns, also for n-separators, | 3065 | /* Tabification is assumed for multiple columns, also for n-separators, |
| 3089 | but `default n-separator = TAB' hasn't been given priority over | 3066 | but `default n-separator = TAB' hasn't been given priority over |
| 3090 | equal column_width also specified by POSIX. */ | 3067 | equal column_width also specified by POSIX. */ |
| 3091 | - if (number_separator == '\t') | 3068 | - if (number_separator == '\t') |
| 3092 | + if (number_separator[0] == '\t') | 3069 | + if (number_separator[0] == '\t') |
| 3093 | { | 3070 | { |
| 3094 | i = number_width - chars_per_number; | 3071 | i = number_width - chars_per_number; |
| 3095 | while (i-- > 0) | 3072 | while (i-- > 0) |
| 3096 | (p->char_func) (' '); | 3073 | (p->char_func) (' '); |
| 3097 | } | 3074 | } |
| 3098 | else | 3075 | else |
| 3099 | - (p->char_func) (number_separator); | 3076 | - (p->char_func) (number_separator); |
| 3100 | + for (j = 0; j < number_separator_length; j++) | 3077 | + for (j = 0; j < number_separator_length; j++) |
| 3101 | + (p->char_func) (number_separator[j]); | 3078 | + (p->char_func) (number_separator[j]); |
| 3102 | } | 3079 | } |
| 3103 | else | 3080 | else |
| 3104 | /* To comply with POSIX, we avoid any expansion of default TAB | 3081 | /* To comply with POSIX, we avoid any expansion of default TAB |
| 3105 | separator with a single column output. No column_width requirement | 3082 | separator with a single column output. No column_width requirement |
| 3106 | has to be considered. */ | 3083 | has to be considered. */ |
| 3107 | { | 3084 | { |
| 3108 | - (p->char_func) (number_separator); | 3085 | - (p->char_func) (number_separator); |
| 3109 | - if (number_separator == '\t') | 3086 | - if (number_separator == '\t') |
| 3110 | + for (j = 0; j < number_separator_length; j++) | 3087 | + for (j = 0; j < number_separator_length; j++) |
| 3111 | + (p->char_func) (number_separator[j]); | 3088 | + (p->char_func) (number_separator[j]); |
| 3112 | + if (number_separator[0] == '\t') | 3089 | + if (number_separator[0] == '\t') |
| 3113 | output_position = POS_AFTER_TAB (chars_per_output_tab, | 3090 | output_position = POS_AFTER_TAB (chars_per_output_tab, |
| 3114 | output_position); | 3091 | output_position); |
| 3115 | } | 3092 | } |
| 3116 | @@ -2234,7 +2362,7 @@ | 3093 | @@ -2234,7 +2362,7 @@ |
| 3117 | while (goal - h_old > 1 | 3094 | while (goal - h_old > 1 |
| 3118 | && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) | 3095 | && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) |
| 3119 | { | 3096 | { |
| 3120 | - putchar (output_tab_char); | 3097 | - putchar (output_tab_char); |
| 3121 | + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); | 3098 | + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); |
| 3122 | h_old = h_new; | 3099 | h_old = h_new; |
| 3123 | } | 3100 | } |
| … | … | ||
| 3131 | s = col_sep_string; | 3108 | s = col_sep_string; |
| 3132 | 3109 | ||
| 3133 | @@ -2267,6 +2396,7 @@ | 3110 | @@ -2267,6 +2396,7 @@ |
| 3134 | { | 3111 | { |
| 3135 | for (; separators_not_printed > 0; --separators_not_printed) | 3112 | for (; separators_not_printed > 0; --separators_not_printed) |
| 3136 | { | 3113 | { |
| 3137 | + not_space_flag = 0; | 3114 | + not_space_flag = 0; |
| 3138 | while (l-- > 0) | 3115 | while (l-- > 0) |
| 3139 | { | 3116 | { |
| 3140 | /* 3 types of sep_strings: spaces only, spaces and chars, | 3117 | /* 3 types of sep_strings: spaces only, spaces and chars, |
| 3141 | @@ -2280,12 +2410,15 @@ | 3118 | @@ -2280,12 +2410,15 @@ |
| 3142 | } | 3119 | } |
| 3143 | else | 3120 | else |
| 3144 | { | 3121 | { |
| 3145 | + not_space_flag = 1; | 3122 | + not_space_flag = 1; |
| 3146 | if (spaces_not_printed > 0) | 3123 | if (spaces_not_printed > 0) |
| 3147 | print_white_space (); | 3124 | print_white_space (); |
| 3148 | putchar (*s++); | 3125 | putchar (*s++); |
| 3149 | - ++output_position; | 3126 | - ++output_position; |
| 3150 | } | 3127 | } |
| 3151 | } | 3128 | } |
| 3152 | + if (not_space_flag) | 3129 | + if (not_space_flag) |
| 3153 | + output_position += col_sep_width; | 3130 | + output_position += col_sep_width; |
| 3154 | + | 3131 | + |
| 3155 | /* sep_string ends with some spaces */ | 3132 | /* sep_string ends with some spaces */ |
| 3156 | if (spaces_not_printed > 0) | 3133 | if (spaces_not_printed > 0) |
| 3157 | print_white_space (); | 3134 | print_white_space (); |
| 3158 | @@ -2313,7 +2446,7 @@ | 3135 | @@ -2313,7 +2446,7 @@ |
| 3159 | required number of tabs and spaces. */ | 3136 | required number of tabs and spaces. */ |
| 3160 | 3137 | ||
| 3161 | static void | 3138 | static void |
| 3162 | -print_char (char c) | 3139 | -print_char (char c) |
| … | … | ||
| 3185 | + state_bak = state; | 3162 | + state_bak = state; |
| 3186 | + mbc[mbc_pos++] = c; | 3163 | + mbc[mbc_pos++] = c; |
| 3187 | + mblength = mbrtowc (&wc, mbc, mbc_pos, &state); | 3164 | + mblength = mbrtowc (&wc, mbc, mbc_pos, &state); |
| 3188 | + | 3165 | + |
| 3189 | + while (mbc_pos > 0) | 3166 | + while (mbc_pos > 0) |
| 3190 | + { | 3167 | + { |
| 3191 | + switch (mblength) | 3168 | + switch (mblength) |
| 3192 | + { | 3169 | + { |
| 3193 | + case (size_t)-2: | 3170 | + case (size_t)-2: |
| 3194 | + state = state_bak; | 3171 | + state = state_bak; |
| 3195 | + return; | 3172 | + return; |
| 3196 | + | 3173 | + |
| 3197 | + case (size_t)-1: | 3174 | + case (size_t)-1: |
| 3198 | + state = state_bak; | 3175 | + state = state_bak; |
| 3199 | + ++output_position; | 3176 | + ++output_position; |
| 3200 | + putchar (mbc[0]); | 3177 | + putchar (mbc[0]); |
| 3201 | + memmove (mbc, mbc + 1, MB_CUR_MAX - 1); | 3178 | + memmove (mbc, mbc + 1, MB_CUR_MAX - 1); |
| 3202 | + --mbc_pos; | 3179 | + --mbc_pos; |
| 3203 | + break; | 3180 | + break; |
| 3204 | + | 3181 | + |
| 3205 | + case 0: | 3182 | + case 0: |
| 3206 | + mblength = 1; | 3183 | + mblength = 1; |
| 3207 | + | 3184 | + |
| 3208 | + default: | 3185 | + default: |
| 3209 | + if (wc == L' ') | 3186 | + if (wc == L' ') |
| 3210 | + { | 3187 | + { |
| 3211 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); | 3188 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); |
| 3212 | + --mbc_pos; | 3189 | + --mbc_pos; |
| 3213 | + ++spaces_not_printed; | 3190 | + ++spaces_not_printed; |
| 3214 | + return; | 3191 | + return; |
| 3215 | + } | 3192 | + } |
| 3216 | + else if (spaces_not_printed > 0) | 3193 | + else if (spaces_not_printed > 0) |
| 3217 | + print_white_space (); | 3194 | + print_white_space (); |
| 3218 | + | 3195 | + |
| 3219 | + /* Nonprintables are assumed to have width 0, except L'\b'. */ | 3196 | + /* Nonprintables are assumed to have width 0, except L'\b'. */ |
| 3220 | + if ((width = wcwidth (wc)) < 1) | 3197 | + if ((width = wcwidth (wc)) < 1) |
| 3221 | + { | 3198 | + { |
| 3222 | + if (wc == L'\b') | 3199 | + if (wc == L'\b') |
| 3223 | + --output_position; | 3200 | + --output_position; |
| 3224 | + } | 3201 | + } |
| 3225 | + else | 3202 | + else |
| 3226 | + output_position += width; | 3203 | + output_position += width; |
| 3227 | + | 3204 | + |
| 3228 | + fwrite (mbc, sizeof(char), mblength, stdout); | 3205 | + fwrite (mbc, sizeof(char), mblength, stdout); |
| 3229 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); | 3206 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); |
| 3230 | + mbc_pos -= mblength; | 3207 | + mbc_pos -= mblength; |
| 3231 | + } | 3208 | + } |
| 3232 | + } | 3209 | + } |
| 3233 | + return; | 3210 | + return; |
| 3234 | + } | 3211 | + } |
| 3235 | + putchar (c); | 3212 | + putchar (c); |
| 3236 | +} | 3213 | +} |
| 3237 | +#endif | 3214 | +#endif |
| 3238 | + | 3215 | + |
| 3239 | /* Skip to page PAGE before printing. | 3216 | /* Skip to page PAGE before printing. |
| 3240 | PAGE may be larger than total number of pages. */ | 3217 | PAGE may be larger than total number of pages. */ |
| 3241 | 3218 | ||
| 3242 | @@ -2517,9 +2718,9 @@ | 3219 | @@ -2517,9 +2718,9 @@ |
| 3243 | align_empty_cols = false; | 3220 | align_empty_cols = false; |
| 3244 | } | 3221 | } |
| 3245 | 3222 | ||
| 3246 | - if (padding_not_printed - col_sep_length > 0) | 3223 | - if (padding_not_printed - col_sep_length > 0) |
| 3247 | + if (padding_not_printed - col_sep_width > 0) | 3224 | + if (padding_not_printed - col_sep_width > 0) |
| 3248 | { | 3225 | { |
| 3249 | - pad_across_to (padding_not_printed - col_sep_length); | 3226 | - pad_across_to (padding_not_printed - col_sep_length); |
| 3250 | + pad_across_to (padding_not_printed - col_sep_width); | 3227 | + pad_across_to (padding_not_printed - col_sep_width); |
| 3251 | padding_not_printed = ANYWHERE; | 3228 | padding_not_printed = ANYWHERE; |
| 3252 | } | 3229 | } |
| 3253 | 3230 | ||
| 3254 | @@ -2620,9 +2821,9 @@ | 3231 | @@ -2620,9 +2821,9 @@ |
| 3255 | } | 3232 | } |
| 3256 | } | 3233 | } |
| 3257 | 3234 | ||
| 3258 | - if (padding_not_printed - col_sep_length > 0) | 3235 | - if (padding_not_printed - col_sep_length > 0) |
| 3259 | + if (padding_not_printed - col_sep_width > 0) | 3236 | + if (padding_not_printed - col_sep_width > 0) |
| 3260 | { | 3237 | { |
| … | … | ||
| 3266 | @@ -2635,8 +2836,8 @@ | 3243 | @@ -2635,8 +2836,8 @@ |
| 3267 | if (spaces_not_printed == 0) | 3244 | if (spaces_not_printed == 0) |
| 3268 | { | 3245 | { |
| 3269 | output_position = p->start_position + end_vector[line]; | 3246 | output_position = p->start_position + end_vector[line]; |
| 3270 | - if (p->start_position - col_sep_length == chars_per_margin) | 3247 | - if (p->start_position - col_sep_length == chars_per_margin) |
| 3271 | - output_position -= col_sep_length; | 3248 | - output_position -= col_sep_length; |
| 3272 | + if (p->start_position - col_sep_width == chars_per_margin) | 3249 | + if (p->start_position - col_sep_width == chars_per_margin) |
| 3273 | + output_position -= col_sep_width; | 3250 | + output_position -= col_sep_width; |
| 3274 | } | 3251 | } |
| 3275 | 3252 | ||
| 3276 | return true; | 3253 | return true; |
| 3277 | @@ -2655,7 +2856,7 @@ | 3254 | @@ -2655,7 +2856,7 @@ |
| 3278 | number of characters is 1.) */ | 3255 | number of characters is 1.) */ |
| … | … | ||
| 3325 | + width = 0; | 3302 | + width = 0; |
| 3326 | + chars = 0; | 3303 | + chars = 0; |
| 3327 | + while (mbc_pos > 0) | 3304 | + while (mbc_pos > 0) |
| 3328 | + { | 3305 | + { |
| 3329 | + switch (mblength) | 3306 | + switch (mblength) |
| 3330 | + { | 3307 | + { |
| 3331 | + case (size_t)-2: | 3308 | + case (size_t)-2: |
| 3332 | + state = state_bak; | 3309 | + state = state_bak; |
| 3333 | + return 0; | 3310 | + return 0; |
| 3334 | + | 3311 | + |
| 3335 | + case (size_t)-1: | 3312 | + case (size_t)-1: |
| 3336 | + state = state_bak; | 3313 | + state = state_bak; |
| 3337 | + mblength = 1; | 3314 | + mblength = 1; |
| 3338 | + | 3315 | + |
| 3339 | + if (use_esc_sequence || use_cntrl_prefix) | 3316 | + if (use_esc_sequence || use_cntrl_prefix) |
| 3340 | + { | 3317 | + { |
| 3341 | + width = +4; | 3318 | + width = +4; |
| 3342 | + chars = +4; | 3319 | + chars = +4; |
| 3343 | + *s++ = '\\'; | 3320 | + *s++ = '\\'; |
| 3344 | + sprintf (esc_buff, "%03o", mbc[0]); | 3321 | + sprintf (esc_buff, "%03o", mbc[0]); |
| 3345 | + for (i = 0; i <= 2; ++i) | 3322 | + for (i = 0; i <= 2; ++i) |
| 3346 | + *s++ = (int) esc_buff[i]; | 3323 | + *s++ = (int) esc_buff[i]; |
| 3347 | + } | 3324 | + } |
| 3348 | + else | 3325 | + else |
| 3349 | + { | 3326 | + { |
| 3350 | + width += 1; | 3327 | + width += 1; |
| 3351 | + chars += 1; | 3328 | + chars += 1; |
| 3352 | + *s++ = mbc[0]; | 3329 | + *s++ = mbc[0]; |
| 3353 | + } | 3330 | + } |
| 3354 | + break; | 3331 | + break; |
| 3355 | + | 3332 | + |
| 3356 | + case 0: | 3333 | + case 0: |
| 3357 | + mblength = 1; | 3334 | + mblength = 1; |
| 3358 | + /* Fall through */ | 3335 | + /* Fall through */ |
| 3359 | + | 3336 | + |
| 3360 | + default: | 3337 | + default: |
| 3361 | + if (memcmp (mbc, input_tab_char, mblength) == 0) | 3338 | + if (memcmp (mbc, input_tab_char, mblength) == 0) |
| 3362 | + chars_per_c = chars_per_input_tab; | 3339 | + chars_per_c = chars_per_input_tab; |
| 3363 | + | 3340 | + |
| 3364 | + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') | 3341 | + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') |
| 3365 | + { | 3342 | + { |
| 3366 | + int width_inc; | 3343 | + int width_inc; |
| 3367 | + | 3344 | + |
| 3368 | + width_inc = TAB_WIDTH (chars_per_c, input_position); | 3345 | + width_inc = TAB_WIDTH (chars_per_c, input_position); |
| 3369 | + width += width_inc; | 3346 | + width += width_inc; |
| 3370 | + | 3347 | + |
| 3371 | + if (untabify_input) | 3348 | + if (untabify_input) |
| 3372 | + { | 3349 | + { |
| 3373 | + for (i = width_inc; i; --i) | 3350 | + for (i = width_inc; i; --i) |
| 3374 | + *s++ = ' '; | 3351 | + *s++ = ' '; |
| 3375 | + chars += width_inc; | 3352 | + chars += width_inc; |
| 3376 | + } | 3353 | + } |
| 3377 | + else | 3354 | + else |
| 3378 | + { | 3355 | + { |
| 3379 | + for (i = 0; i < mblength; i++) | 3356 | + for (i = 0; i < mblength; i++) |
| 3380 | + *s++ = mbc[i]; | 3357 | + *s++ = mbc[i]; |
| 3381 | + chars += mblength; | 3358 | + chars += mblength; |
| 3382 | + } | 3359 | + } |
| 3383 | + } | 3360 | + } |
| 3384 | + else if ((wc_width = wcwidth (wc)) < 1) | 3361 | + else if ((wc_width = wcwidth (wc)) < 1) |
| 3385 | + { | 3362 | + { |
| 3386 | + if (use_esc_sequence) | 3363 | + if (use_esc_sequence) |
| 3387 | + { | 3364 | + { |
| 3388 | + for (i = 0; i < mblength; i++) | 3365 | + for (i = 0; i < mblength; i++) |
| 3389 | + { | 3366 | + { |
| 3390 | + width += 4; | 3367 | + width += 4; |
| 3391 | + chars += 4; | 3368 | + chars += 4; |
| 3392 | + *s++ = '\\'; | 3369 | + *s++ = '\\'; |
| 3393 | + sprintf (esc_buff, "%03o", c); | 3370 | + sprintf (esc_buff, "%03o", c); |
| 3394 | + for (j = 0; j <= 2; ++j) | 3371 | + for (j = 0; j <= 2; ++j) |
| 3395 | + *s++ = (int) esc_buff[j]; | 3372 | + *s++ = (int) esc_buff[j]; |
| 3396 | + } | 3373 | + } |
| 3397 | + } | 3374 | + } |
| 3398 | + else if (use_cntrl_prefix) | 3375 | + else if (use_cntrl_prefix) |
| 3399 | + { | 3376 | + { |
| 3400 | + if (wc < 0200) | 3377 | + if (wc < 0200) |
| 3401 | + { | 3378 | + { |
| 3402 | + width += 2; | 3379 | + width += 2; |
| 3403 | + chars += 2; | 3380 | + chars += 2; |
| 3404 | + *s++ = '^'; | 3381 | + *s++ = '^'; |
| 3405 | + *s++ = wc ^ 0100; | 3382 | + *s++ = wc ^ 0100; |
| 3406 | + } | 3383 | + } |
| 3407 | + else | 3384 | + else |
| 3408 | + { | 3385 | + { |
| 3409 | + for (i = 0; i < mblength; i++) | 3386 | + for (i = 0; i < mblength; i++) |
| 3410 | + { | 3387 | + { |
| 3411 | + width += 4; | 3388 | + width += 4; |
| 3412 | + chars += 4; | 3389 | + chars += 4; |
| 3413 | + *s++ = '\\'; | 3390 | + *s++ = '\\'; |
| 3414 | + sprintf (esc_buff, "%03o", c); | 3391 | + sprintf (esc_buff, "%03o", c); |
| 3415 | + for (j = 0; j <= 2; ++j) | 3392 | + for (j = 0; j <= 2; ++j) |
| 3416 | + *s++ = (int) esc_buff[j]; | 3393 | + *s++ = (int) esc_buff[j]; |
| 3417 | + } | 3394 | + } |
| 3418 | + } | 3395 | + } |
| 3419 | + } | 3396 | + } |
| 3420 | + else if (wc == L'\b') | 3397 | + else if (wc == L'\b') |
| 3421 | + { | 3398 | + { |
| 3422 | + width += -1; | 3399 | + width += -1; |
| 3423 | + chars += 1; | 3400 | + chars += 1; |
| 3424 | + *s++ = c; | 3401 | + *s++ = c; |
| 3425 | + } | 3402 | + } |
| 3403 | + else | ||
| 3404 | + { | ||
| 3405 | + width += 0; | ||
| 3406 | + chars += mblength; | ||
| 3407 | + for (i = 0; i < mblength; i++) | ||
| 3408 | + *s++ = mbc[i]; | ||
| 3409 | + } | ||
| 3410 | + } | ||
| 3426 | + else | 3411 | + else |
| 3427 | + { | 3412 | + { |
| 3428 | + width += 0; | ||
| 3429 | + chars += mblength; | ||
| 3430 | + for (i = 0; i < mblength; i++) | ||
| 3431 | + *s++ = mbc[i]; | ||
| 3432 | + } | ||
| 3433 | + } | ||
| 3434 | + else | ||
| 3435 | + { | ||
| 3436 | + width += wc_width; | 3413 | + width += wc_width; |
| 3437 | + chars += mblength; | 3414 | + chars += mblength; |
| 3438 | + for (i = 0; i < mblength; i++) | 3415 | + for (i = 0; i < mblength; i++) |
| 3439 | + *s++ = mbc[i]; | 3416 | + *s++ = mbc[i]; |
| 3440 | + } | 3417 | + } |
| 3441 | + } | 3418 | + } |
| 3442 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); | 3419 | + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); |
| 3443 | + mbc_pos -= mblength; | 3420 | + mbc_pos -= mblength; |
| 3444 | + } | 3421 | + } |
| 3445 | + | 3422 | + |
| 3446 | + input_position += width; | 3423 | + input_position += width; |
| … | … | ||
| 3468 | @@ -37,6 +42,18 @@ | 3445 | @@ -37,6 +42,18 @@ |
| 3469 | #include "quote.h" | 3446 | #include "quote.h" |
| 3470 | #include "xstrndup.h" | 3447 | #include "xstrndup.h" |
| 3471 | 3448 | ||
| 3472 | +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC | 3449 | +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC |
| 3473 | + installation; work around this configuration error. */ | 3450 | + installation; work around this configuration error. */ |
| 3474 | +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 | 3451 | +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 |
| 3475 | +# undef MB_LEN_MAX | 3452 | +# undef MB_LEN_MAX |
| 3476 | +# define MB_LEN_MAX 16 | 3453 | +# define MB_LEN_MAX 16 |
| 3477 | +#endif | 3454 | +#endif |
| 3478 | + | 3455 | + |
| … | … | ||
| 3487 | @@ -67,6 +84,52 @@ | 3464 | @@ -67,6 +84,52 @@ |
| 3488 | } \ | 3465 | } \ |
| 3489 | while (0) | 3466 | while (0) |
| 3490 | 3467 | ||
| 3491 | +/* Refill the buffer BUF to get a multibyte character. */ | 3468 | +/* Refill the buffer BUF to get a multibyte character. */ |
| 3492 | +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ | 3469 | +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ |
| 3493 | + do \ | 3470 | + do \ |
| 3494 | + { \ | 3471 | + { \ |
| 3495 | + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ | 3472 | + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ |
| 3496 | + { \ | 3473 | + { \ |
| 3497 | + memmove (BUF, BUFPOS, BUFLEN); \ | 3474 | + memmove (BUF, BUFPOS, BUFLEN); \ |
| 3498 | + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ | 3475 | + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ |
| 3499 | + BUFPOS = BUF; \ | 3476 | + BUFPOS = BUF; \ |
| 3500 | + } \ | 3477 | + } \ |
| 3501 | + } \ | 3478 | + } \ |
| 3502 | + while (0) | 3479 | + while (0) |
| 3503 | + | 3480 | + |
| 3504 | +/* Get the wide character at BUFPOS. BUFPOS itself is not advanced. | 3481 | +/* Get the wide character at BUFPOS. BUFPOS itself is not advanced. |
| 3505 | + If the byte sequence is not a valid character, CONVFAIL is 1; otherwise 0. */ | 3482 | + If the byte sequence is not a valid character, CONVFAIL is 1; otherwise 0. */ |
| 3506 | +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ | 3483 | +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ |
| 3507 | + do \ | 3484 | + do \ |
| 3508 | + { \ | 3485 | + { \ |
| 3509 | + mbstate_t state_bak; \ | 3486 | + mbstate_t state_bak; \ |
| 3510 | + \ | 3487 | + \ |
| 3511 | + if (BUFLEN < 1) \ | 3488 | + if (BUFLEN < 1) \ |
| 3512 | + { \ | 3489 | + { \ |
| 3513 | + WC = WEOF; \ | 3490 | + WC = WEOF; \ |
| 3514 | + break; \ | 3491 | + break; \ |
| 3515 | + } \ | 3492 | + } \ |
| 3516 | + \ | 3493 | + \ |
| 3517 | + /* Get a wide character. */ \ | 3494 | + /* Get a wide character. */ \ |
| 3518 | + CONVFAIL = 0; \ | 3495 | + CONVFAIL = 0; \ |
| 3519 | + state_bak = STATE; \ | 3496 | + state_bak = STATE; \ |
| 3520 | + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ | 3497 | + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ |
| 3521 | + \ | 3498 | + \ |
| 3522 | + switch (MBLENGTH) \ | 3499 | + switch (MBLENGTH) \ |
| 3523 | + { \ | 3500 | + { \ |
| 3524 | + case (size_t)-1: \ | 3501 | + case (size_t)-1: \ |
| 3525 | + case (size_t)-2: \ | 3502 | + case (size_t)-2: \ |
| 3526 | + CONVFAIL++; \ | 3503 | + CONVFAIL++; \ |
| 3527 | + STATE = state_bak; \ | 3504 | + STATE = state_bak; \ |
| 3528 | + /* Fall through. */ \ | 3505 | + /* Fall through. */ \ |
| 3529 | + \ | 3506 | + \ |
| 3530 | + case 0: \ | 3507 | + case 0: \ |
| 3531 | + MBLENGTH = 1; \ | 3508 | + MBLENGTH = 1; \ |
| 3532 | + break; \ | 3509 | + break; \ |
| 3533 | + } \ | 3510 | + } \ |
| 3534 | + } \ | 3511 | + } \ |
| 3535 | + while (0) | 3512 | + while (0) |
| 3536 | + | 3513 | + |
| 3537 | struct range_pair | 3514 | struct range_pair |
| 3538 | { | 3515 | { |
| 3539 | size_t lo; | 3516 | size_t lo; |
| … | … | ||
| 3606 | + -n with -b: don't split multibyte characters\n\ | 3583 | + -n with -b: don't split multibyte characters\n\ |
| 3607 | "), stdout); | 3584 | "), stdout); |
| 3608 | fputs (_("\ | 3585 | fputs (_("\ |
| 3609 | --complement complement the set of selected bytes, characters\n\ | 3586 | --complement complement the set of selected bytes, characters\n\ |
| 3610 | @@ -362,7 +439,7 @@ | 3587 | @@ -362,7 +439,7 @@ |
| 3611 | in_digits = false; | 3588 | in_digits = false; |
| 3612 | /* Starting a range. */ | 3589 | /* Starting a range. */ |
| 3613 | if (dash_found) | 3590 | if (dash_found) |
| 3614 | - FATAL_ERROR (_("invalid byte or field list")); | 3591 | - FATAL_ERROR (_("invalid byte or field list")); |
| 3615 | + FATAL_ERROR (_("invalid byte, character or field list")); | 3592 | + FATAL_ERROR (_("invalid byte, character or field list")); |
| 3616 | dash_found = true; | 3593 | dash_found = true; |
| 3617 | fieldstr++; | 3594 | fieldstr++; |
| 3618 | 3595 | ||
| 3619 | @@ -387,14 +464,16 @@ | 3596 | @@ -387,14 +464,16 @@ |
| 3620 | if (!rhs_specified) | 3597 | if (!rhs_specified) |
| 3621 | { | 3598 | { |
| 3622 | /* `n-'. From `initial' to end of line. */ | 3599 | /* `n-'. From `initial' to end of line. */ |
| 3623 | - eol_range_start = initial; | 3600 | - eol_range_start = initial; |
| 3624 | + if (eol_range_start == 0 || | 3601 | + if (eol_range_start == 0 || |
| 3625 | + (eol_range_start != 0 && eol_range_start > initial)) | 3602 | + (eol_range_start != 0 && eol_range_start > initial)) |
| 3626 | + eol_range_start = initial; | 3603 | + eol_range_start = initial; |
| 3627 | field_found = true; | 3604 | field_found = true; |
| 3628 | } | 3605 | } |
| 3629 | else | 3606 | else |
| 3630 | { | 3607 | { |
| 3631 | /* `m-n' or `-n' (1-n). */ | 3608 | /* `m-n' or `-n' (1-n). */ |
| 3632 | if (value < initial) | 3609 | if (value < initial) |
| 3633 | - FATAL_ERROR (_("invalid decreasing range")); | 3610 | - FATAL_ERROR (_("invalid decreasing range")); |
| 3634 | + FATAL_ERROR (_("invalid byte, character or field list")); | 3611 | + FATAL_ERROR (_("invalid byte, character or field list")); |
| 3635 | 3612 | ||
| 3636 | /* Is there already a range going to end of line? */ | 3613 | /* Is there already a range going to end of line? */ |
| 3637 | if (eol_range_start != 0) | 3614 | if (eol_range_start != 0) |
| 3638 | @@ -467,6 +546,9 @@ | 3615 | @@ -467,6 +546,9 @@ |
| 3639 | if (operating_mode == byte_mode) | 3616 | if (operating_mode == byte_mode) |
| 3640 | error (0, 0, | 3617 | error (0, 0, |
| 3641 | _("byte offset %s is too large"), quote (bad_num)); | 3618 | _("byte offset %s is too large"), quote (bad_num)); |
| 3642 | + else if (operating_mode == character_mode) | 3619 | + else if (operating_mode == character_mode) |
| 3643 | + error (0, 0, | 3620 | + error (0, 0, |
| 3644 | + _("character offset %s is too large"), quote (bad_num)); | 3621 | + _("character offset %s is too large"), quote (bad_num)); |
| 3645 | else | 3622 | else |
| 3646 | error (0, 0, | 3623 | error (0, 0, |
| 3647 | _("field number %s is too large"), quote (bad_num)); | 3624 | _("field number %s is too large"), quote (bad_num)); |
| 3648 | @@ -477,7 +559,7 @@ | 3625 | @@ -477,7 +559,7 @@ |
| 3649 | fieldstr++; | 3626 | fieldstr++; |
| 3650 | } | 3627 | } |
| 3651 | else | 3628 | else |
| 3652 | - FATAL_ERROR (_("invalid byte or field list")); | 3629 | - FATAL_ERROR (_("invalid byte or field list")); |
| 3653 | + FATAL_ERROR (_("invalid byte, character or field list")); | 3630 | + FATAL_ERROR (_("invalid byte, character or field list")); |
| 3654 | } | 3631 | } |
| 3655 | 3632 | ||
| 3656 | max_range_endpoint = 0; | 3633 | max_range_endpoint = 0; |
| 3657 | @@ -570,6 +652,63 @@ | 3634 | @@ -570,6 +652,63 @@ |
| 3658 | } | 3635 | } |
| … | … | ||
| 3668 | + without splitting multibyte characters. */ | 3645 | + without splitting multibyte characters. */ |
| 3669 | + | 3646 | + |
| 3670 | +static void | 3647 | +static void |
| 3671 | +cut_characters_or_cut_bytes_no_split (FILE *stream) | 3648 | +cut_characters_or_cut_bytes_no_split (FILE *stream) |
| 3672 | +{ | 3649 | +{ |
| 3673 | + int idx; /* number of bytes or characters in the line so far. */ | 3650 | + int idx; /* number of bytes or characters in the line so far. */ |
| 3674 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ | 3651 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ |
| 3675 | + char *bufpos; /* Next read position of BUF. */ | 3652 | + char *bufpos; /* Next read position of BUF. */ |
| 3676 | + size_t buflen; /* The length of the byte sequence in buf. */ | 3653 | + size_t buflen; /* The length of the byte sequence in buf. */ |
| 3677 | + wint_t wc; /* The wide character just read. */ | 3654 | + wint_t wc; /* The wide character just read. */ |
| 3678 | + size_t mblength; /* The byte length of the multibyte character that | 3655 | + size_t mblength; /* The byte length of the multibyte character that |
| 3679 | + corresponds to WC. */ | 3656 | + corresponds to WC. */ |
| 3680 | + mbstate_t state; /* State of the stream. */ | 3657 | + mbstate_t state; /* State of the stream. */ |
| 3681 | + int convfail; /* 1 when the conversion failed, otherwise 0. */ | 3658 | + int convfail; /* 1 when the conversion failed, otherwise 0. */ |
| 3682 | + | 3659 | + |
| 3683 | + idx = 0; | 3660 | + idx = 0; |
| 3684 | + buflen = 0; | 3661 | + buflen = 0; |
| 3685 | + bufpos = buf; | 3662 | + bufpos = buf; |
| 3686 | + memset (&state, '\0', sizeof(mbstate_t)); | 3663 | + memset (&state, '\0', sizeof(mbstate_t)); |
| … | … | ||
| 3690 | + REFILL_BUFFER (buf, bufpos, buflen, stream); | 3667 | + REFILL_BUFFER (buf, bufpos, buflen, stream); |
| 3691 | + | 3668 | + |
| 3692 | + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); | 3669 | + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); |
| 3693 | + | 3670 | + |
| 3694 | + if (wc == WEOF) | 3671 | + if (wc == WEOF) |
| 3695 | + { | 3672 | + { |
| 3696 | + if (idx > 0) | 3673 | + if (idx > 0) |
| 3697 | + putchar ('\n'); | 3674 | + putchar ('\n'); |
| 3698 | + break; | 3675 | + break; |
| 3699 | + } | 3676 | + } |
| 3700 | + else if (wc == L'\n') | 3677 | + else if (wc == L'\n') |
| 3701 | + { | 3678 | + { |
| 3702 | + putchar ('\n'); | 3679 | + putchar ('\n'); |
| 3703 | + idx = 0; | 3680 | + idx = 0; |
| 3704 | + } | 3681 | + } |
| 3705 | + else | 3682 | + else |
| 3706 | + { | 3683 | + { |
| 3707 | + idx += (operating_mode == byte_mode) ? mblength : 1; | 3684 | + idx += (operating_mode == byte_mode) ? mblength : 1; |
| 3708 | + if (print_kth (idx, NULL)) | 3685 | + if (print_kth (idx, NULL)) |
| 3709 | + fwrite (bufpos, mblength, sizeof(char), stdout); | 3686 | + fwrite (bufpos, mblength, sizeof(char), stdout); |
| 3710 | + } | 3687 | + } |
| 3711 | + | 3688 | + |
| 3712 | + buflen -= mblength; | 3689 | + buflen -= mblength; |
| 3713 | + bufpos += mblength; | 3690 | + bufpos += mblength; |
| 3714 | + } | 3691 | + } |
| 3715 | +} | 3692 | +} |
| 3716 | +#endif | 3693 | +#endif |
| 3717 | + | 3694 | + |
| 3718 | /* Read from stream STREAM, printing to standard output any selected fields. */ | 3695 | /* Read from stream STREAM, printing to standard output any selected fields. */ |
| 3719 | 3696 | ||
| 3720 | static void | 3697 | static void |
| 3721 | @@ -692,13 +831,192 @@ | 3698 | @@ -692,13 +831,192 @@ |
| 3722 | } | 3699 | } |
| … | … | ||
| 3730 | + unsigned int field_idx; | 3707 | + unsigned int field_idx; |
| 3731 | + int found_any_selected_field; | 3708 | + int found_any_selected_field; |
| 3732 | + int buffer_first_field; | 3709 | + int buffer_first_field; |
| 3733 | + int empty_input; | 3710 | + int empty_input; |
| 3734 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ | 3711 | + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ |
| 3735 | + char *bufpos; /* Next read position of BUF. */ | 3712 | + char *bufpos; /* Next read position of BUF. */ |
| 3736 | + size_t buflen; /* The length of the byte sequence in buf. */ | 3713 | + size_t buflen; /* The length of the byte sequence in buf. */ |
| 3737 | + wint_t wc = 0; /* The wide character just read. */ | 3714 | + wint_t wc = 0; /* The wide character just read. */ |
| 3738 | + size_t mblength; /* The byte length of the multibyte character that | 3715 | + size_t mblength; /* The byte length of the multibyte character that |
| 3739 | + corresponds to WC. */ | 3716 | + corresponds to WC. */ |
| 3740 | + mbstate_t state; /* State of the stream. */ | 3717 | + mbstate_t state; /* State of the stream. */ |
| 3741 | + int convfail; /* 1 when the conversion failed, otherwise 0. */ | 3718 | + int convfail; /* 1 when the conversion failed, otherwise 0. */ |
| 3742 | + | 3719 | + |
| 3743 | + found_any_selected_field = 0; | 3720 | + found_any_selected_field = 0; |
| 3744 | + field_idx = 1; | 3721 | + field_idx = 1; |
| 3745 | + bufpos = buf; | 3722 | + bufpos = buf; |
| 3746 | + buflen = 0; | 3723 | + buflen = 0; |
| … | … | ||
| 3762 | + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); | 3739 | + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); |
| 3763 | + | 3740 | + |
| 3764 | + while (1) | 3741 | + while (1) |
| 3765 | + { | 3742 | + { |
| 3766 | + if (field_idx == 1 && buffer_first_field) | 3743 | + if (field_idx == 1 && buffer_first_field) |
| 3767 | + { | 3744 | + { |
| 3768 | + int len = 0; | 3745 | + int len = 0; |
| 3769 | + | 3746 | + |
| 3770 | + while (1) | 3747 | + while (1) |
| 3771 | + { | 3748 | + { |
| 3772 | + REFILL_BUFFER (buf, bufpos, buflen, stream); | 3749 | + REFILL_BUFFER (buf, bufpos, buflen, stream); |
| 3773 | + | 3750 | + |
| 3774 | + GET_NEXT_WC_FROM_BUFFER | 3751 | + GET_NEXT_WC_FROM_BUFFER |
| 3775 | + (wc, bufpos, buflen, mblength, state, convfail); | 3752 | + (wc, bufpos, buflen, mblength, state, convfail); |
| 3776 | + | 3753 | + |
| 3777 | + if (wc == WEOF) | 3754 | + if (wc == WEOF) |
| 3778 | + break; | 3755 | + break; |
| 3779 | + | 3756 | + |
| 3780 | + field_1_buffer = xrealloc (field_1_buffer, len + mblength); | 3757 | + field_1_buffer = xrealloc (field_1_buffer, len + mblength); |
| 3781 | + memcpy (field_1_buffer + len, bufpos, mblength); | 3758 | + memcpy (field_1_buffer + len, bufpos, mblength); |
| 3782 | + len += mblength; | 3759 | + len += mblength; |
| 3783 | + buflen -= mblength; | 3760 | + buflen -= mblength; |
| 3784 | + bufpos += mblength; | 3761 | + bufpos += mblength; |
| 3785 | + | 3762 | + |
| 3786 | + if (!convfail && (wc == L'\n' || wc == wcdelim)) | 3763 | + if (!convfail && (wc == L'\n' || wc == wcdelim)) |
| 3787 | + break; | 3764 | + break; |
| 3788 | + } | 3765 | + } |
| 3789 | + | 3766 | + |
| 3790 | + if (wc == WEOF) | 3767 | + if (wc == WEOF) |
| 3791 | + break; | 3768 | + break; |
| 3792 | + | 3769 | + |
| 3793 | + /* If the first field extends to the end of line (it is not | 3770 | + /* If the first field extends to the end of line (it is not |
| 3794 | + delimited) and we are printing all non-delimited lines, | 3771 | + delimited) and we are printing all non-delimited lines, |
| 3795 | + print this one. */ | 3772 | + print this one. */ |
| 3796 | + if (convfail || (!convfail && wc != wcdelim)) | 3773 | + if (convfail || (!convfail && wc != wcdelim)) |
| 3797 | + { | 3774 | + { |
| 3798 | + if (suppress_non_delimited) | 3775 | + if (suppress_non_delimited) |
| 3799 | + { | 3776 | + { |
| 3800 | + /* Empty. */ | 3777 | + /* Empty. */ |
| 3801 | + } | 3778 | + } |
| 3802 | + else | 3779 | + else |
| 3803 | + { | 3780 | + { |
| 3804 | + fwrite (field_1_buffer, sizeof (char), len, stdout); | 3781 | + fwrite (field_1_buffer, sizeof (char), len, stdout); |
| 3805 | + /* Make sure the output line is newline terminated. */ | 3782 | + /* Make sure the output line is newline terminated. */ |
| 3806 | + if (convfail || (!convfail && wc != L'\n')) | 3783 | + if (convfail || (!convfail && wc != L'\n')) |
| 3807 | + putchar ('\n'); | 3784 | + putchar ('\n'); |
| 3808 | + } | 3785 | + } |
| 3809 | + continue; | 3786 | + continue; |
| 3810 | + } | 3787 | + } |
| 3811 | + | 3788 | + |
| 3812 | + if (print_kth (1, NULL)) | 3789 | + if (print_kth (1, NULL)) |
| 3813 | + { | 3790 | + { |
| 3814 | + /* Print the field, but not the trailing delimiter. */ | 3791 | + /* Print the field, but not the trailing delimiter. */ |
| 3815 | + fwrite (field_1_buffer, sizeof (char), len - 1, stdout); | 3792 | + fwrite (field_1_buffer, sizeof (char), len - 1, stdout); |
| 3816 | + found_any_selected_field = 1; | 3793 | + found_any_selected_field = 1; |
| 3817 | + } | 3794 | + } |
| 3818 | + ++field_idx; | 3795 | + ++field_idx; |
| 3819 | + } | 3796 | + } |
| 3820 | + | 3797 | + |
| 3821 | + if (wc != WEOF) | 3798 | + if (wc != WEOF) |
| 3822 | + { | 3799 | + { |
| 3823 | + if (print_kth (field_idx, NULL)) | ||
| 3824 | + { | ||
| 3825 | + if (found_any_selected_field) | ||
| 3826 | + { | ||
| 3827 | + fwrite (output_delimiter_string, sizeof (char), | ||
| 3828 | + output_delimiter_length, stdout); | ||
| 3829 | + } | ||
| 3830 | + found_any_selected_field = 1; | ||
| 3831 | + } | ||
| 3832 | + | ||
| 3833 | + while (1) | ||
| 3834 | + { | ||
| 3835 | + REFILL_BUFFER (buf, bufpos, buflen, stream); | ||
| 3836 | + | ||
| 3837 | + GET_NEXT_WC_FROM_BUFFER | ||
| 3838 | + (wc, bufpos, buflen, mblength, state, convfail); | ||
| 3839 | + | ||
| 3840 | + if (wc == WEOF) | ||
| 3841 | + break; | ||
| 3842 | + else if (!convfail && (wc == wcdelim || wc == L'\n')) | ||
| 3843 | + { | ||
| 3844 | + buflen -= mblength; | ||
| 3845 | + bufpos += mblength; | ||
| 3846 | + break; | ||
| 3847 | + } | ||
| 3848 | + | ||
| 3849 | + if (print_kth (field_idx, NULL)) | 3800 | + if (print_kth (field_idx, NULL)) |
| 3801 | + { | ||
| 3802 | + if (found_any_selected_field) | ||
| 3803 | + { | ||
| 3804 | + fwrite (output_delimiter_string, sizeof (char), | ||
| 3805 | + output_delimiter_length, stdout); | ||
| 3806 | + } | ||
| 3807 | + found_any_selected_field = 1; | ||
| 3808 | + } | ||
| 3809 | + | ||
| 3810 | + while (1) | ||
| 3811 | + { | ||
| 3812 | + REFILL_BUFFER (buf, bufpos, buflen, stream); | ||
| 3813 | + | ||
| 3814 | + GET_NEXT_WC_FROM_BUFFER | ||
| 3815 | + (wc, bufpos, buflen, mblength, state, convfail); | ||
| 3816 | + | ||
| 3817 | + if (wc == WEOF) | ||
| 3818 | + break; | ||
| 3819 | + else if (!convfail && (wc == wcdelim || wc == L'\n')) | ||
| 3820 | + { | ||
| 3821 | + buflen -= mblength; | ||
| 3822 | + bufpos += mblength; | ||
| 3823 | + break; | ||
| 3824 | + } | ||
| 3825 | + | ||
| 3826 | + if (print_kth (field_idx, NULL)) | ||
| 3850 | + fwrite (bufpos, mblength, sizeof(char), stdout); | 3827 | + fwrite (bufpos, mblength, sizeof(char), stdout); |
| 3851 | + | 3828 | + |
| 3852 | + buflen -= mblength; | 3829 | + buflen -= mblength; |
| 3853 | + bufpos += mblength; | 3830 | + bufpos += mblength; |
| 3854 | + } | 3831 | + } |
| 3855 | + } | 3832 | + } |
| 3856 | + | 3833 | + |
| 3857 | + if ((!convfail || wc == L'\n') && buflen < 1) | 3834 | + if ((!convfail || wc == L'\n') && buflen < 1) |
| 3858 | + wc = WEOF; | 3835 | + wc = WEOF; |
| 3859 | + | 3836 | + |
| 3860 | + if (!convfail && wc == wcdelim) | 3837 | + if (!convfail && wc == wcdelim) |
| 3861 | + ++field_idx; | 3838 | + ++field_idx; |
| 3862 | + else if (wc == WEOF || (!convfail && wc == L'\n')) | 3839 | + else if (wc == WEOF || (!convfail && wc == L'\n')) |
| 3863 | + { | 3840 | + { |
| 3864 | + if (found_any_selected_field | 3841 | + if (found_any_selected_field |
| 3865 | + || (!empty_input && !(suppress_non_delimited && field_idx == 1))) | 3842 | + || (!empty_input && !(suppress_non_delimited && field_idx == 1))) |
| 3866 | + putchar ('\n'); | 3843 | + putchar ('\n'); |
| 3867 | + if (wc == WEOF) | 3844 | + if (wc == WEOF) |
| 3868 | + break; | 3845 | + break; |
| 3869 | + field_idx = 1; | 3846 | + field_idx = 1; |
| 3870 | + found_any_selected_field = 0; | 3847 | + found_any_selected_field = 0; |
| 3871 | + } | 3848 | + } |
| 3872 | + } | 3849 | + } |
| 3873 | +} | 3850 | +} |
| 3874 | +#endif | 3851 | +#endif |
| 3875 | + | 3852 | + |
| 3876 | static void | 3853 | static void |
| … | … | ||
| 3880 | - cut_bytes (stream); | 3857 | - cut_bytes (stream); |
| 3881 | +#if HAVE_MBRTOWC | 3858 | +#if HAVE_MBRTOWC |
| 3882 | + if (MB_CUR_MAX > 1 && !force_singlebyte_mode) | 3859 | + if (MB_CUR_MAX > 1 && !force_singlebyte_mode) |
| 3883 | + { | 3860 | + { |
| 3884 | + switch (operating_mode) | 3861 | + switch (operating_mode) |
| 3885 | + { | 3862 | + { |
| 3886 | + case byte_mode: | 3863 | + case byte_mode: |
| 3887 | + if (byte_mode_character_aware) | 3864 | + if (byte_mode_character_aware) |
| 3865 | + cut_characters_or_cut_bytes_no_split (stream); | ||
| 3866 | + else | ||
| 3867 | + cut_bytes (stream); | ||
| 3868 | + break; | ||
| 3869 | + | ||
| 3870 | + case character_mode: | ||
| 3888 | + cut_characters_or_cut_bytes_no_split (stream); | 3871 | + cut_characters_or_cut_bytes_no_split (stream); |
| 3889 | + else | 3872 | + break; |
| 3890 | + cut_bytes (stream); | ||
| 3891 | + break; | ||
| 3892 | + | 3873 | + |
| 3893 | + case character_mode: | ||
| 3894 | + cut_characters_or_cut_bytes_no_split (stream); | ||
| 3895 | + break; | ||
| 3896 | + | ||
| 3897 | + case field_mode: | 3874 | + case field_mode: |
| 3898 | + cut_fields_mb (stream); | 3875 | + cut_fields_mb (stream); |
| 3899 | + break; | 3876 | + break; |
| 3900 | + | 3877 | + |
| 3901 | + default: | 3878 | + default: |
| 3902 | + abort (); | 3879 | + abort (); |
| 3903 | + } | 3880 | + } |
| 3904 | + } | 3881 | + } |
| 3905 | else | 3882 | else |
| 3906 | - cut_fields (stream); | 3883 | - cut_fields (stream); |
| 3907 | +#endif | 3884 | +#endif |
| 3908 | + { | 3885 | + { |
| 3909 | + if (operating_mode == field_mode) | 3886 | + if (operating_mode == field_mode) |
| 3910 | + cut_fields (stream); | 3887 | + cut_fields (stream); |
| 3911 | + else | 3888 | + else |
| 3912 | + cut_bytes (stream); | 3889 | + cut_bytes (stream); |
| 3913 | + } | 3890 | + } |
| 3914 | } | 3891 | } |
| 3915 | 3892 | ||
| 3916 | /* Process file FILE to standard output. | 3893 | /* Process file FILE to standard output. |
| 3917 | @@ -748,6 +1066,8 @@ | 3894 | @@ -748,6 +1066,8 @@ |
| … | … | ||
| 3923 | 3900 | ||
| 3924 | initialize_main (&argc, &argv); | 3901 | initialize_main (&argc, &argv); |
| 3925 | set_program_name (argv[0]); | 3902 | set_program_name (argv[0]); |
| 3926 | @@ -770,7 +1090,6 @@ | 3903 | @@ -770,7 +1090,6 @@ |