/[pkgs]/devel/sed/sed-4.1.5-utf8performance.patch
ViewVC logotype

Contents of /devel/sed/sed-4.1.5-utf8performance.patch

Parent Directory | Revision Log


Revision 1.1 - (show annotations) (download) (as text)
Thu Aug 3 13:36:23 2006 UTC (3 years, 3 months ago) by pmachata
Branch: MAIN
CVS Tags: sed-4_2_1-2_fc12, sed-4_1_5-6_fc7, sed-4_1_5-12_fc11, F-12-split, show, F-10-split, sed-4_2_1-4_fc13, F-7-split, sed-4_2_1-1_fc12, sed-4_1_5-4_fc6, sed-4_2_1-3_fc12, F-11-split, sed-4_1_5-7_fc7, sed-4_1_5-5_fc6, F-8-split, sed-4_1_5-9_fc8, F-9-split, sed-4_1_5-11_fc11, sed-4_1_5-10_fc9, FC-6-split, HEAD
File MIME type: text/x-patch
- remove superfluous multibyte processing in str_append for UTF-8
  encoding (thanks Paolo Bonzini, #177246)
1 * looking for bonzini@gnu.org--2004b/sed--stable--4.1--patch-69 to compare with
2 * comparing to bonzini@gnu.org--2004b/sed--stable--4.1--patch-69
3 M sed/mbcs.c
4 M sed/sed.h
5 M sed/execute.c
6
7 * modified files
8
9 --- orig/sed/execute.c
10 +++ mod/sed/execute.c
11 @@ -235,25 +235,26 @@ str_append(to, string, length)
12 to->length = new_length;
13
14 #ifdef HAVE_MBRTOWC
15 - if (mb_cur_max == 1)
16 - return;
17 -
18 - while (length)
19 - {
20 - int n = MBRLEN (string, length, &to->mbstate);
21 + if (mb_cur_max > 1 && !is_utf8)
22 + while (length)
23 + {
24 + size_t n = MBRLEN (string, length, &to->mbstate);
25
26 - /* An invalid sequence is treated like a singlebyte character. */
27 - if (n == -1)
28 - {
29 - memset (&to->mbstate, 0, sizeof (to->mbstate));
30 - n = 1;
31 - }
32 + /* An invalid sequence is treated like a singlebyte character. */
33 + if (n == (size_t) -1)
34 + {
35 + memset (&to->mbstate, 0, sizeof (to->mbstate));
36 + n = 1;
37 + }
38
39 - if (n > 0)
40 - length -= n;
41 - else
42 - break;
43 - }
44 + if (n > 0)
45 + {
46 + string += n;
47 + length -= n;
48 + }
49 + else
50 + break;
51 + }
52 #endif
53 }
54
55
56
57 --- orig/sed/mbcs.c
58 +++ mod/sed/mbcs.c
59 @@ -18,7 +18,12 @@
60 #include "sed.h"
61 #include <stdlib.h>
62
63 +#ifdef HAVE_LANGINFO_CODESET
64 +#include <langinfo.h>
65 +#endif
66 +
67 int mb_cur_max;
68 +bool is_utf8;
69
70 #ifdef HAVE_MBRTOWC
71 /* Add a byte to the multibyte character represented by the state
72 @@ -47,6 +52,26 @@ int brlen (ch, cur_stat)
73 void
74 initialize_mbcs ()
75 {
76 + /* For UTF-8, we know that the encoding is stateless. */
77 + const char *codeset_name;
78 +
79 +#ifdef HAVE_LANGINFO_CODESET
80 + codeset_name = nl_langinfo (CODESET);
81 +#else
82 + codeset_name = getenv ("LC_ALL");
83 + if (codeset_name == NULL || codeset_name[0] == '\0')
84 + codeset_name = getenv ("LC_CTYPE");
85 + if (codeset_name == NULL || codeset_name[0] == '\0')
86 + codeset_name = getenv ("LANG");
87 + if (codeset_name == NULL)
88 + codeset_name = "";
89 + else if (strchr (codeset_name, '.') != NULL)
90 + codeset_name = strchr (codeset_name, '.') + 1;
91 +#endif
92 +
93 + is_utf8 = (strcasecmp (codeset_name, "UTF-8") == 0
94 + || strcasecmp (codeset_name, "UTF8") == 0);
95 +
96 #ifdef HAVE_MBRTOWC
97 mb_cur_max = MB_CUR_MAX;
98 #else
99
100
101 --- orig/sed/sed.h
102 +++ mod/sed/sed.h
103 @@ -233,6 +233,7 @@ extern bool use_extended_syntax_p;
104
105 /* Declarations for multibyte character sets. */
106 extern int mb_cur_max;
107 +extern bool is_utf8;
108
109 #ifdef HAVE_MBRTOWC
110 #ifdef HAVE_BTOWC
111
112
113

admin@fedoraproject.org
ViewVC Help
Powered by ViewVC 1.1.2