| 1 |
* looking for bonzini@gnu.org--2004b/sed--stable--4.1--patch-69 to compare with
|
| 2 |
* comparing to bonzini@gnu.org--2004b/sed--stable--4.1--patch-69
|
| 3 |
M sed/mbcs.c
|
| 4 |
M sed/sed.h
|
| 5 |
M sed/execute.c
|
| 6 |
|
| 7 |
* modified files
|
| 8 |
|
| 9 |
--- orig/sed/execute.c
|
| 10 |
+++ mod/sed/execute.c
|
| 11 |
@@ -235,25 +235,26 @@ str_append(to, string, length)
|
| 12 |
to->length = new_length;
|
| 13 |
|
| 14 |
#ifdef HAVE_MBRTOWC
|
| 15 |
- if (mb_cur_max == 1)
|
| 16 |
- return;
|
| 17 |
-
|
| 18 |
- while (length)
|
| 19 |
- {
|
| 20 |
- int n = MBRLEN (string, length, &to->mbstate);
|
| 21 |
+ if (mb_cur_max > 1 && !is_utf8)
|
| 22 |
+ while (length)
|
| 23 |
+ {
|
| 24 |
+ size_t n = MBRLEN (string, length, &to->mbstate);
|
| 25 |
|
| 26 |
- /* An invalid sequence is treated like a singlebyte character. */
|
| 27 |
- if (n == -1)
|
| 28 |
- {
|
| 29 |
- memset (&to->mbstate, 0, sizeof (to->mbstate));
|
| 30 |
- n = 1;
|
| 31 |
- }
|
| 32 |
+ /* An invalid sequence is treated like a single-byte character. */
|
| 33 |
+ if (n == (size_t) -1)
|
| 34 |
+ {
|
| 35 |
+ memset (&to->mbstate, 0, sizeof (to->mbstate));
|
| 36 |
+ n = 1;
|
| 37 |
+ }
|
| 38 |
|
| 39 |
- if (n > 0)
|
| 40 |
- length -= n;
|
| 41 |
- else
|
| 42 |
- break;
|
| 43 |
- }
|
| 44 |
+ if (n > 0)
|
| 45 |
+ {
|
| 46 |
+ string += n;
|
| 47 |
+ length -= n;
|
| 48 |
+ }
|
| 49 |
+ else
|
| 50 |
+ break;
|
| 51 |
+ }
|
| 52 |
#endif
|
| 53 |
}
|
| 54 |
|
| 55 |
|
| 56 |
|
| 57 |
--- orig/sed/mbcs.c
|
| 58 |
+++ mod/sed/mbcs.c
|
| 59 |
@@ -18,7 +18,12 @@
|
| 60 |
#include "sed.h"
|
| 61 |
#include <stdlib.h>
|
| 62 |
|
| 63 |
+#ifdef HAVE_LANGINFO_CODESET
|
| 64 |
+#include <langinfo.h>
|
| 65 |
+#endif
|
| 66 |
+
|
| 67 |
int mb_cur_max;
|
| 68 |
+bool is_utf8;
|
| 69 |
|
| 70 |
#ifdef HAVE_MBRTOWC
|
| 71 |
/* Add a byte to the multibyte character represented by the state
|
| 72 |
@@ -47,6 +52,26 @@ int brlen (ch, cur_stat)
|
| 73 |
void
|
| 74 |
initialize_mbcs ()
|
| 75 |
{
|
| 76 |
+ /* For UTF-8, we know that the encoding is stateless. */
|
| 77 |
+ const char *codeset_name;
|
| 78 |
+
|
| 79 |
+#ifdef HAVE_LANGINFO_CODESET
|
| 80 |
+ codeset_name = nl_langinfo (CODESET);
|
| 81 |
+#else
|
| 82 |
+ codeset_name = getenv ("LC_ALL");
|
| 83 |
+ if (codeset_name == NULL || codeset_name[0] == '\0')
|
| 84 |
+ codeset_name = getenv ("LC_CTYPE");
|
| 85 |
+ if (codeset_name == NULL || codeset_name[0] == '\0')
|
| 86 |
+ codeset_name = getenv ("LANG");
|
| 87 |
+ if (codeset_name == NULL)
|
| 88 |
+ codeset_name = "";
|
| 89 |
+ else if (strchr (codeset_name, '.') != NULL)
|
| 90 |
+ codeset_name = strchr (codeset_name, '.') + 1;
|
| 91 |
+#endif
|
| 92 |
+
|
| 93 |
+ is_utf8 = (strcasecmp (codeset_name, "UTF-8") == 0
|
| 94 |
+ || strcasecmp (codeset_name, "UTF8") == 0);
|
| 95 |
+
|
| 96 |
#ifdef HAVE_MBRTOWC
|
| 97 |
mb_cur_max = MB_CUR_MAX;
|
| 98 |
#else
|
| 99 |
|
| 100 |
|
| 101 |
--- orig/sed/sed.h
|
| 102 |
+++ mod/sed/sed.h
|
| 103 |
@@ -233,6 +233,7 @@ extern bool use_extended_syntax_p;
|
| 104 |
|
| 105 |
/* Declarations for multibyte character sets. */
|
| 106 |
extern int mb_cur_max;
|
| 107 |
+extern bool is_utf8;
|
| 108 |
|
| 109 |
#ifdef HAVE_MBRTOWC
|
| 110 |
#ifdef HAVE_BTOWC
|
| 111 |
|
| 112 |
|
| 113 |
|