Skip to content

Commit 25c4244

Browse files
committed
fix #9535
Find the position of illegal subcharacters and modify the value of device.pos. Add the ((encoder->status && ((encoder->status & 0xF) || (encoder->status == 0x11))) || encoder->cache) condition. Fix test. Fix test. Delay initialization of the parameter illegal_substchar. Optimize code.
1 parent 3d5df06 commit 25c4244

File tree

2 files changed

+213
-0
lines changed

2 files changed

+213
-0
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,7 @@ mbfl_strcut(
10961096
} else {
10971097
mbfl_convert_filter *encoder = NULL;
10981098
mbfl_convert_filter *decoder = NULL;
1099+
int mode_backup;
10991100
const unsigned char *p, *q, *r;
11001101
struct {
11011102
mbfl_convert_filter encoder;
@@ -1112,6 +1113,8 @@ mbfl_strcut(
11121113
return NULL;
11131114
}
11141115

1116+
mode_backup = decoder->illegal_mode;
1117+
11151118
/* wchar filter */
11161119
if (!(encoder = mbfl_convert_filter_new(
11171120
string->encoding,
@@ -1276,7 +1279,9 @@ mbfl_strcut(
12761279
bk = _bk;
12771280
}
12781281

1282+
decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
12791283
(*encoder->filter_flush)(encoder);
1284+
decoder->illegal_mode = mode_backup;
12801285

12811286
if (bk.decoder.filter_dtor)
12821287
bk.decoder.filter_dtor(&bk.decoder);

ext/mbstring/tests/gh9535.phpt

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
--TEST--
2+
GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
$encodings = [
8+
'BASE64',
9+
'HTML-ENTITIES',
10+
'Quoted-Printable',
11+
'UTF-16',
12+
'UTF-16BE',
13+
'UTF-16LE',
14+
'UTF-7',
15+
'UTF7-IMAP',
16+
'JIS',
17+
'ISO-2022-JP',
18+
'ISO-2022-JP-MS',
19+
'GB18030',
20+
'HZ',
21+
'ISO-2022-KR',
22+
'ISO-2022-JP-2004',
23+
'ISO-2022-JP-MOBILE#KDDI',
24+
'CP50220',
25+
'CP50221',
26+
'CP50222',
27+
];
28+
29+
$input = '宛如繁星般宛如皎月般';
30+
$bytes_length = 15;
31+
foreach($encodings as $encoding) {
32+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
33+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
34+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
35+
echo $encoding.': '.$reconverted_str.PHP_EOL;
36+
}
37+
38+
echo PHP_EOL;
39+
40+
$input = '星のように月のように';
41+
$bytes_length = 20;
42+
foreach($encodings as $encoding) {
43+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
44+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
45+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
46+
echo $encoding.': '.$reconverted_str.PHP_EOL;
47+
}
48+
49+
echo PHP_EOL;
50+
51+
$input = 'あaいb';
52+
$bytes_length = 10;
53+
foreach($encodings as $encoding) {
54+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
55+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
56+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
57+
echo $encoding.': '.$reconverted_str.PHP_EOL;
58+
}
59+
60+
echo PHP_EOL;
61+
62+
$input = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';
63+
$bytes_length = 10;
64+
foreach($encodings as $encoding) {
65+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
66+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
67+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
68+
echo $encoding.': '.$reconverted_str.PHP_EOL;
69+
}
70+
71+
echo PHP_EOL;
72+
73+
$input = '???';
74+
$bytes_length = 2;
75+
foreach($encodings as $encoding) {
76+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
77+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
78+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
79+
echo $encoding.': '.$reconverted_str.PHP_EOL;
80+
}
81+
82+
echo PHP_EOL;
83+
84+
foreach($encodings as $encoding) {
85+
var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
86+
}
87+
88+
?>
89+
--EXPECTF--
90+
BASE64: 宛如繁
91+
HTML-ENTITIES: 宛&#22914
92+
Quoted-Printable: %s
93+
UTF-16: 宛如繁星般宛如
94+
UTF-16BE: 宛如繁星般宛如
95+
UTF-16LE: 宛如繁星般宛如
96+
UTF-7: 宛如繁星
97+
UTF7-IMAP: 宛如繁星
98+
JIS: 宛如繁星般
99+
ISO-2022-JP: 宛如繁星般
100+
ISO-2022-JP-MS: 宛如繁星
101+
GB18030: 宛如繁星般宛如
102+
HZ: 宛如繁星般
103+
ISO-2022-KR: 宛如繁星
104+
ISO-2022-JP-2004: 宛如繁星
105+
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
106+
CP50220: 宛如繁星
107+
CP50221: 宛如繁星
108+
CP50222: 宛如繁星
109+
110+
BASE64: 星のように
111+
HTML-ENTITIES: 星の&#12
112+
Quoted-Printable: 星の
113+
UTF-16: 星のように月のように
114+
UTF-16BE: 星のように月のように
115+
UTF-16LE: 星のように月のように
116+
UTF-7: 星のように月
117+
UTF7-IMAP: 星のように月
118+
JIS: 星のように月の
119+
ISO-2022-JP: 星のように月の
120+
ISO-2022-JP-MS: 星のように月の
121+
GB18030: 星のように月のように
122+
HZ: 星のように月のよ
123+
ISO-2022-KR: 星のように月の
124+
ISO-2022-JP-2004: 星のように月の
125+
ISO-2022-JP-MOBILE#KDDI: 星のように月の
126+
CP50220: 星のように月の
127+
CP50221: 星のように月の
128+
CP50222: 星のように月の
129+
130+
BASE64: %s
131+
HTML-ENTITIES: あa&
132+
Quoted-Printable: あa
133+
UTF-16: あaいb
134+
UTF-16BE: あaいb
135+
UTF-16LE: あaいb
136+
UTF-7: あa
137+
UTF7-IMAP: あa
138+
JIS: あa
139+
ISO-2022-JP: あa
140+
ISO-2022-JP-MS: あa
141+
GB18030: あaいb
142+
HZ: あa
143+
ISO-2022-KR: あa
144+
ISO-2022-JP-2004: あa
145+
ISO-2022-JP-MOBILE#KDDI: あa
146+
CP50220: あa
147+
CP50221: あa
148+
CP50222: あa
149+
150+
BASE64: AAAAAA
151+
HTML-ENTITIES: AAAAAAAAAA
152+
Quoted-Printable: AAAAAAAAAA
153+
UTF-16: AAAAA
154+
UTF-16BE: AAAAA
155+
UTF-16LE: AAAAA
156+
UTF-7: AAAAAAAAAA
157+
UTF7-IMAP: AAAAAAAAAA
158+
JIS: AAAAAAAAAA
159+
ISO-2022-JP: AAAAAAAAAA
160+
ISO-2022-JP-MS: AAAAAAAAAA
161+
GB18030: AAAAAAAAAA
162+
HZ: AAAAAAAAAA
163+
ISO-2022-KR: AAAAAAAAAA
164+
ISO-2022-JP-2004: AAAAAAAAAA
165+
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
166+
CP50220: AAAAAAAAAA
167+
CP50221: AAAAAAAAAA
168+
CP50222: AAAAAAAAAA
169+
170+
BASE64:%s
171+
HTML-ENTITIES: ??
172+
Quoted-Printable: ??
173+
UTF-16: ?
174+
UTF-16BE: ?
175+
UTF-16LE: ?
176+
UTF-7: ??
177+
UTF7-IMAP: ??
178+
JIS: ??
179+
ISO-2022-JP: ??
180+
ISO-2022-JP-MS: ??
181+
GB18030: ??
182+
HZ: ??
183+
ISO-2022-KR: ??
184+
ISO-2022-JP-2004: ??
185+
ISO-2022-JP-MOBILE#KDDI: ??
186+
CP50220: ??
187+
CP50221: ??
188+
CP50222: ??
189+
190+
string(0) ""
191+
string(2) "??"
192+
string(2) "??"
193+
string(2) "??"
194+
string(2) "??"
195+
string(2) "??"
196+
string(2) "??"
197+
string(2) "??"
198+
string(2) "??"
199+
string(2) "??"
200+
string(2) "??"
201+
string(2) "??"
202+
string(2) "??"
203+
string(2) "??"
204+
string(2) "??"
205+
string(2) "??"
206+
string(2) "??"
207+
string(2) "??"
208+
string(2) "??"

0 commit comments

Comments
 (0)