Skip to content

Commit f5bb86d

Browse files
committed
fix #9535
1 parent b0d606d commit f5bb86d

File tree

2 files changed: 154 additions and 12 deletions.

2 files changed: 154 additions and 12 deletions.

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1097,7 +1097,8 @@ mbfl_strcut(
10971097
mbfl_convert_filter *encoder = NULL;
10981098
mbfl_convert_filter *decoder = NULL;
10991099
const unsigned char *p, *q, *r;
1100-
int original_pos = 0;
1100+
size_t position = 0;
1101+
unsigned char illegal_substchar;
11011102
struct {
11021103
mbfl_convert_filter encoder;
11031104
mbfl_convert_filter decoder;
@@ -1277,10 +1278,16 @@ mbfl_strcut(
12771278
bk = _bk;
12781279
}
12791280

1280-
if (device.pos != 0) {
1281-
original_pos = device.pos;
1282-
(*encoder->filter_flush)(encoder);
1283-
device.pos = original_pos;
1281+
position = device.pos;
1282+
(*encoder->filter_flush)(encoder);
1283+
illegal_substchar = (unsigned char) encoder->illegal_substchar;
1284+
1285+
while(device.pos > position) {
1286+
/* check illegal output */
1287+
if (device.buffer[position++] == illegal_substchar) {
1288+
device.pos = position - 1;
1289+
break;
1290+
}
12841291
}
12851292

12861293
if (bk.decoder.filter_dtor)

ext/mbstring/tests/gh9535.phpt

Lines changed: 142 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,148 @@ GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in
44
mbstring
55
--FILE--
66
<?php
7+
$encodings = [
8+
'BASE64',
9+
'HTML-ENTITIES',
10+
'Quoted-Printable',
11+
'UTF-16',
12+
'UTF-16BE',
13+
'UTF-16LE',
14+
'UTF-7',
15+
'UTF7-IMAP',
16+
'JIS',
17+
'ISO-2022-JP',
18+
'ISO-2022-JP-MS',
19+
'GB18030',
20+
'HZ',
21+
'ISO-2022-KR',
22+
'ISO-2022-JP-2004',
23+
'ISO-2022-JP-MOBILE#KDDI',
24+
'CP50220',
25+
'CP50221',
26+
'CP50222',
27+
];
28+
29+
$input = '宛如繁星般宛如皎月般';
30+
$bytes_length = 15;
31+
foreach($encodings as $encoding) {
32+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
33+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
34+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
35+
echo $encoding.': '.$reconverted_str.PHP_EOL;
36+
}
37+
38+
echo PHP_EOL;
39+
40+
$input = '星のように月のように';
41+
$bytes_length = 20;
42+
foreach($encodings as $encoding) {
43+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
44+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
45+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
46+
echo $encoding.': '.$reconverted_str.PHP_EOL;
47+
}
48+
49+
echo PHP_EOL;
50+
751
$input = 'あaいb';
852
$bytes_length = 10;
9-
$encoding = "ISO-2022-JP";
10-
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
11-
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
12-
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
13-
var_dump($reconverted_str);
53+
foreach($encodings as $encoding) {
54+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
55+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
56+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
57+
echo $encoding.': '.$reconverted_str.PHP_EOL;
58+
}
59+
60+
echo PHP_EOL;
61+
62+
$input = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';
63+
$bytes_length = 10;
64+
foreach($encodings as $encoding) {
65+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
66+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
67+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
68+
echo $encoding.': '.$reconverted_str.PHP_EOL;
69+
}
70+
1471
?>
15-
--EXPECT--
16-
string(4) "あa"
72+
--EXPECTF--
73+
BASE64: 宛如繁
74+
HTML-ENTITIES: 宛&#22914
75+
Quoted-Printable: %s
76+
UTF-16: 宛如繁星般宛如
77+
UTF-16BE: 宛如繁星般宛如
78+
UTF-16LE: 宛如繁星般宛如
79+
UTF-7: 宛如繁星
80+
UTF7-IMAP: 宛如繁星
81+
JIS: 宛如繁星般
82+
ISO-2022-JP: 宛如繁星般
83+
ISO-2022-JP-MS: 宛如繁星
84+
GB18030: 宛如繁星般宛如
85+
HZ: 宛如繁星般
86+
ISO-2022-KR: 宛如繁星
87+
ISO-2022-JP-2004: 宛如繁星
88+
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
89+
CP50220: 宛如繁星
90+
CP50221: 宛如繁星
91+
CP50222: 宛如繁星
92+
93+
BASE64: 星のように
94+
HTML-ENTITIES: 星の&#12
95+
Quoted-Printable: 星の
96+
UTF-16: 星のように月のように
97+
UTF-16BE: 星のように月のように
98+
UTF-16LE: 星のように月のように
99+
UTF-7: 星のように月
100+
UTF7-IMAP: 星のように月
101+
JIS: 星のように月の
102+
ISO-2022-JP: 星のように月の
103+
ISO-2022-JP-MS: 星のように月の
104+
GB18030: 星のように月のように
105+
HZ: 星のように月のよ
106+
ISO-2022-KR: 星のように月の
107+
ISO-2022-JP-2004: 星のように月の
108+
ISO-2022-JP-MOBILE#KDDI: 星のように月の
109+
CP50220: 星のように月の
110+
CP50221: 星のように月の
111+
CP50222: 星のように月の
112+
113+
BASE64: %s
114+
HTML-ENTITIES: あa&
115+
Quoted-Printable: あa
116+
UTF-16: あaいb
117+
UTF-16BE: あaいb
118+
UTF-16LE: あaいb
119+
UTF-7: あa
120+
UTF7-IMAP: あa
121+
JIS: あa
122+
ISO-2022-JP: あa
123+
ISO-2022-JP-MS: あa
124+
GB18030: あaいb
125+
HZ: あa
126+
ISO-2022-KR: あa
127+
ISO-2022-JP-2004: あa
128+
ISO-2022-JP-MOBILE#KDDI: あa
129+
CP50220: あa
130+
CP50221: あa
131+
CP50222: あa
132+
133+
BASE64: AAAAAA
134+
HTML-ENTITIES: AAAAAAAAAA
135+
Quoted-Printable: AAAAAAAAAA
136+
UTF-16: AAAAA
137+
UTF-16BE: AAAAA
138+
UTF-16LE: AAAAA
139+
UTF-7: AAAAAAAAAA
140+
UTF7-IMAP: AAAAAAAAAA
141+
JIS: AAAAAAAAAA
142+
ISO-2022-JP: AAAAAAAAAA
143+
ISO-2022-JP-MS: AAAAAAAAAA
144+
GB18030: AAAAAAAAAA
145+
HZ: AAAAAAAAAA
146+
ISO-2022-KR: AAAAAAAAAA
147+
ISO-2022-JP-2004: AAAAAAAAAA
148+
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
149+
CP50220: AAAAAAAAAA
150+
CP50221: AAAAAAAAAA
151+
CP50222: AAAAAAAAAA

0 commit comments

Comments
 (0)