Skip to content

Commit c417990

Browse files
committed
Merge branch 'PHP-8.2'
* PHP-8.2: Restore backwards-compatible mappings of U+005C and U+007E to SJIS-2004
2 parents 3ce888a + 9beb93f commit c417990

File tree

3 files changed

+11
-28
lines changed

3 files changed

+11
-28
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,15 @@
2424
/*
2525
* The source code included in this file was separated from mbfilter_sjis.c
2626
* by rui hirokawa <[email protected]> on 15 aug 2011.
27-
*
2827
*/
2928

29+
/* Although the specification for Shift-JIS-2004 indicates that 0x5C and
30+
* 0x7E should (respectively) represent a Yen sign and an overbar, feedback
31+
* from Japanese PHP users indicates that they prefer 0x5C and 0x7E to be
32+
* treated as equivalent to U+005C and U+007E. This is the historical
33+
* behavior of mbstring, and promotes compatibility with other software
34+
* which handles Shift-JIS and Shift-JIS-2004 text in this way. */
35+
3036
#include "mbfilter.h"
3137
#include "mbfilter_sjis_2004.h"
3238
#include "mbfilter_euc_jp_2004.h"
@@ -565,13 +571,6 @@ int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter)
565571
}
566572
}
567573

568-
if (s1 <= 0 && (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 || filter->to->no_encoding == mbfl_no_encoding_eucjp2004) && (c == 0x5C || c == 0x7E)) {
569-
/* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need
570-
* to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde.
571-
* Likewise for EUC-JP-2004 */
572-
s1 = c;
573-
}
574-
575574
/* check for major japanese chars: U+4E00 - U+9FFF */
576575
if (s1 <= 0) {
577576
for (k = 0; k < uni2jis_tbl_len; k++) {
@@ -1077,12 +1076,6 @@ process_codepoint: ;
10771076
}
10781077
}
10791078

1080-
if (!s && (w == 0x5C || w == 0x7E)) {
1081-
/* EUC-JP-2004 can represent ASCII characters directly, so there is no need
1082-
* to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */
1083-
s = w;
1084-
}
1085-
10861079
/* Check for major Japanese chars: U+4E00-U+9FFF */
10871080
if (!s) {
10881081
for (int k = 0; k < uni2jis_tbl_len; k++) {
@@ -1336,12 +1329,6 @@ process_codepoint: ;
13361329
}
13371330
}
13381331

1339-
if (!s && (w == 0x5C || w == 0x7E)) {
1340-
/* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need
1341-
* to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */
1342-
s = w;
1343-
}
1344-
13451332
/* Check for major Japanese chars: U+4E00-U+9FFF */
13461333
if (!s) {
13471334
for (int k = 0; k < uni2jis_tbl_len; k++) {

ext/mbstring/libmbfl/filters/unicode_table_jis2004.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,11 +1608,11 @@ static const unsigned short ucs_a1_jisx0213_table[] = { // 0x0000 - 0x045f
16081608
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
16091609
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
16101610
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
1611-
0x0058,0x0059,0x005A,0x005B,0x2140,0x005D,0x005E,0x005F,
1611+
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
16121612
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
16131613
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
16141614
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
1615-
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x2141,0x007F,
1615+
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
16161616
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
16171617
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
16181618
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,

ext/mbstring/tests/sjis2004_encoding.phpt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,8 @@ while ($line = fgets($fp, 256)) {
3838
}
3939
}
4040

41-
/* U+007E is TILDE, Shift-JIS 0x8160 is WAVE DASH */
42-
$fromUnicode["\x00\x7E"] = "\x81\x60";
43-
44-
/* U+005C is backslash, Shift-JIS 0x815F is REVERSE SOLIDUS
45-
* (i.e. a fancy way to say "backslash") */
46-
$fromUnicode["\x00\x5C"] = "\x81\x5F";
41+
$fromUnicode["\x00\x7E"] = "\x7E";
42+
$fromUnicode["\x00\x5C"] = "\x5C";
4743

4844
testAllValidChars($validChars, 'SJIS-2004', 'UTF-32BE');
4945
echo "SJIS-2004 verification and conversion works for all valid characters\n";

0 commit comments

Comments
 (0)