@@ -16,24 +16,25 @@ defmodule String.Unicode do
16
16
cluster_path = Path . join ( __DIR__ , "GraphemeBreakProperty.txt" )
17
17
regex = ~r/ (?:^([0-9A-F]+)(?:\. \. ([0-9A-F]+))?)\s +;\s (\w +)/ m
18
18
19
# Builds, in the module body, a map from each class name found in the data
# file to the list of UTF-8 encoded codepoints (binaries) in that class.
# Lines the regex does not match (`nil`) leave the accumulator untouched.
cluster =
  cluster_path
  |> File.stream!()
  |> Enum.reduce(%{}, fn line, acc ->
    case Regex.run(regex, line, capture: :all_but_first) do
      # The D800..DFFF range is skipped: surrogates have no UTF-8 encoding,
      # so `<<int::utf8>>` would raise for them.
      ["D800", "DFFF", _class] ->
        acc

      # Single codepoint: the optional range end was captured as "".
      [first, "", class] ->
        codepoint = <<String.to_integer(first, 16)::utf8>>
        # Reuse the binary computed above instead of re-encoding it.
        Map.update(acc, class, [codepoint], &[codepoint | &1])

      # Inclusive range first..last: expand to every codepoint in it.
      [first, last, class] ->
        range = String.to_integer(first, 16)..String.to_integer(last, 16)
        codepoints = Enum.map(range, fn int -> <<int::utf8>> end)
        Map.update(acc, class, codepoints, &(codepoints ++ &1))

      nil ->
        acc
    end
  end)
37
38
38
39
# Don't break CRLF
39
40
def next_grapheme_size ( << ?\r , ?\n , rest :: binary >> ) do
@@ -69,7 +70,7 @@ defmodule String.Unicode do
69
70
end
70
71
71
72
# Handle Hangul V
72
- for codepoint <- cluster [ "LV" ] ++ cluster [ "V" ] do
73
+ for codepoint <- cluster [ "LV" ] ++ cluster [ "V" ] do
73
74
def next_grapheme_size ( << unquote ( codepoint ) , rest :: binary >> ) do
74
75
next_hangul_v_size ( rest , unquote ( byte_size ( codepoint ) ) )
75
76
end
@@ -102,7 +103,7 @@ defmodule String.Unicode do
102
103
x when x <= 0x007F -> next_extend_size ( rest , 1 , :other )
103
104
x when x <= 0x07FF -> next_extend_size ( rest , 2 , :other )
104
105
x when x <= 0xFFFF -> next_extend_size ( rest , 3 , :other )
105
- _ -> next_extend_size ( rest , 4 , :other )
106
+ _ -> next_extend_size ( rest , 4 , :other )
106
107
end
107
108
end
108
109
@@ -180,6 +181,7 @@ defmodule String.Unicode do
180
181
next_extend_size ( rest , size + unquote ( byte_size ( codepoint ) ) , :other )
181
182
end
182
183
end
184
+
183
185
# Hands `rest` and the accumulated `size` unchanged to next_extend_size/3,
# using :other as the third argument.
defp next_regional_size(rest, size), do: next_extend_size(rest, size, :other)
@@ -244,7 +246,7 @@ defmodule String.Unicode do
244
246
245
247
# Adds `size` to the size of the next grapheme found in `rest`.
#
# Returns `{total_size, remaining}`. When next_grapheme_size/1 yields nil,
# `size` is returned as is, together with the untouched `rest`.
defp next_prepend_size(rest, size) do
  case next_grapheme_size(rest) do
    nil ->
      {size, rest}

    {more, remaining} ->
      {more + size, remaining}
  end
end
0 commit comments