@@ -152,6 +152,8 @@ class Tokenizer:
152
152
MERGES = "tokenizer.ggml.merges"
153
153
BOS_ID = "tokenizer.ggml.bos_token_id"
154
154
EOS_ID = "tokenizer.ggml.eos_token_id"
155
+ EOT_ID = "tokenizer.ggml.eot_token_id"
156
+ EOM_ID = "tokenizer.ggml.eom_token_id"
155
157
UNK_ID = "tokenizer.ggml.unknown_token_id"
156
158
SEP_ID = "tokenizer.ggml.seperator_token_id"
157
159
PAD_ID = "tokenizer.ggml.padding_token_id"
@@ -168,11 +170,16 @@ class Tokenizer:
168
170
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
169
171
CHAT_TEMPLATES = "tokenizer.chat_templates"
170
172
# FIM/Infill special tokens constants
173
+ FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
174
+ FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
175
+ FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
176
+ FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
177
+ FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
178
+ FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
179
+ # deprecated:
171
180
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
172
181
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
173
182
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
174
- EOT_ID = "tokenizer.ggml.eot_token_id"
175
- EOM_ID = "tokenizer.ggml.eom_token_id"
176
183
177
184
class Adapter :
178
185
TYPE = "adapter.type"
@@ -1579,15 +1586,24 @@ def get_type(val: Any) -> GGUFValueType:
1579
1586
KEY_TOKENIZER_MERGES = Keys .Tokenizer .MERGES
1580
1587
KEY_TOKENIZER_BOS_ID = Keys .Tokenizer .BOS_ID
1581
1588
KEY_TOKENIZER_EOS_ID = Keys .Tokenizer .EOS_ID
1589
+ KEY_TOKENIZER_EOT_ID = Keys .Tokenizer .EOT_ID
1590
+ KEY_TOKENIZER_EOM_ID = Keys .Tokenizer .EOM_ID
1582
1591
KEY_TOKENIZER_UNK_ID = Keys .Tokenizer .UNK_ID
1583
1592
KEY_TOKENIZER_SEP_ID = Keys .Tokenizer .SEP_ID
1584
1593
KEY_TOKENIZER_PAD_ID = Keys .Tokenizer .PAD_ID
1585
1594
KEY_TOKENIZER_CLS_ID = Keys .Tokenizer .CLS_ID
1586
1595
KEY_TOKENIZER_MASK_ID = Keys .Tokenizer .MASK_ID
1587
1596
KEY_TOKENIZER_HF_JSON = Keys .Tokenizer .HF_JSON
1588
1597
KEY_TOKENIZER_RWKV = Keys .Tokenizer .RWKV
1589
- KEY_TOKENIZER_PRIFIX_ID = Keys .Tokenizer .PREFIX_ID
1598
+
1599
+ KEY_TOKENIZER_FIM_PRE_ID = Keys .Tokenizer .FIM_PRE_ID
1600
+ KEY_TOKENIZER_FIM_SUF_ID = Keys .Tokenizer .FIM_SUF_ID
1601
+ KEY_TOKENIZER_FIM_MID_ID = Keys .Tokenizer .FIM_MID_ID
1602
+ KEY_TOKENIZER_FIM_PAD_ID = Keys .Tokenizer .FIM_PAD_ID
1603
+ KEY_TOKENIZER_FIM_REP_ID = Keys .Tokenizer .FIM_REP_ID
1604
+ KEY_TOKENIZER_FIM_SEP_ID = Keys .Tokenizer .FIM_SEP_ID
1605
+
1606
+ # deprecated
1607
+ KEY_TOKENIZER_PREFIX_ID = Keys .Tokenizer .PREFIX_ID
1590
1608
KEY_TOKENIZER_SUFFIX_ID = Keys .Tokenizer .SUFFIX_ID
1591
1609
KEY_TOKENIZER_MIDDLE_ID = Keys .Tokenizer .MIDDLE_ID
1592
- KEY_TOKENIZER_EOT_ID = Keys .Tokenizer .EOT_ID
1593
- KEY_TOKENIZER_EOM_ID = Keys .Tokenizer .EOM_ID
0 commit comments