@@ -686,9 +686,54 @@ def recurse_elems(elem: Any):
686
686
config_dict ["hidden_act" ] = config_dict .get ("activation" , "silu" )
687
687
config_dict ["tie_word_embeddings" ] = config_dict .get (
688
688
"tie_embeddings" , False )
689
- config_dict ["max_seq_len" ] = config_dict .get ("max_seq_len" , 128_000 )
690
- config_dict ["max_position_embeddings" ] = config_dict .get (
691
- "max_position_embeddings" , 128_000 )
689
+ # Check if max_position_embeddings is in params.json
690
+ mpe_from_params = config_dict .get ("max_position_embeddings" )
691
+ final_mpe_to_set = mpe_from_params
692
+
693
+ if final_mpe_to_set is None :
694
+ # Not found in params.json, try to get from standard HF AutoConfig
695
+ hf_config_for_defaults = None
696
+ try :
697
+ trust_remote_code_val = kwargs .get ("trust_remote_code" , False )
698
+ token_val = kwargs .get ("token" ) # Passed from get_config
699
+
700
+ hf_config_for_defaults = AutoConfig .from_pretrained (
701
+ model ,
702
+ revision = revision ,
703
+ trust_remote_code = trust_remote_code_val ,
704
+ token = token_val )
705
+ except Exception as e :
706
+ error_message = (
707
+ "Invalid repository ID or local directory specified:"
708
+ " '{model}'.\n Please verify the following requirements:\n "
709
+ "1. Provide a valid Hugging Face repository ID.\n "
710
+ "2. Specify a local directory that contains a recognized "
711
+ "configuration file.\n " ).format (model = model )
712
+
713
+ raise ValueError (error_message ) from e
714
+
715
+ if hf_config_for_defaults :
716
+ # Try to get from text_config first, then top-level
717
+ mpe_from_hf_config = None
718
+ text_config_obj = getattr (hf_config_for_defaults , "text_config" ,
719
+ None )
720
+ if text_config_obj and hasattr (text_config_obj ,
721
+ "max_position_embeddings" ):
722
+ mpe_from_hf_config = getattr (text_config_obj ,
723
+ "max_position_embeddings" , None )
724
+
725
+ if mpe_from_hf_config is None and hasattr (
726
+ hf_config_for_defaults , "max_position_embeddings" ):
727
+ mpe_from_hf_config = getattr (hf_config_for_defaults ,
728
+ "max_position_embeddings" , None )
729
+
730
+ if mpe_from_hf_config is not None :
731
+ final_mpe_to_set = mpe_from_hf_config
732
+
733
+ if final_mpe_to_set is None : # Still not found, use ultimate fallback
734
+ final_mpe_to_set = 128_000
735
+
736
+ config_dict ["max_position_embeddings" ] = final_mpe_to_set
692
737
693
738
if config_dict .get ("quantization" ) is not None :
694
739
quantization = config_dict .get ("quantization" , {})
0 commit comments