model-conversion : add trust_remote_code for embedding scripts (#18288)

This commit adds the `trust_remote_code=True` parameter when loading models and configurations in the embedding model conversion scripts. It also adds a cast to float before converting the hidden states to a NumPy array, for models that might use a data type that NumPy does not support, for example bfloat16. The motivation for this is that some models may require custom code to be executed during loading, and setting `trust_remote_code` to `True` avoids getting prompted for confirmation. Future work will consolidate the embedding conversion scripts with the causal conversion scripts to avoid code duplication, but in the meantime it would be nice to have this fix in place.
2025-12-23 07:27:37 +01:00 · 2025-12-23 07:27:37 +01:00 · 847c35f7d5
parent a6a552e4ec
commit 847c35f7d5
2 changed files with 7 additions and 7 deletions
--- a/examples/model-conversion/scripts/embedding/run-original-model.py
+++ b/examples/model-conversion/scripts/embedding/run-original-model.py
@ -45,7 +45,7 @@ if use_sentence_transformers:
 else:
    tokenizer = AutoTokenizer.from_pretrained(model_path)

-    config = AutoConfig.from_pretrained(model_path)
+    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

    # This can be used to override the sliding window size for manual testing. This
    # can be useful to verify the sliding window attention mask in the original model
@ -64,12 +64,12 @@ else:

        try:
            model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
-            model = model_class.from_pretrained(model_path, config=config)
+            model = model_class.from_pretrained(model_path, config=config, trust_remote_code=True)
        except (ImportError, AttributeError) as e:
            print(f"Failed to import or load model: {e}")
            exit(1)
    else:
-        model = AutoModel.from_pretrained(model_path, config=config)
+        model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
    print(f"Model class: {type(model)}")
    print(f"Model file: {type(model).__module__}")

@ -123,7 +123,7 @@ with torch.no_grad():
        outputs = model(**encoded)
        hidden_states = outputs.last_hidden_state  # Shape: [batch_size, seq_len, hidden_size]

-        all_embeddings = hidden_states[0].cpu().numpy()  # Shape: [seq_len, hidden_size]
+        all_embeddings = hidden_states[0].float().cpu().numpy()  # Shape: [seq_len, hidden_size]

        print(f"Hidden states shape: {hidden_states.shape}")
        print(f"All embeddings shape: {all_embeddings.shape}")
--- a/examples/model-conversion/scripts/utils/semantic_check.py
+++ b/examples/model-conversion/scripts/utils/semantic_check.py
@ -166,7 +166,7 @@ def main():
    # Load the python model to get configuration information and also to load the tokenizer.
    print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)
-    config = AutoConfig.from_pretrained(args.model_path)
+    config = AutoConfig.from_pretrained(args.model_path, trust_remote_code=True)

    if unreleased_model_name:
        model_name_lower = unreleased_model_name.lower()
@ -186,9 +186,9 @@ def main():
            exit(1)
    else:
        if args.causal:
-            model = AutoModelForCausalLM.from_pretrained(args.model_path)
+            model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True)
        else:
-            model = AutoModel.from_pretrained(args.model_path)
+            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)

    encoded = tokenizer(prompt, return_tensors="pt")
    tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])