From df7fb92170f1c6ed08bf0943d6d8bf1191543a95 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 8 Jan 2026 09:29:15 +0100 Subject: [PATCH] model-conversion : remove -st targets for converted model (#18689) This commit removes the `-st` make target for running the converted embedding model. The motivation for this is that the pooling type is now part of the .gguf metadata of the model and this is used by llama-debug when running the model. So there is no need to specify the pooling type separately any more. The commit also adds an option to specify the type of normalization applied to the output embeddings when running the converted model. And the readme documentation has been updated to reflect these changes. --- examples/model-conversion/Makefile | 7 ++----- examples/model-conversion/README.md | 18 ++++++++++++++---- .../scripts/embedding/run-converted-model.sh | 14 +++++--------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/examples/model-conversion/Makefile b/examples/model-conversion/Makefile index f8dc525a77..359b9cfd8e 100644 --- a/examples/model-conversion/Makefile +++ b/examples/model-conversion/Makefile @@ -138,16 +138,13 @@ embedding-run-original-model-st: embedding-run-original-model embedding-run-converted-model: @./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \ - $(if $(USE_POOLING),--pooling) - -embedding-run-converted-model-st: USE_POOLING=1 -embedding-run-converted-model-st: embedding-run-converted-model + $(if $(EMBD_NORMALIZE),--embd-normalize "$(EMBD_NORMALIZE)") embedding-verify-logits: embedding-run-original-model embedding-run-converted-model @./scripts/embedding/compare-embeddings-logits.sh \ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") -embedding-verify-logits-st: embedding-run-original-model-st embedding-run-converted-model 
@./scripts/embedding/compare-embeddings-logits.sh \ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") diff --git a/examples/model-conversion/README.md b/examples/model-conversion/README.md index 8163b306b4..637870a5c1 100644 --- a/examples/model-conversion/README.md +++ b/examples/model-conversion/README.md @@ -198,14 +198,13 @@ model, and the other is a text file which allows for manual visual inspection. #### Using SentenceTransformer with numbered layers For models that have numbered SentenceTransformer layers (01_Pooling, 02_Dense, -03_Dense, 04_Normalize), use the `-st` targets to apply all these layers: +03_Dense, 04_Normalize), these will be applied automatically when running the +converted model but currently there is a separate target to run the original +version: ```console # Run original model with SentenceTransformer (applies all numbered layers) (venv) $ make embedding-run-original-model-st - -# Run converted model with pooling enabled -(venv) $ make embedding-run-converted-model-st ``` This will use the SentenceTransformer library to load and run the model, which @@ -213,6 +212,17 @@ automatically applies all the numbered layers in the correct order. This is particularly useful when comparing with models that should include these additional transformation layers beyond just the base model output. +The type of normalization can be specified for the converted model but is not +strictly necessary as the verification uses cosine similarity and the magnitude +of the output vectors does not affect this. But the normalization type can be +specified as an argument to the target which might be useful for manual +inspection: +```console +(venv) $ make embedding-verify-logits-st EMBD_NORMALIZE=1 +``` +The original model will apply the normalization according to the normalization +layer specified in the modules.json configuration file. 
+ ### Model conversion After updates have been made to [gguf-py](../../gguf-py) to add support for the new model the model can be converted to GGUF format using the following command: diff --git a/examples/model-conversion/scripts/embedding/run-converted-model.sh b/examples/model-conversion/scripts/embedding/run-converted-model.sh index 5d264b0663..84625cec3d 100755 --- a/examples/model-conversion/scripts/embedding/run-converted-model.sh +++ b/examples/model-conversion/scripts/embedding/run-converted-model.sh @@ -5,7 +5,7 @@ set -e # Parse command line arguments CONVERTED_MODEL="" PROMPTS_FILE="" -USE_POOLING="" +EMBD_NORMALIZE="2" while [[ $# -gt 0 ]]; do case $1 in @@ -13,9 +13,9 @@ while [[ $# -gt 0 ]]; do PROMPTS_FILE="$2" shift 2 ;; - --pooling) - USE_POOLING="1" - shift + --embd-normalize) + EMBD_NORMALIZE="$2" + shift 2 ;; *) if [ -z "$CONVERTED_MODEL" ]; then @@ -51,8 +51,4 @@ fi echo $CONVERTED_MODEL cmake --build ../../build --target llama-debug -j8 -if [ -n "$USE_POOLING" ]; then - ../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding --pooling mean -p "$PROMPT" --save-logits -else - ../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding --pooling none -p "$PROMPT" --save-logits -fi +../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding -p "$PROMPT" --save-logits --embd-normalize $EMBD_NORMALIZE