From 07a7412a3b7518a55bf6fe191beb754a1dd2a561 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Mon, 2 Feb 2026 20:59:06 +0100 Subject: [PATCH] mtmd: add min/max pixels gguf metadata (#19273) --- gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 6 ++++++ tools/mtmd/clip-impl.h | 2 ++ 3 files changed, 10 insertions(+) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 31273b2b5a..6f56d36c59 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -284,6 +284,8 @@ class Keys: class ClipVision: PROJECTOR_TYPE = "clip.vision.projector_type" # for mixed modality models IMAGE_SIZE = "clip.vision.image_size" + IMAGE_MIN_PIXELS = "clip.vision.image_min_pixels" + IMAGE_MAX_PIXELS = "clip.vision.image_max_pixels" PREPROC_IMAGE_SIZE = "clip.vision.preproc_image_size" PATCH_SIZE = "clip.vision.patch_size" EMBEDDING_LENGTH = "clip.vision.embedding_length" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 7fbb78866b..0b9c650161 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -1113,6 +1113,12 @@ class GGUFWriter: def add_vision_image_size(self, value: int) -> None: self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value) + def add_vision_max_pixels(self, value: int) -> None: + self.add_uint32(Keys.ClipVision.IMAGE_MAX_PIXELS, value) + + def add_vision_min_pixels(self, value: int) -> None: + self.add_uint32(Keys.ClipVision.IMAGE_MIN_PIXELS, value) + def add_vision_preproc_image_size(self, value: int) -> None: self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value) diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index dd693623a2..ad232178bf 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -36,6 +36,8 @@ // vision-specific #define KEY_VISION_PROJ_TYPE "clip.vision.projector_type" // for models with mixed modalities #define KEY_IMAGE_SIZE "clip.vision.image_size" +#define KEY_IMAGE_MIN_PIXELS "clip.vision.image_min_pixels" +#define KEY_IMAGE_MAX_PIXELS "clip.vision.image_max_pixels" #define KEY_PREPROC_IMAGE_SIZE "clip.vision.preproc_image_size" #define KEY_PATCH_SIZE "clip.vision.patch_size" #define KEY_IMAGE_MEAN "clip.vision.image_mean"