visual_model warmup (technically) works
This commit is contained in:
parent
63a042f21e
commit
89afda8da9
|
|
@ -5412,6 +5412,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_JANUS_PRO:
|
||||
case PROJECTOR_TYPE_COGVLM:
|
||||
case PROJECTOR_TYPE_DEEPSEEKOCR:
|
||||
{
|
||||
// do nothing
|
||||
} break;
|
||||
|
|
@ -5554,6 +5555,10 @@ bool clip_is_gemma3(const struct clip_ctx * ctx) {
|
|||
return ctx->proj_type() == PROJECTOR_TYPE_GEMMA3;
|
||||
}
|
||||
|
||||
bool clip_is_deepseekocr(const struct clip_ctx * ctx) {
|
||||
return ctx->proj_type() == PROJECTOR_TYPE_DEEPSEEKOCR;
|
||||
}
|
||||
|
||||
bool clip_has_vision_encoder(const struct clip_ctx * ctx) {
|
||||
return ctx->model.modality == CLIP_MODALITY_VISION;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -105,6 +105,8 @@ bool clip_is_glm(const struct clip_ctx * ctx);
|
|||
bool clip_is_qwen2vl(const struct clip_ctx * ctx);
|
||||
bool clip_is_llava(const struct clip_ctx * ctx);
|
||||
bool clip_is_gemma3(const struct clip_ctx * ctx);
|
||||
bool clip_is_deepseekocr(const struct clip_ctx * ctx);
|
||||
|
||||
|
||||
bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
|
||||
|
||||
|
|
|
|||
|
|
@ -810,7 +810,8 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
|
|||
|
||||
if (clip_is_llava(ctx_clip)
|
||||
|| clip_is_minicpmv(ctx_clip)
|
||||
|| clip_is_glm(ctx_clip)) {
|
||||
|| clip_is_glm(ctx_clip)
|
||||
|| clip_is_deepseekocr(ctx_clip)) {
|
||||
// TODO @ngxson : llava does not support batched encoding ; this should be fixed inside clip_image_batch_encode()
|
||||
const auto & entries = image_tokens->batch_f32.entries;
|
||||
for (size_t i = 0; i < entries.size(); i++) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue