merge resolved
- fixed issues in convert - tested several deepseek models

This commit is contained in:
parent ded92076a8
commit a94c241751
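The convert changes below touch the DeepSeek-2 MLA/MoE metadata keys, and the message above only notes that several models were tested. As a minimal sketch of one such check (assuming the gguf-py package; the model path is illustrative, not from this commit), the written metadata can be read back and scanned for missing or duplicated keys:

```python
# Minimal smoke check, assuming gguf-py is installed; the path is
# illustrative. Reads back the metadata the convert script wrote and
# prints the DeepSeek-2 MLA/MoE keys so merge mistakes are easy to spot.
from gguf import GGUFReader

reader = GGUFReader("deepseek-ocr-f16.gguf")
for name in reader.fields:
    if any(s in name for s in ("lora", "expert", "leading_dense", "rope.dimension")):
        print(name)
```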
```diff
@@ -7519,9 +7519,6 @@ class DeepseekV2Model(TextModel):
         first_k_dense_replace = hparams["num_hidden_layers"] if not has_moe else 0
         self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
         kv_lora_rank = hparams["kv_lora_rank"] if hparams.get("kv_lora_rank") is not None else 512
         routed_scaling_factor = hparams.get("routed_scaling_factor", 1.0)
         norm_topk_prob = hparams.get("norm_topk_prob", False)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
         if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
             self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
```
```diff
@@ -7534,7 +7531,6 @@ class DeepseekV2Model(TextModel):
         self.gguf_writer.add_value_length(kv_lora_rank)
         self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
         self.gguf_writer.add_value_length_mla(hparams["v_head_dim"])
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])

         # MoE parameters (required by C++ code for DEEPSEEK2 arch)
         # For non-MoE models like Youtu, use intermediate_size as expert_feed_forward_length
```
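The two comments above describe a fallback for checkpoints without MoE blocks; a minimal sketch of that rule, assuming a plain `hparams` dict rather than the converter's actual state:

```python
# For non-MoE configs (e.g. Youtu) there is no moe_intermediate_size,
# so the dense intermediate_size stands in as the expert FFN length.
# `hparams` is an illustrative config dict, not the converter's state.
if "moe_intermediate_size" in hparams:
    expert_ffn_length = hparams["moe_intermediate_size"]
else:
    expert_ffn_length = hparams["intermediate_size"]
```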
```diff
@@ -7554,11 +7550,6 @@ class DeepseekV2Model(TextModel):

         if (norm_topk_prob := hparams.get("norm_topk_prob")) is not None and norm_topk_prob:
             self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
         self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
         self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
         self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
         self.gguf_writer.add_expert_weights_norm(norm_topk_prob)

         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
```
```diff
@@ -1312,7 +1312,7 @@ class TensorNameMap:
             "vision_model.positional_embedding_vlm", # llama 4
             "vision_tower.patch_embed.pos_emb", # kimi-vl
             "visual.pos_embed", # qwen3vl
             "model.vision.patch_embedding.position_embedding", # cogvlm
-            "model.vision.patch_embedding.position_embedding", # cogvlm
+            "visual.embeddings.position_embedding", # glm4v
         ),
```
```diff
@@ -4917,7 +4917,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
        case LLM_ARCH_DEEPSEEK2OCR:
            {
                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
                const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                const bool is_ocr = (arch == LLM_ARCH_DEEPSEEK2OCR);

                const bool is_mla = hparams.is_mla();
```
```diff
@@ -3,8 +3,7 @@
 llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
     llm_graph_context(params) {
     // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
     bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
-    bool is_ocr = (model.arch == LLM_ARCH_DEEPSEEK2OCR);
+    bool is_ocr = model.arch == LLM_ARCH_DEEPSEEK2OCR;

     const bool is_mla = hparams.is_mla();
```
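Both C++ hunks above rely on the same layer-count heuristic; restated as a hedged Python helper (the function name is made up for illustration):

```python
# Lite detection in the hunks above keys off n_layer alone:
# 27 layers = DeepSeek-V2-Lite, 26 = GigaChat3-10B-A1.8B.
# Function name is illustrative, not part of the codebase.
def is_lite_variant(n_layer: int) -> bool:
    return n_layer in (26, 27)
```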
```diff
@@ -83,7 +82,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
                 cb(Qcur, "q_pe", il);
                 cb(Kcur, "k_pe", il);

-                cur = build_attn(inp_attn,
+                cur = build_attn(inp_attn_kv,
                         model.layers[il].wo, NULL,
                         Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
                 cb(cur, "attn_out", il);
```
```diff
@@ -4180,37 +4180,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         return false;
     }

-    // print debug nodes
-    if (ctx->debug_graph) {
-        LOG_INF("\n\n---\n\n");
-        LOG_INF("\n\nDebug graph:\n\n");
-        for (ggml_tensor * t : ctx->debug_print_tensors) {
-            std::vector<uint8_t> data(ggml_nbytes(t));
-            ggml_backend_tensor_get(t, data.data(), 0, ggml_nbytes(t));
-            print_tensor_info(t);
-            print_tensor_shape(t);
-            print_tensor_sum(t, data.data(), 3);
-            std::string tname_s = std::string(t->name);
-
-            bool is_stored = false;
-            std::vector<std::string> patterns = {
-                /* Add tensor names here to dump (e.g. "sam_output") */
-            };
-
-            for (auto & p : patterns) {
-                if (tname_s == p) {
-                    save_tensor_to_file(t, data.data());
-                    is_stored = true;
-                    break;
-                }
-            }
-
-            if (!is_stored) {
-                print_tensor_data(t, data.data(), 3);
-            }
-        }
-    }
-
    // the last node is the embedding tensor
    ggml_tensor * embeddings = ggml_graph_node(gf, -1);

```
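The block removed above was an opt-in tensor dump for debugging the vision graph; a rough Python analogue of the same idea, assuming numpy arrays in place of ggml tensors (all names illustrative):

```python
# Same debugging idea as the removed C++ block: print shape and a cheap
# checksum for each tensor, and save name-matched tensors for offline
# diffing against a reference run. Names here are illustrative.
import numpy as np

def dump_debug_tensor(name: str, t: np.ndarray, patterns: list[str]) -> None:
    print(name, t.shape, "sum =", float(t.sum()))  # like print_tensor_sum
    if name in patterns:
        np.save(f"{name}.npy", t)                  # like save_tensor_to_file
    else:
        print(t.reshape(-1)[:3])                   # like print_tensor_data(t, data, 3)
```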
```diff
@@ -83,9 +83,9 @@ def read_expected_output(file_path: str) -> str:

 def main():
     ap = argparse.ArgumentParser(description="Compare llama.cpp and HuggingFace DeepSeek-OCR outputs")
-    ap.add_argument("--llama-model", default="gguf_models/deepseek-ai/deepseek-ocr-f16.gguf",
+    ap.add_argument("--llama-model", default="gguf_models/deepseek-ai/deepseek-ocr-q8_0_test.gguf",
                     help="Path to llama.cpp GGUF model")
-    ap.add_argument("--mmproj", default="gguf_models/deepseek-ai/mmproj-deepseek-ocr-f16.gguf",
+    ap.add_argument("--mmproj", default="gguf_models/deepseek-ai/mmproj-deepseek-ocr-f16_test.gguf",
                     help="Path to mmproj GGUF file")
     ap.add_argument("--image", default="test-1.jpeg",
                     help="Path to test image")
```