Compare commits

...

2 Commits

Author SHA1 Message Date
Christian Zhou-Zheng c8ecbc67e2 oops, actually fix gguf_writer placement 2024-06-03 19:34:37 -04:00
Christian Zhou-Zheng efead0408c fix gguf_writer placement and remove comments 2024-06-03 19:34:01 -04:00
3 changed files with 11 additions and 14 deletions

View File

@ -81,7 +81,14 @@ models = [
{"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
{"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
{"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
{"name": "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom-7b1", },
{"name": "gptbigcode", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/gpt_bigcode-santacoder", },
{"name": "phi2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
{"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B-Chat", },
]

View File

@ -56,11 +56,11 @@ class Model:
part_names: list[str]
is_safetensors: bool
hparams: dict[str, Any]
gguf_writer: gguf.GGUFManager
block_count: int
tensor_map: gguf.TensorNameMap
tensor_names: set[str] | None
fname_out: Path
gguf_writer: gguf.GGUFManager
# subclasses should define this!
model_arch: gguf.MODEL_ARCH
@ -82,8 +82,6 @@ class Model:
self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
self.hparams = Model.load_hparams(self.dir_model)
self.gguf_writer = gguf.GGUFManager(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], split_arguments,
endianess=self.endianess, use_temp_file=self.use_temp_file)
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
self.tensor_names = None
@ -100,6 +98,8 @@ class Model:
ftype_lw: str = ftype_up.lower()
# allow templating the file name with the output ftype, useful with the "auto" ftype
self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
self.gguf_writer = gguf.GGUFManager(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], split_arguments,
endianess=self.endianess, use_temp_file=self.use_temp_file)
@classmethod
def __init_subclass__(cls):

View File

@ -305,20 +305,10 @@ class GGUFManager:
tensor.byteswap(inplace=True)
# TODO reimplement temp file
#if self.use_temp_file and self.temp_file is None:
# fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
# fp.seek(0)
# self.temp_file = fp
# I'm pretty sure it gets handled per shard?
self.tensors.append((name, tensor, raw_dtype))
#if self.temp_file is None:
# self.tensors.append(tensor)
# return
#tensor.tofile(self.temp_file)
#self.write_padding(self.temp_file, tensor.nbytes)
def close(self) -> None:
for _, _, writer in self.split_strategy:
writer.close()