fix loading chatglm3 (#1937)

* update welcome svg

* update welcome message

* fix loading chatglm3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>
Author: moetayuko
Date: 2024-08-19 23:32:45 +08:00
Committed by: GitHub
Parent: 5010537f3c
Commit: a95b3daab9
3 changed files with 32 additions and 22 deletions


@@ -18,7 +18,7 @@ class GetGLM3Handle(LocalLLMHandle):
     def load_model_and_tokenizer(self):
         # 🏃‍♂️🏃‍♂️🏃‍♂️ Runs in the subprocess
-        from transformers import AutoModel, AutoTokenizer
+        from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
         import os, glob
         import os
         import platform
@@ -45,15 +45,13 @@ class GetGLM3Handle(LocalLLMHandle):
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_4bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
             )
         elif LOCAL_MODEL_QUANT == "INT8":  # INT8
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_8bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
             )
         else:
             chatglm_model = AutoModel.from_pretrained(
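
For context: recent transformers releases deprecate passing load_in_4bit/load_in_8bit directly to from_pretrained in favor of a BitsAndBytesConfig supplied via quantization_config, and from_pretrained expects device_map rather than a bare device="cuda" argument, which is what this commit adjusts. Below is a minimal sketch of the new-style load for reference only; it assumes transformers with BitsAndBytesConfig support and bitsandbytes installed, and the model id "THUDM/chatglm3-6b" plus device_map="auto" are illustrative assumptions, not values taken from this repository.

    # Minimal sketch of the new-style quantized load (illustrative, not from the commit).
    from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

    model_id = "THUDM/chatglm3-6b"  # hypothetical model path, for illustration only

    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # Quantization is configured through quantization_config; the deprecated
    # load_in_4bit/load_in_8bit keyword arguments and the device="cuda" argument
    # are no longer passed to from_pretrained.
    chatglm_model = AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",  # let accelerate place the quantized weights on the GPU
    ).eval()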