fix loading chatglm3 (#1937)

* update welcome svg

* update welcome message

* fix loading chatglm3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>
Author: moetayuko
Date: 2024-08-19 23:32:45 +08:00
Committed by: GitHub
Parent: 5010537f3c
Commit: a95b3daab9
3 changed files with 32 additions and 22 deletions


@@ -18,7 +18,7 @@ class GetGLM3Handle(LocalLLMHandle):
     def load_model_and_tokenizer(self):
         # 🏃‍♂️🏃‍♂️🏃‍♂️ Runs in the subprocess
-        from transformers import AutoModel, AutoTokenizer
+        from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
         import os, glob
         import os
         import platform
@@ -45,15 +45,13 @@ class GetGLM3Handle(LocalLLMHandle):
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_4bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
             )
         elif LOCAL_MODEL_QUANT == "INT8":  # INT8
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_8bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
             )
         else:
             chatglm_model = AutoModel.from_pretrained(
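
For context: recent transformers releases deprecate passing load_in_4bit/load_in_8bit directly to from_pretrained in favor of a BitsAndBytesConfig supplied via quantization_config, and from_pretrained expects device_map rather than a bare device="cuda" argument, which is what this commit adjusts. Below is a minimal sketch of the new-style load for reference only; it assumes transformers with BitsAndBytesConfig support and bitsandbytes installed, and the model id "THUDM/chatglm3-6b" plus device_map="auto" are illustrative assumptions, not values taken from this repository.

    # Minimal sketch of the new-style quantized load (illustrative, not from the commit).
    from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

    model_id = "THUDM/chatglm3-6b"  # hypothetical model path, for illustration only

    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # Quantization is configured through quantization_config; the deprecated
    # load_in_4bit/load_in_8bit keyword arguments and the device="cuda" argument
    # are no longer passed to from_pretrained.
    chatglm_model = AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",  # let accelerate place the quantized weights on the GPU
    ).eval()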