add support for Deepseek R1 model and display CoT (#2118)
* feat: add support for R1 model and display CoT * fix unpacking * feat: customized font & font size * auto hide tooltip when scrolling down * tooltip glass transparent css * fix: Enhance API key validation in is_any_api_key function (#2113) * support qwen2.5-max! * update minor adjustments --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> Co-authored-by: Steven Moder <java20131114@gmail.com>
This commit is contained in:
@@ -13,6 +13,9 @@ API_KEY = "在此处填写APIKEY" # 可同时填写多个API-KEY,用英文
|
||||
# [step 1-2]>> ( 接入通义 qwen-max ) 接入通义千问在线大模型,api-key获取地址 https://dashscope.console.aliyun.com/
|
||||
DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY
|
||||
|
||||
# [step 1-3]>> ( 接入深度求索 deepseek-reasoner ) 深度求索(DeepSeek) API KEY,默认请求地址为"https://api.deepseek.com/v1/chat/completions"
|
||||
DEEPSEEK_API_KEY = ""
|
||||
|
||||
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改
|
||||
USE_PROXY = False
|
||||
if USE_PROXY:
|
||||
@@ -39,7 +42,8 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
|
||||
"gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
|
||||
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
|
||||
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
|
||||
"gemini-1.5-pro", "chatglm3", "chatglm4"
|
||||
"gemini-1.5-pro", "chatglm3", "chatglm4",
|
||||
"deepseek-chat", "deepseek-coder", "deepseek-reasoner"
|
||||
]
|
||||
|
||||
EMBEDDING_MODEL = "text-embedding-3-small"
|
||||
@@ -261,9 +265,6 @@ MOONSHOT_API_KEY = ""
|
||||
# 零一万物(Yi Model) API KEY
|
||||
YIMODEL_API_KEY = ""
|
||||
|
||||
# 深度求索(DeepSeek) API KEY,默认请求地址为"https://api.deepseek.com/v1/chat/completions"
|
||||
DEEPSEEK_API_KEY = ""
|
||||
|
||||
|
||||
# 紫东太初大模型 https://ai-maas.wair.ac.cn
|
||||
TAICHU_API_KEY = ""
|
||||
|
||||
@@ -1090,7 +1090,7 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
|
||||
except:
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
|
||||
if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
|
||||
if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
deepseekapi_noui, deepseekapi_ui = get_predict_function(
|
||||
api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
|
||||
@@ -1101,7 +1101,7 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
|
||||
"fn_without_ui": deepseekapi_noui,
|
||||
"endpoint": deepseekapi_endpoint,
|
||||
"can_multi_thread": True,
|
||||
"max_token": 32000,
|
||||
"max_token": 64000,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
@@ -1114,6 +1114,16 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
"deepseek-reasoner":{
|
||||
"fn_with_ui": deepseekapi_ui,
|
||||
"fn_without_ui": deepseekapi_noui,
|
||||
"endpoint": deepseekapi_endpoint,
|
||||
"can_multi_thread": True,
|
||||
"max_token": 64000,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
"enable_reasoning": True
|
||||
},
|
||||
})
|
||||
except:
|
||||
logger.error(trimmed_format_exc())
|
||||
|
||||
@@ -36,10 +36,11 @@ def get_full_error(chunk, stream_response):
|
||||
|
||||
def decode_chunk(chunk):
|
||||
"""
|
||||
用于解读"content"和"finish_reason"的内容
|
||||
用于解读"content"和"finish_reason"的内容(如果支持思维链也会返回"reasoning_content"内容)
|
||||
"""
|
||||
chunk = chunk.decode()
|
||||
respose = ""
|
||||
reasoning_content = ""
|
||||
finish_reason = "False"
|
||||
try:
|
||||
chunk = json.loads(chunk[6:])
|
||||
@@ -57,14 +58,20 @@ def decode_chunk(chunk):
|
||||
return respose, finish_reason
|
||||
|
||||
try:
|
||||
if chunk["choices"][0]["delta"]["content"] is not None:
|
||||
respose = chunk["choices"][0]["delta"]["content"]
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if chunk["choices"][0]["delta"]["reasoning_content"] is not None:
|
||||
reasoning_content = chunk["choices"][0]["delta"]["reasoning_content"]
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
finish_reason = chunk["choices"][0]["finish_reason"]
|
||||
except:
|
||||
pass
|
||||
return respose, finish_reason
|
||||
return respose, reasoning_content, finish_reason
|
||||
|
||||
|
||||
def generate_message(input, model, key, history, max_output_token, system_prompt, temperature):
|
||||
@@ -149,6 +156,7 @@ def get_predict_function(
|
||||
observe_window = None:
|
||||
用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
|
||||
"""
|
||||
from .bridge_all import model_info
|
||||
watch_dog_patience = 5 # 看门狗的耐心,设置5秒不准咬人(咬的也不是人
|
||||
if len(APIKEY) == 0:
|
||||
raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
|
||||
@@ -163,25 +171,17 @@ def get_predict_function(
|
||||
system_prompt=sys_prompt,
|
||||
temperature=llm_kwargs["temperature"],
|
||||
)
|
||||
|
||||
reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
|
||||
|
||||
retry = 0
|
||||
while True:
|
||||
try:
|
||||
from .bridge_all import model_info
|
||||
|
||||
endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
|
||||
if not disable_proxy:
|
||||
response = requests.post(
|
||||
endpoint,
|
||||
headers=headers,
|
||||
proxies=proxies,
|
||||
json=playload,
|
||||
stream=True,
|
||||
timeout=TIMEOUT_SECONDS,
|
||||
)
|
||||
else:
|
||||
response = requests.post(
|
||||
endpoint,
|
||||
headers=headers,
|
||||
proxies=None if disable_proxy else proxies,
|
||||
json=playload,
|
||||
stream=True,
|
||||
timeout=TIMEOUT_SECONDS,
|
||||
@@ -195,9 +195,12 @@ def get_predict_function(
|
||||
if MAX_RETRY != 0:
|
||||
logger.error(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……")
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
result = ""
|
||||
finish_reason = ""
|
||||
if reasoning:
|
||||
resoning_buffer = ""
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
while True:
|
||||
try:
|
||||
chunk = next(stream_response)
|
||||
@@ -207,9 +210,9 @@ def get_predict_function(
|
||||
break
|
||||
except requests.exceptions.ConnectionError:
|
||||
chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。
|
||||
response_text, finish_reason = decode_chunk(chunk)
|
||||
response_text, reasoning_content, finish_reason = decode_chunk(chunk)
|
||||
# 返回的数据流第一次为空,继续等待
|
||||
if response_text == "" and finish_reason != "False":
|
||||
if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
|
||||
continue
|
||||
if response_text == "API_ERROR" and (
|
||||
finish_reason != "False" or finish_reason != "stop"
|
||||
@@ -227,6 +230,8 @@ def get_predict_function(
|
||||
print(f"[response] {result}")
|
||||
break
|
||||
result += response_text
|
||||
if reasoning:
|
||||
resoning_buffer += reasoning_content
|
||||
if observe_window is not None:
|
||||
# 观测窗,把已经获取的数据显示出去
|
||||
if len(observe_window) >= 1:
|
||||
@@ -241,6 +246,10 @@ def get_predict_function(
|
||||
error_msg = chunk_decoded
|
||||
logger.error(error_msg)
|
||||
raise RuntimeError("Json解析不合常规")
|
||||
if reasoning:
|
||||
# reasoning 的部分加上框 (>)
|
||||
return '\n'.join(map(lambda x: '> ' + x, resoning_buffer.split('\n'))) + \
|
||||
'\n\n' + result
|
||||
return result
|
||||
|
||||
def predict(
|
||||
@@ -262,6 +271,7 @@ def get_predict_function(
|
||||
chatbot 为WebUI中显示的对话列表,修改它,然后yield出去,可以直接修改对话界面内容
|
||||
additional_fn代表点击的哪个按钮,按钮见functional.py
|
||||
"""
|
||||
from .bridge_all import model_info
|
||||
if len(APIKEY) == 0:
|
||||
raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
|
||||
if inputs == "":
|
||||
@@ -299,27 +309,18 @@ def get_predict_function(
|
||||
temperature=llm_kwargs["temperature"],
|
||||
)
|
||||
|
||||
reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
|
||||
|
||||
history.append(inputs)
|
||||
history.append("")
|
||||
retry = 0
|
||||
while True:
|
||||
try:
|
||||
from .bridge_all import model_info
|
||||
|
||||
endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
|
||||
if not disable_proxy:
|
||||
response = requests.post(
|
||||
endpoint,
|
||||
headers=headers,
|
||||
proxies=proxies,
|
||||
json=playload,
|
||||
stream=True,
|
||||
timeout=TIMEOUT_SECONDS,
|
||||
)
|
||||
else:
|
||||
response = requests.post(
|
||||
endpoint,
|
||||
headers=headers,
|
||||
proxies=None if disable_proxy else proxies,
|
||||
json=playload,
|
||||
stream=True,
|
||||
timeout=TIMEOUT_SECONDS,
|
||||
@@ -338,6 +339,8 @@ def get_predict_function(
|
||||
raise TimeoutError
|
||||
|
||||
gpt_replying_buffer = ""
|
||||
if reasoning:
|
||||
gpt_reasoning_buffer = ""
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
while True:
|
||||
@@ -347,9 +350,9 @@ def get_predict_function(
|
||||
break
|
||||
except requests.exceptions.ConnectionError:
|
||||
chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。
|
||||
response_text, finish_reason = decode_chunk(chunk)
|
||||
response_text, reasoning_content, finish_reason = decode_chunk(chunk)
|
||||
# 返回的数据流第一次为空,继续等待
|
||||
if response_text == "" and finish_reason != "False":
|
||||
if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
|
||||
status_text = f"finish_reason: {finish_reason}"
|
||||
yield from update_ui(
|
||||
chatbot=chatbot, history=history, msg=status_text
|
||||
@@ -379,6 +382,11 @@ def get_predict_function(
|
||||
logger.info(f"[response] {gpt_replying_buffer}")
|
||||
break
|
||||
status_text = f"finish_reason: {finish_reason}"
|
||||
if reasoning:
|
||||
gpt_replying_buffer += response_text
|
||||
gpt_reasoning_buffer += reasoning_content
|
||||
history[-1] = '\n'.join(map(lambda x: '> ' + x, gpt_reasoning_buffer.split('\n'))) + '\n\n' + gpt_replying_buffer
|
||||
else:
|
||||
gpt_replying_buffer += response_text
|
||||
# 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出
|
||||
history[-1] = gpt_replying_buffer
|
||||
|
||||
@@ -2,12 +2,19 @@ class WelcomeMessage {
|
||||
constructor() {
|
||||
this.static_welcome_message = [
|
||||
{
|
||||
title: "环境配置教程",
|
||||
content: "配置模型和插件,释放大语言模型的学术应用潜力。",
|
||||
svg: "file=themes/svg/conf.svg",
|
||||
title: "改变主题外观",
|
||||
content: "点击「界面外观」,然后「更换UI主题」或「切换界面明暗」。",
|
||||
svg: "file=themes/svg/theme.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic/wiki/%E9%A1%B9%E7%9B%AE%E9%85%8D%E7%BD%AE%E8%AF%B4%E6%98%8E",
|
||||
},
|
||||
{
|
||||
title: "修改回答语言偏好",
|
||||
content: "点击「更改模型」,删除「System prompt」并输入「用某语言回答」。",
|
||||
svg: "file=themes/svg/prompt.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic",
|
||||
},
|
||||
{
|
||||
title: "Arxiv论文一键翻译",
|
||||
title: "Arxiv论文翻译",
|
||||
content: "无缝切换学术阅读语言,最优英文转中文的学术论文阅读体验。",
|
||||
svg: "file=themes/svg/arxiv.svg",
|
||||
@@ -19,6 +26,12 @@ class WelcomeMessage {
|
||||
svg: "file=themes/svg/mm.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic",
|
||||
},
|
||||
{
|
||||
title: "获取多个模型的答案",
|
||||
content: "输入问题后点击「询问多个GPT模型」,消耗算子低于单词询问gpt-4o。",
|
||||
svg: "file=themes/svg/model_multiple.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic",
|
||||
},
|
||||
{
|
||||
title: "文档与源码批处理",
|
||||
content: "您可以将任意文件拖入「此处」,随后调用对应插件功能。",
|
||||
@@ -52,7 +65,13 @@ class WelcomeMessage {
|
||||
{
|
||||
title: "实时语音对话",
|
||||
content: "配置实时语音对话功能,无须任何激活词,我将一直倾听。",
|
||||
svg: "file=themes/svg/default.svg",
|
||||
svg: "file=themes/svg/voice.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md",
|
||||
},
|
||||
{
|
||||
title: "联网回答问题",
|
||||
content: "输入问题后,点击右侧插件区的「查互联网后回答」插件。",
|
||||
svg: "file=themes/svg/Internet.svg",
|
||||
url: "https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md",
|
||||
},
|
||||
{
|
||||
@@ -85,6 +104,7 @@ class WelcomeMessage {
|
||||
this.card_array = [];
|
||||
this.static_welcome_message_previous = [];
|
||||
this.reflesh_time_interval = 15 * 1000;
|
||||
this.update_time_interval = 2 * 1000;
|
||||
this.major_title = "欢迎使用GPT-Academic";
|
||||
|
||||
const reflesh_render_status = () => {
|
||||
@@ -101,12 +121,19 @@ class WelcomeMessage {
|
||||
window.addEventListener('resize', this.update.bind(this));
|
||||
// add a loop to refresh cards
|
||||
this.startRefleshCards();
|
||||
this.startAutoUpdate();
|
||||
}
|
||||
|
||||
begin_render() {
|
||||
this.update();
|
||||
}
|
||||
|
||||
async startAutoUpdate() {
|
||||
// sleep certain time
|
||||
await new Promise(r => setTimeout(r, this.update_time_interval));
|
||||
this.update();
|
||||
}
|
||||
|
||||
async startRefleshCards() {
|
||||
// sleep certain time
|
||||
await new Promise(r => setTimeout(r, this.reflesh_time_interval));
|
||||
@@ -134,6 +161,7 @@ class WelcomeMessage {
|
||||
|
||||
// combine two lists
|
||||
this.static_welcome_message_previous = not_shown_previously.concat(already_shown_previously);
|
||||
this.static_welcome_message_previous = this.static_welcome_message_previous.slice(0, this.max_welcome_card_num);
|
||||
|
||||
(async () => {
|
||||
// 使用 for...of 循环来处理异步操作
|
||||
@@ -198,12 +226,11 @@ class WelcomeMessage {
|
||||
return array;
|
||||
}
|
||||
|
||||
async update() {
|
||||
async can_display() {
|
||||
// update the card visibility
|
||||
const elem_chatbot = document.getElementById('gpt-chatbot');
|
||||
const chatbot_top = elem_chatbot.getBoundingClientRect().top;
|
||||
const welcome_card_container = document.getElementsByClassName('welcome-card-container')[0];
|
||||
|
||||
// detect if welcome card overflow
|
||||
let welcome_card_overflow = false;
|
||||
if (welcome_card_container) {
|
||||
@@ -215,22 +242,22 @@ class WelcomeMessage {
|
||||
var page_width = document.documentElement.clientWidth;
|
||||
const width_to_hide_welcome = 1200;
|
||||
if (!await this.isChatbotEmpty() || page_width < width_to_hide_welcome || welcome_card_overflow) {
|
||||
// overflow !
|
||||
if (this.visible) {
|
||||
// console.log("remove welcome");
|
||||
this.removeWelcome();
|
||||
this.card_array = [];
|
||||
this.static_welcome_message_previous = [];
|
||||
// cannot display
|
||||
return false;
|
||||
}
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
if (this.visible) {
|
||||
// console.log("already visible");
|
||||
return;
|
||||
}
|
||||
// not overflow, not yet shown, then create and display welcome card
|
||||
// console.log("show welcome");
|
||||
|
||||
async update() {
|
||||
const can_display = await this.can_display();
|
||||
if (can_display && !this.visible) {
|
||||
this.showWelcome();
|
||||
return;
|
||||
}
|
||||
if (!can_display && this.visible) {
|
||||
this.removeWelcome();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
showCard(message) {
|
||||
@@ -297,6 +324,16 @@ class WelcomeMessage {
|
||||
});
|
||||
|
||||
elem_chatbot.appendChild(welcome_card_container);
|
||||
const can_display = await this.can_display();
|
||||
if (!can_display) {
|
||||
// undo
|
||||
this.visible = false;
|
||||
this.card_array = [];
|
||||
this.static_welcome_message_previous = [];
|
||||
elem_chatbot.removeChild(welcome_card_container);
|
||||
await new Promise(r => setTimeout(r, this.update_time_interval / 2));
|
||||
return;
|
||||
}
|
||||
|
||||
// 添加显示动画
|
||||
requestAnimationFrame(() => {
|
||||
@@ -313,6 +350,8 @@ class WelcomeMessage {
|
||||
welcome_card_container.classList.add('hide');
|
||||
welcome_card_container.addEventListener('transitionend', () => {
|
||||
elem_chatbot.removeChild(welcome_card_container);
|
||||
this.card_array = [];
|
||||
this.static_welcome_message_previous = [];
|
||||
}, { once: true });
|
||||
// add a fail safe timeout
|
||||
const timeout = 600; // 与 CSS 中 transition 的时间保持一致(1s)
|
||||
|
||||
Reference in New Issue
Block a user