Merge branch 'frontier' into production

2023-11-23 16:22:17 +08:00
parent 79a0b687b8 4fefbb80ac
commit 0ec5a8e5f8
6 changed files with 129 additions and 46 deletions
--- a/check_proxy.py
+++ b/check_proxy.py
@@ -5,7 +5,6 @@ def check_proxy(proxies):
    try:
        response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
        data = response.json()
-        # print(f'查询代理的地理位置，返回的结果是{data}')
        if 'country_name' in data:
            country = data['country_name']
            result = f"代理配置 {proxies_https}, 代理所在地：{country}"
@@ -47,8 +46,8 @@ def backup_and_download(current_version, remote_version):
    os.makedirs(new_version_dir)
    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
    proxies = get_conf('proxies')
-    r = requests.get(
-        'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
+    try:    r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
+    except: r = requests.get('https://public.gpt-academic.top/publish/master.zip', proxies=proxies, stream=True)
    zip_file_path = backup_dir+'/master.zip'
    with open(zip_file_path, 'wb+') as f:
        f.write(r.content)
@@ -111,11 +110,10 @@ def auto_update(raise_error=False):
    try:
        from toolbox import get_conf
        import requests
-        import time
        import json
        proxies = get_conf('proxies')
-        response = requests.get(
-            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
+        try:    response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
+        except: response = requests.get("https://public.gpt-academic.top/publish/version", proxies=proxies, timeout=5)
        remote_json_data = json.loads(response.text)
        remote_version = remote_json_data['version']
        if remote_json_data["show_feature"]:
@@ -127,8 +125,7 @@ def auto_update(raise_error=False):
            current_version = json.loads(current_version)['version']
        if (remote_version - current_version) >= 0.01-1e-5:
            from colorful import print亮黄
-            print亮黄(
-                f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')
+            print亮黄(f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')
            print('（1）Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
            user_instruction = input('（2）是否一键更新代码（Y+回车=确认，输入其他/无输入+回车=不更新）？')
            if user_instruction in ['Y', 'y']:
@@ -154,7 +151,7 @@ def auto_update(raise_error=False):
        print(msg)

 def warm_up_modules():
-    print('正在执行一些模块的预热...')
+    print('正在执行一些模块的预热 ...')
    from toolbox import ProxyNetworkActivate
    from request_llms.bridge_all import model_info
    with ProxyNetworkActivate("Warmup_Modules"):
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -73,6 +73,7 @@ def move_project(project_folder, arxiv_id=None):

    # align subfolder if there is a folder wrapper
    items = glob.glob(pj(project_folder,'*'))
+    items = [item for item in items if os.path.basename(item)!='__MACOSX']
    if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
        if os.path.isdir(items[0]): project_folder = items[0]

@@ -214,7 +215,6 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
    # <-------------- we are done ------------->
    return success

-
 # ========================================= 插件主程序2 =====================================================    

@CatchException
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -283,10 +283,10 @@ def find_tex_file_ignore_case(fp):
    dir_name = os.path.dirname(fp)
    base_name = os.path.basename(fp)
    # 如果输入的文件路径是正确的
-    if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
+    if os.path.isfile(pj(dir_name, base_name)): return pj(dir_name, base_name)
    # 如果不正确，试着加上.tex后缀试试
    if not base_name.endswith('.tex'): base_name+='.tex'
-    if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
+    if os.path.isfile(pj(dir_name, base_name)): return pj(dir_name, base_name)
    # 如果还找不到，解除大小写限制，再试一次
    import glob
    for f in glob.glob(dir_name+'/*.tex'):
--- a/request_llms/bridge_chatgpt_vision.py
+++ b/request_llms/bridge_chatgpt_vision.py
@@ -15,29 +15,16 @@ import requests
 import base64
 import os
 import glob
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
+    update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
+

-from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, update_ui_lastest_msg, get_max_token
 proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
    get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')

 timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                  '网络错误，检查代理服务器是否可用，以及代理设置的格式是否正确，格式须是[协议]://[地址]:[端口]，缺一不可。'

-def have_any_recent_upload_image_files(chatbot):
-    _5min = 5 * 60
-    if chatbot is None: return False, None    # chatbot is None
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    if not most_recent_uploaded: return False, None   # most_recent_uploaded is None
-    if time.time() - most_recent_uploaded["time"] < _5min: 
-        most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-        path = most_recent_uploaded['path']
-        file_manifest = [f for f in glob.glob(f'{path}/**/*.jpg', recursive=True)]
-        file_manifest += [f for f in glob.glob(f'{path}/**/*.jpeg', recursive=True)]
-        file_manifest += [f for f in glob.glob(f'{path}/**/*.png', recursive=True)]
-        if len(file_manifest) == 0: return False, None
-        return True, file_manifest # most_recent_uploaded is new
-    else: 
-        return False, None  # most_recent_uploaded is too old

 def report_invalid_key(key):
    if get_conf("BLOCK_INVALID_APIKEY"): 
@@ -258,10 +245,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg,
        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history

-# Function to encode the image
-def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')

 def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
    """
--- a/request_llms/com_sparkapi.py
+++ b/request_llms/com_sparkapi.py
@@ -1,4 +1,4 @@
-from toolbox import get_conf
+from toolbox import get_conf, get_pictures_list, encode_image
 import base64
 import datetime
 import hashlib
@@ -65,6 +65,7 @@ class SparkRequestInstance():
        self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
        self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
        self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
+        self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"

        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
@@ -92,7 +93,11 @@ class SparkRequestInstance():
            gpt_url = self.gpt_url_v3
        else:
            gpt_url = self.gpt_url
-
+        file_manifest = []
+        if llm_kwargs.get('most_recent_uploaded'):
+            if llm_kwargs['most_recent_uploaded'].get('path'):
+                file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
+                gpt_url = self.gpt_url_img
        wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
        websocket.enableTrace(False)
        wsUrl = wsParam.create_url()
@@ -101,9 +106,8 @@ class SparkRequestInstance():
        def on_open(ws):
            import _thread as thread
            thread.start_new_thread(run, (ws,))
-
        def run(ws, *args):
-            data = json.dumps(gen_params(ws.appid, *ws.all_args))
+            data = json.dumps(gen_params(ws.appid, *ws.all_args, file_manifest))
            ws.send(data)

        # 收到websocket消息的处理
@@ -142,9 +146,18 @@ class SparkRequestInstance():
        ws.all_args = (inputs, llm_kwargs, history, system_prompt)
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

-def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest):
    conversation_cnt = len(history) // 2
-    messages = [{"role": "system", "content": system_prompt}]
+    messages = []
+    if file_manifest:
+        base64_images = []
+        for image_path in file_manifest:
+            base64_images.append(encode_image(image_path))
+        for img_s in base64_images:
+            if img_s not in str(messages):
+                messages.append({"role": "user", "content": img_s, "content_type": "image"})
+    else:
+        messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
@@ -167,7 +180,7 @@ def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    return messages


-def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
+def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest):
    """
    通过appid和用户的提问来生成请参数
    """
@@ -176,6 +189,8 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
        "sparkv2": "generalv2",
        "sparkv3": "generalv3",
    }
+    domains_select = domains[llm_kwargs['llm_model']]
+    if file_manifest: domains_select = 'image'
    data = {
        "header": {
            "app_id": appid,
@@ -183,7 +198,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
        },
        "parameter": {
            "chat": {
-                "domain": domains[llm_kwargs['llm_model']],
+                "domain": domains_select,
                "temperature": llm_kwargs["temperature"],
                "random_threshold": 0.5,
                "max_tokens": 4096,
@@ -192,7 +207,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
        },
        "payload": {
            "message": {
-                "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt)
+                "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest)
            }
        }
    }
--- a/toolbox.py
+++ b/toolbox.py
@@ -4,6 +4,7 @@ import time
 import inspect
 import re
 import os
+import base64
 import gradio
 import shutil
 import glob
@@ -79,6 +80,7 @@ def ArgsGeneralWrapper(f):
            'max_length': max_length,
            'temperature':temperature,
            'client_ip': request.client.host,
+            'most_recent_uploaded': cookies.get('most_recent_uploaded')
        }
        plugin_kwargs = {
            "advanced_arg": plugin_advanced_arg,
@@ -602,6 +604,64 @@ def del_outdated_uploads(outdate_time_seconds, target_path_base=None):
            except: pass
    return

+
+def html_local_file(file):
+    base_path = os.path.dirname(__file__)  # 项目目录
+    if os.path.exists(str(file)):
+        file = f'file={file.replace(base_path, ".")}'
+    return file
+
+
+def html_local_img(__file, layout='left', max_width=None, max_height=None, md=True):
+    style = ''
+    if max_width is not None:
+        style += f"max-width: {max_width};"
+    if max_height is not None:
+        style += f"max-height: {max_height};"
+    __file = html_local_file(__file)
+    a = f'<div align="{layout}"><img src="{__file}" style="{style}"></div>'
+    if md:
+        a = f'![{__file}]({__file})'
+    return a
+
+def file_manifest_filter_type(file_list, filter_: list = None):
+    new_list = []
+    if not filter_: filter_ = ['png', 'jpg', 'jpeg']
+    for file in file_list:
+        if str(os.path.basename(file)).split('.')[-1] in filter_:
+            new_list.append(html_local_img(file, md=False))
+        else:
+            new_list.append(file)
+    return new_list
+
+def to_markdown_tabs(head: list, tabs: list, alignment=':---:', column=False):
+    """
+    Args:
+        head: 表头：[]
+        tabs: 表值：[[列1], [列2], [列3], [列4]]
+        alignment: :--- 左对齐， :---: 居中对齐， ---: 右对齐
+        column: True to keep data in columns, False to keep data in rows (default).
+    Returns:
+        A string representation of the markdown table.
+    """
+    if column:
+        transposed_tabs = list(map(list, zip(*tabs)))
+    else:
+        transposed_tabs = tabs
+    # Find the maximum length among the columns
+    max_len = max(len(column) for column in transposed_tabs)
+
+    tab_format = "| %s "
+    tabs_list = "".join([tab_format % i for i in head]) + '|\n'
+    tabs_list += "".join([tab_format % alignment for i in head]) + '|\n'
+
+    for i in range(max_len):
+        row_data = [tab[i] if i < len(tab) else '' for tab in transposed_tabs]
+        row_data = file_manifest_filter_type(row_data, filter_=None)
+        tabs_list += "".join([tab_format % i for i in row_data]) + '|\n'
+
+    return tabs_list
+
 def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkboxes, cookies):
    """
    当文件被上传时的回调函数
@@ -626,16 +686,15 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo
        this_file_path = pj(target_path_base, file_origin_name)
        shutil.move(file.name, this_file_path)
        upload_msg += extract_archive(file_path=this_file_path, dest_dir=this_file_path+'.extract')
-    
-    # 整理文件集合
-    moved_files = [fp for fp in glob.glob(f'{target_path_base}/**/*', recursive=True)]
+
    if "浮动输入区" in checkboxes: 
        txt, txt2 = "", target_path_base
    else:
        txt, txt2 = target_path_base, ""

-    # 输出消息
-    moved_files_str = '\t\n\n'.join(moved_files)
+    # 整理文件集合 输出消息
+    moved_files = [fp for fp in glob.glob(f'{target_path_base}/**/*', recursive=True)]
+    moved_files_str = to_markdown_tabs(head=['文件'], tabs=[moved_files])
    chatbot.append(['我上传了文件，请查收', 
                    f'[Local Message] 收到以下文件: \n\n{moved_files_str}' +
                    f'\n\n调用路径参数已自动修正到: \n\n{txt}' +
@@ -1203,6 +1262,35 @@ def get_chat_default_kwargs():

    return default_chat_kwargs

+
+def get_pictures_list(path):
+    file_manifest = [f for f in glob.glob(f'{path}/**/*.jpg', recursive=True)]
+    file_manifest += [f for f in glob.glob(f'{path}/**/*.jpeg', recursive=True)]
+    file_manifest += [f for f in glob.glob(f'{path}/**/*.png', recursive=True)]
+    return file_manifest
+
+
+def have_any_recent_upload_image_files(chatbot):
+    _5min = 5 * 60
+    if chatbot is None: return False, None    # chatbot is None
+    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
+    if not most_recent_uploaded: return False, None   # most_recent_uploaded is None
+    if time.time() - most_recent_uploaded["time"] < _5min:
+        most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
+        path = most_recent_uploaded['path']
+        file_manifest = get_pictures_list(path)
+        if len(file_manifest) == 0: return False, None
+        return True, file_manifest # most_recent_uploaded is new
+    else:
+        return False, None  # most_recent_uploaded is too old
+
+
+# Function to encode the image
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+
+
 def get_max_token(llm_kwargs):
    from request_llms.bridge_all import model_info
    return model_info[llm_kwargs['llm_model']]['max_token']