diff --git a/check_proxy.py b/check_proxy.py
index 3a42a739..3aad33c1 100644
--- a/check_proxy.py
+++ b/check_proxy.py
@@ -1,33 +1,44 @@
-def check_proxy(proxies):
+def check_proxy(proxies, return_ip=False):
     import requests
     proxies_https = proxies['https'] if proxies is not None else '无'
+    ip = None
     try:
         response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
         data = response.json()
         if 'country_name' in data:
             country = data['country_name']
             result = f"代理配置 {proxies_https}, 代理所在地:{country}"
+            if 'ip' in data: ip = data['ip']
         elif 'error' in data:
-            alternative = _check_with_backup_source(proxies)
+            alternative, ip = _check_with_backup_source(proxies)
             if alternative is None:
                 result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
             else:
                 result = f"代理配置 {proxies_https}, 代理所在地:{alternative}"
         else:
             result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip
     except:
         result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip
 
 def _check_with_backup_source(proxies):
     import random, string, requests
     random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
-    try: return requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()['dns']['geo']
-    except: return None
+    try:
+        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()
+        return res_json['dns']['geo'], res_json['dns']['ip']
+    except:
+        return None, None
 
 
 def backup_and_download(current_version, remote_version):
     """
diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
index 24e441be..840990fd 100644
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -4,27 +4,43 @@ import requests
 from bs4 import BeautifulSoup
 from request_llms.bridge_all import model_info
 import urllib.request
+import random
 from functools import lru_cache
+from check_proxy import check_proxy
 
 @lru_cache
 def get_auth_ip():
-    try:
-        external_ip = urllib.request.urlopen('https://v4.ident.me/').read().decode('utf8')
-        return external_ip
-    except:
-        return '114.114.114.114'
+    ip = check_proxy(None, return_ip=True)
+    if ip is None:
+        return '114.114.114.' + str(random.randint(1, 10))
+    return ip
 
-def searxng_request(query, proxies, categories='general', searxng_url=None):
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
     if searxng_url is None:
         url = get_conf("SEARXNG_URL")
     else:
         url = searxng_url
-    params = {
-        'q': query,         # 搜索查询
-        'format': 'json',   # 输出格式为JSON
-        'language': 'zh',   # 搜索语言
-        'categories': categories
-    }
+
+    if engines is None:
+        engines = 'bing'
+
+    if categories == 'general':
+        params = {
+            'q': query,         # 搜索查询
+            'format': 'json',   # 输出格式为JSON
+            'language': 'zh',   # 搜索语言
+            'engines': engines,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query,         # 搜索查询
+            'format': 'json',   # 输出格式为JSON
+            'language': 'zh',   # 搜索语言
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
+
     headers = {
         'Accept-Language': 'zh-CN,zh;q=0.9',
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
@@ -32,12 +48,13 @@ def searxng_request(query, proxies, categories='general', searxng_url=None):
         'X-Real-IP': get_auth_ip()
     }
     results = []
-    response = requests.post(url, params=params, headers=headers, proxies=proxies)
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
     if response.status_code == 200:
         json_result = response.json()
         for result in json_result['results']:
             item = {
                 "title": result.get("title", ""),
+                "source": result.get("engines", "unknown"),
                 "content": result.get("content", ""),
                 "link": result["url"],
             }
@@ -80,7 +97,7 @@ def scrape_text(url, proxies) -> str:
 
 def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     history = []    # 清空历史,以免输入溢出
-    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", None))
+    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", "检索中..."))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # ------------- < 第1步:爬取搜索引擎的结果 > -------------
@@ -88,11 +105,12 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     proxies = get_conf('proxies')
     categories = plugin_kwargs.get('categories', 'general')
     searxng_url = plugin_kwargs.get('searxng_url', None)
-    urls = searxng_request(txt, proxies, categories, searxng_url)
+    engines = plugin_kwargs.get('engine', None)
+    urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
     history = []
     if len(urls) == 0:
         chatbot.append((f"结论:{txt}",
-                        "[Local Message] 受到google限制,无法从google获取信息!"))
+                        "[Local Message] 受到限制,无法从searxng获取信息!请尝试更换搜索引擎。"))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     # ------------- < 第2步:依次访问网页 > -------------
@@ -100,9 +118,10 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     chatbot.append([f"联网检索中 ...", None])
     for index, url in enumerate(urls[:max_search_result]):
         res = scrape_text(url['link'], proxies)
-        history.extend([f"第{index}份搜索结果:", res])
+        prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] ({url['title'][:25]}):"
+        history.extend([prefix, res])
         res_squeeze = res.replace('\n', '...')
-        chatbot[-1] = [f"第{index}份搜索结果:\n\n" + res_squeeze[:500] + "......", None]
+        chatbot[-1] = [prefix + "\n\n" + res_squeeze[:500] + "......", None]
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # ------------- < 第3步:ChatGPT综合 > -------------
diff --git a/crazy_functions/Internet_GPT_Wrap.py b/crazy_functions/Internet_GPT_Wrap.py
index c05f44c0..8d3aa43f 100644
--- a/crazy_functions/Internet_GPT_Wrap.py
+++ b/crazy_functions/Internet_GPT_Wrap.py
@@ -25,6 +25,8 @@ class NetworkGPT_Wrap(GptAcademicPluginTemplate):
                 ArgProperty(title="输入问题", description="待通过互联网检索的问题", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
             "categories":
                 ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="无", type="dropdown").model_dump_json(),
+            "engine":
+                ArgProperty(title="选择搜索引擎", options=["bing", "google", "duckduckgo"], default_value="bing", description="无", type="dropdown").model_dump_json(),
             "searxng_url":
                 ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(), # 主输入,自动从输入框同步
diff --git a/main.py b/main.py
index d80d3f26..b574edcc 100644
--- a/main.py
+++ b/main.py
@@ -106,7 +106,7 @@ def main():
                     with gr.Row():
                         audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
                     with gr.Row():
-                        status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
+                        status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。支持将文件直接粘贴到输入区。", elem_id="state-panel")
                 with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
                     with gr.Row():
diff --git a/tests/test_searxng.py b/tests/test_searxng.py
new file mode 100644
index 00000000..e7e2d3d5
--- /dev/null
+++ b/tests/test_searxng.py
@@ -0,0 +1,58 @@
+def validate_path():
+    import os, sys
+    os.path.dirname(__file__)
+    root_dir_assume = os.path.abspath(os.path.dirname(__file__) + "/..")
+    os.chdir(root_dir_assume)
+    sys.path.append(root_dir_assume)
+validate_path()  # validate path so you can run from base directory
+
+from toolbox import get_conf
+import requests
+
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
+    url = 'http://localhost:50001/'
+
+    if engines is None:
+        engine = 'bing,'
+    if categories == 'general':
+        params = {
+            'q': query,         # 搜索查询
+            'format': 'json',   # 输出格式为JSON
+            'language': 'zh',   # 搜索语言
+            'engines': engine,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query,         # 搜索查询
+            'format': 'json',   # 输出格式为JSON
+            'language': 'zh',   # 搜索语言
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
+    headers = {
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+        'X-Forwarded-For': '112.112.112.112',
+        'X-Real-IP': '112.112.112.112'
+    }
+    results = []
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
+    if response.status_code == 200:
+        json_result = response.json()
+        for result in json_result['results']:
+            item = {
+                "title": result.get("title", ""),
+                "content": result.get("content", ""),
+                "link": result["url"],
+            }
+            print(result['engines'])
+            results.append(item)
+        return results
+    else:
+        if response.status_code == 429:
+            raise ValueError("Searxng(在线搜索服务)当前使用人数太多,请稍后。")
+        else:
+            raise ValueError("在线搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8'))
+
+res = searxng_request("vr environment", None, categories='science', searxng_url=None, engines=None)
+print(res)
\ No newline at end of file
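
Usage sketch (illustrative, not part of the patch hunks above): the new return_ip flag switches check_proxy between its original human-readable summary and a bare IP lookup, which the patched get_auth_ip() then uses for the X-Real-IP header. The proxy address below is a hypothetical placeholder.

    from check_proxy import check_proxy

    # Default mode: prints and returns the human-readable location summary string.
    summary = check_proxy({"https": "http://127.0.0.1:7890"})  # hypothetical proxy address

    # New mode: returns only the detected IP, or None if the lookup times out or
    # fails to parse; get_auth_ip() then falls back to '114.114.114.' plus a
    # random final octet.
    ip = check_proxy(None, return_ip=True)
    print(ip)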