upgrade searxng

Author: binary-husky
Date:   2024-06-25 11:12:51 +00:00
parent ececfb9b6e
commit ddad5247fc
5 changed files with 117 additions and 27 deletions

View File

@@ -1,33 +1,44 @@
-def check_proxy(proxies):
+def check_proxy(proxies, return_ip=False):
     import requests
     proxies_https = proxies['https'] if proxies is not None else ''
+    ip = None
     try:
         response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
         data = response.json()
         if 'country_name' in data:
             country = data['country_name']
             result = f"代理配置 {proxies_https}, 代理所在地:{country}"
+            if 'ip' in data: ip = data['ip']
         elif 'error' in data:
-            alternative = _check_with_backup_source(proxies)
+            alternative, ip = _check_with_backup_source(proxies)
             if alternative is None:
                 result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
             else:
                 result = f"代理配置 {proxies_https}, 代理所在地:{alternative}"
         else:
             result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip
     except:
         result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip
 
 def _check_with_backup_source(proxies):
     import random, string, requests
     random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
-    try: return requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()['dns']['geo']
-    except: return None
+    try:
+        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()
+        return res_json['dns']['geo'], res_json['dns']['ip']
+    except:
+        return None, None
 
 
 def backup_and_download(current_version, remote_version):
     """

View File

@@ -4,27 +4,43 @@ import requests
 from bs4 import BeautifulSoup
 from request_llms.bridge_all import model_info
 import urllib.request
+import random
 from functools import lru_cache
+from check_proxy import check_proxy
 
 @lru_cache
 def get_auth_ip():
-    try:
-        external_ip = urllib.request.urlopen('https://v4.ident.me/').read().decode('utf8')
-        return external_ip
-    except:
-        return '114.114.114.114'
+    ip = check_proxy(None, return_ip=True)
+    if ip is None:
+        return '114.114.114.' + str(random.randint(1, 10))
+    return ip
 
-def searxng_request(query, proxies, categories='general', searxng_url=None):
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
     if searxng_url is None:
         url = get_conf("SEARXNG_URL")
     else:
         url = searxng_url
-    params = {
-        'q': query,          # search query
-        'format': 'json',    # output format: JSON
-        'language': 'zh',    # search language
-        'categories': categories
-    }
+
+    if engines is None:
+        engines = 'bing'
+
+    if categories == 'general':
+        params = {
+            'q': query,          # search query
+            'format': 'json',    # output format: JSON
+            'language': 'zh',    # search language
+            'engines': engines,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query,          # search query
+            'format': 'json',    # output format: JSON
+            'language': 'zh',    # search language
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
     headers = {
         'Accept-Language': 'zh-CN,zh;q=0.9',
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
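
Two things worth noting in this hunk: get_auth_ip now derives the X-Real-IP header from check_proxy's new return_ip path instead of querying v4.ident.me, and the fallback address is randomized over 114.114.114.1-10, presumably so repeated requests do not all present the same client IP to SearXNG's rate limiting. The engines parameter only applies to the 'general' category; 'science' searches keep using SearXNG's category mechanism.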
@@ -32,12 +48,13 @@ def searxng_request(query, proxies, categories='general', searxng_url=None):
         'X-Real-IP': get_auth_ip()
     }
     results = []
-    response = requests.post(url, params=params, headers=headers, proxies=proxies)
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
     if response.status_code == 200:
         json_result = response.json()
         for result in json_result['results']:
             item = {
                 "title": result.get("title", ""),
+                "source": result.get("engines", "unknown"),
                 "content": result.get("content", ""),
                 "link": result["url"],
             }
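
In short, the request is now pinned to an explicit engine (defaulting to bing), gets a 30-second timeout, and each result records which engines produced it. A minimal call against the new signature (illustrative only; assumes a reachable SearXNG instance behind SEARXNG_URL, and the engine name is an example):

    results = searxng_request("large language models", proxies=None,
                              categories='general', engines='duckduckgo')
    for r in results:
        print(r["title"], r["source"], r["link"])   # r["source"] mirrors SearXNG's per-result 'engines' list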
@@ -80,7 +97,7 @@ def scrape_text(url, proxies) -> str:
 def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     history = []    # clear history to avoid input overflow
-    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", None))
+    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", "检索中..."))
     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
 
     # ------------- < Step 1: scrape search engine results > -------------
@@ -88,11 +105,12 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     proxies = get_conf('proxies')
     categories = plugin_kwargs.get('categories', 'general')
     searxng_url = plugin_kwargs.get('searxng_url', None)
-    urls = searxng_request(txt, proxies, categories, searxng_url)
+    engines = plugin_kwargs.get('engine', None)
+    urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
     history = []
     if len(urls) == 0:
         chatbot.append((f"结论:{txt}",
-                        "[Local Message] 受到google限制,无法从google获取信息!"))
+                        "[Local Message] 受到限制,无法从searxng获取信息!请尝试更换搜索引擎。"))
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
         return
     # ------------- < Step 2: visit the result pages one by one > -------------
@@ -100,9 +118,10 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     chatbot.append([f"联网检索中 ...", None])
     for index, url in enumerate(urls[:max_search_result]):
         res = scrape_text(url['link'], proxies)
-        history.extend([f"第{index}份搜索结果:", res])
+        prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] {url['title'][:25]}"
+        history.extend([prefix, res])
         res_squeeze = res.replace('\n', '...')
-        chatbot[-1] = [f"第{index}份搜索结果:\n\n" + res_squeeze[:500] + "......", None]
+        chatbot[-1] = [prefix + "\n\n" + res_squeeze[:500] + "......", None]
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
 
     # ------------- < Step 3: ChatGPT synthesis > -------------
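
The url['source'][0] indexing assumes SearXNG returns engines as a non-empty list per result; given the item built in searxng_request above, one entry is shaped roughly like this (a sketch of the assumed shape, with example values):

    item = {
        "title": "Example page",
        "source": ["bing"],          # list of engine names, so url['source'][0] == "bing"
        "content": "snippet ...",
        "link": "https://example.com/",
    }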

View File

@@ -25,6 +25,8 @@ class NetworkGPT_Wrap(GptAcademicPluginTemplate):
             ArgProperty(title="输入问题", description="待通过互联网检索的问题", default_value="", type="string").model_dump_json(),  # primary input, auto-synced from the input box
         "categories":
             ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="", type="dropdown").model_dump_json(),
+        "engine":
+            ArgProperty(title="选择搜索引擎", options=["bing", "google", "duckduckgo"], default_value="bing", description="", type="dropdown").model_dump_json(),
         "searxng_url":
             ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(),  # primary input, auto-synced from the input box
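
This new "engine" dropdown is what feeds plugin_kwargs.get('engine', None) in the plugin body above; the option values (bing, google, duckduckgo) are passed through verbatim as SearXNG engine names, so they must match engines enabled on the SearXNG instance.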

View File

@@ -106,7 +106,7 @@ def main():
             with gr.Row():
                 audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
         with gr.Row():
-            status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
+            status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。支持将文件直接粘贴到输入区。", elem_id="state-panel")
         with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
             with gr.Row():

tests/test_searxng.py (new file, 58 lines added)
View File

@@ -0,0 +1,58 @@
+def validate_path():
+    import os, sys
+    os.path.dirname(__file__)
+    root_dir_assume = os.path.abspath(os.path.dirname(__file__) + "/..")
+    os.chdir(root_dir_assume)
+    sys.path.append(root_dir_assume)
+
+validate_path()  # validate path so you can run from base directory
+from toolbox import get_conf
+import requests
+
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
+    url = 'http://localhost:50001/'
+    if engines is None:
+        engines = 'bing,'
+    if categories == 'general':
+        params = {
+            'q': query,          # search query
+            'format': 'json',    # output format: JSON
+            'language': 'zh',    # search language
+            'engines': engines,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query,          # search query
+            'format': 'json',    # output format: JSON
+            'language': 'zh',    # search language
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
+    headers = {
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+        'X-Forwarded-For': '112.112.112.112',
+        'X-Real-IP': '112.112.112.112'
+    }
+    results = []
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
+    if response.status_code == 200:
+        json_result = response.json()
+        for result in json_result['results']:
+            item = {
+                "title": result.get("title", ""),
+                "content": result.get("content", ""),
+                "link": result["url"],
+            }
+            print(result['engines'])
+            results.append(item)
+        return results
+    else:
+        if response.status_code == 429:
+            raise ValueError("Searxng在线搜索服务当前使用人数太多,请稍后。")
+        else:
+            raise ValueError("在线搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8'))
+
+res = searxng_request("vr environment", None, categories='science', searxng_url=None, engines=None)
+print(res)
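
Since validate_path() and the final searxng_request call execute at import time, the test runs by invoking python tests/test_searxng.py (the path shim chdirs to the repository root first); it assumes a SearXNG instance listening on http://localhost:50001/ and raises ValueError carrying the HTTP status for any non-200 response, with a dedicated message for 429 rate limiting.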