upgrade searxng
check_proxy.py
@@ -1,33 +1,44 @@
-def check_proxy(proxies):
+def check_proxy(proxies, return_ip=False):
     import requests
     proxies_https = proxies['https'] if proxies is not None else '无'
+    ip = None
     try:
         response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
         data = response.json()
         if 'country_name' in data:
             country = data['country_name']
             result = f"代理配置 {proxies_https}, 代理所在地:{country}"
+            if 'ip' in data: ip = data['ip']
         elif 'error' in data:
-            alternative = _check_with_backup_source(proxies)
+            alternative, ip = _check_with_backup_source(proxies)
             if alternative is None:
                 result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
             else:
                 result = f"代理配置 {proxies_https}, 代理所在地:{alternative}"
         else:
             result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip
     except:
         result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
-        print(result)
-        return result
+        if not return_ip:
+            print(result)
+            return result
+        else:
+            return ip


 def _check_with_backup_source(proxies):
     import random, string, requests
     random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
-    try: return requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()['dns']['geo']
-    except: return None
+    try:
+        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()
+        return res_json['dns']['geo'], res_json['dns']['ip']
+    except:
+        return None, None


 def backup_and_download(current_version, remote_version):
     """
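Reviewer note: check_proxy now serves two call styles, and _check_with_backup_source now returns a (geo, ip) tuple, so its failure path must return (None, None) to keep the tuple unpacking at the call site safe, which the commit does. A minimal usage sketch (the proxy dict is a hypothetical example):

    from check_proxy import check_proxy

    proxies = {'http': 'http://localhost:7890', 'https': 'http://localhost:7890'}  # hypothetical config
    summary = check_proxy(proxies)                   # prints and returns the human-readable summary
    exit_ip = check_proxy(proxies, return_ip=True)   # quiet mode: returns the detected IP, or None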
@@ -4,27 +4,43 @@ import requests
 from bs4 import BeautifulSoup
 from request_llms.bridge_all import model_info
 import urllib.request
+import random
 from functools import lru_cache
+from check_proxy import check_proxy


 @lru_cache
 def get_auth_ip():
-    try:
-        external_ip = urllib.request.urlopen('https://v4.ident.me/').read().decode('utf8')
-        return external_ip
-    except:
-        return '114.114.114.114'
+    ip = check_proxy(None, return_ip=True)
+    if ip is None:
+        return '114.114.114.' + str(random.randint(1, 10))
+    return ip

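Reviewer note: because get_auth_ip is wrapped in @lru_cache, the 114.114.114.x fallback is drawn once per process and then reused, so every SearXNG request advertises a stable X-Real-IP. A quick check, assuming the function as committed:

    ip_first = get_auth_ip()
    ip_second = get_auth_ip()
    assert ip_first == ip_second  # cached: the random fallback does not vary between calls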
-def searxng_request(query, proxies, categories='general', searxng_url=None):
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
     if searxng_url is None:
         url = get_conf("SEARXNG_URL")
     else:
         url = searxng_url
-    params = {
-        'q': query, # search query
-        'format': 'json', # output format: JSON
-        'language': 'zh', # search language
-        'categories': categories
-    }
+
+    if engines is None:
+        engines = 'bing'
+
+    if categories == 'general':
+        params = {
+            'q': query, # search query
+            'format': 'json', # output format: JSON
+            'language': 'zh', # search language
+            'engines': engines,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query, # search query
+            'format': 'json', # output format: JSON
+            'language': 'zh', # search language
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
+
     headers = {
         'Accept-Language': 'zh-CN,zh;q=0.9',
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
@@ -32,12 +48,13 @@ def searxng_request(query, proxies, categories='general', searxng_url=None):
         'X-Real-IP': get_auth_ip()
     }
     results = []
-    response = requests.post(url, params=params, headers=headers, proxies=proxies)
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
     if response.status_code == 200:
         json_result = response.json()
         for result in json_result['results']:
             item = {
                 "title": result.get("title", ""),
+                "source": result.get("engines", "unknown"),
                 "content": result.get("content", ""),
                 "link": result["url"],
             }
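Reviewer note: a hedged usage sketch of the extended signature (it assumes a reachable SearXNG instance behind SEARXNG_URL, and that the instance has JSON output enabled in its settings, since SearXNG rejects format=json requests otherwise):

    results = searxng_request("large language models", proxies=None,
                              categories='general', engines='duckduckgo')
    for r in results[:3]:
        print(r['source'], '|', r['title'], '|', r['link'])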
@@ -80,7 +97,7 @@ def scrape_text(url, proxies) -> str:
 def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     history = []    # clear history to prevent input overflow
-    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", None))
+    chatbot.append((f"请结合互联网信息回答以下问题:{txt}", "检索中..."))
     yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

     # ------------- < Step 1: scrape search-engine results > -------------
@@ -88,11 +105,12 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     proxies = get_conf('proxies')
     categories = plugin_kwargs.get('categories', 'general')
     searxng_url = plugin_kwargs.get('searxng_url', None)
-    urls = searxng_request(txt, proxies, categories, searxng_url)
+    engines = plugin_kwargs.get('engine', None)
+    urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
     history = []
     if len(urls) == 0:
         chatbot.append((f"结论:{txt}",
-                        "[Local Message] 受到google限制,无法从google获取信息!"))
+                        "[Local Message] 受到限制,无法从searxng获取信息!请尝试更换搜索引擎。"))
         yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
         return
     # ------------- < Step 2: visit each result page > -------------
@@ -100,9 +118,10 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     chatbot.append([f"联网检索中 ...", None])
     for index, url in enumerate(urls[:max_search_result]):
         res = scrape_text(url['link'], proxies)
-        history.extend([f"第{index}份搜索结果:", res])
+        prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] ({url['title'][:25]}):"
+        history.extend([prefix, res])
         res_squeeze = res.replace('\n', '...')
-        chatbot[-1] = [f"第{index}份搜索结果:\n\n" + res_squeeze[:500] + "......", None]
+        chatbot[-1] = [prefix + "\n\n" + res_squeeze[:500] + "......", None]
         yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

     # ------------- < Step 3: ChatGPT synthesis > -------------
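Reviewer note: 'source' carries SearXNG's engines field, a list of engine names per result, so url['source'][0] is the first engine that produced the hit. One edge case: when the field is absent, the "unknown" default is a plain string, so [0] yields just 'u'. A hypothetical illustration of the prefix being built:

    url = {'title': 'Example page', 'source': ['bing'], 'link': 'https://example.com'}  # hypothetical item
    prefix = f"第0份搜索结果 [源自{url['source'][0]}搜索] ({url['title'][:25]}):"
    # -> '第0份搜索结果 [源自bing搜索] (Example page):'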
@@ -25,6 +25,8 @@ class NetworkGPT_Wrap(GptAcademicPluginTemplate):
             ArgProperty(title="输入问题", description="待通过互联网检索的问题", default_value="", type="string").model_dump_json(),   # main input, auto-synced from the input box
         "categories":
             ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="无", type="dropdown").model_dump_json(),
+        "engine":
+            ArgProperty(title="选择搜索引擎", options=["bing", "google", "duckduckgo"], default_value="bing", description="无", type="dropdown").model_dump_json(),
         "searxng_url":
             ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(),   # main input, auto-synced from the input box
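Reviewer note: the new dropdown reaches searxng_request through plugin_kwargs under the singular key 'engine'. A hedged sketch of the hand-off, with hypothetical values:

    plugin_kwargs = {'categories': 'general', 'engine': 'duckduckgo', 'searxng_url': None}
    engines = plugin_kwargs.get('engine', None)   # singular key in plugin_kwargs
    urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)  # plural parameter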
main.py (2 changed lines)
@@ -106,7 +106,7 @@ def main():
         with gr.Row():
             audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
         with gr.Row():
-            status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
+            status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。支持将文件直接粘贴到输入区。", elem_id="state-panel")

         with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
             with gr.Row():
tests/test_searxng.py (new file, 58 lines)
@@ -0,0 +1,58 @@
+def validate_path():
+    import os, sys
+    os.path.dirname(__file__)
+    root_dir_assume = os.path.abspath(os.path.dirname(__file__) + "/..")
+    os.chdir(root_dir_assume)
+    sys.path.append(root_dir_assume)
+validate_path()  # validate path so you can run from base directory
+
+from toolbox import get_conf
+import requests
+
+def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
+    url = 'http://localhost:50001/'
+
+    if engines is None:
+        engine = 'bing,'
+    if categories == 'general':
+        params = {
+            'q': query, # search query
+            'format': 'json', # output format: JSON
+            'language': 'zh', # search language
+            'engines': engine,
+        }
+    elif categories == 'science':
+        params = {
+            'q': query, # search query
+            'format': 'json', # output format: JSON
+            'language': 'zh', # search language
+            'categories': 'science'
+        }
+    else:
+        raise ValueError('不支持的检索类型')
+    headers = {
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+        'X-Forwarded-For': '112.112.112.112',
+        'X-Real-IP': '112.112.112.112'
+    }
+    results = []
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
+    if response.status_code == 200:
+        json_result = response.json()
+        for result in json_result['results']:
+            item = {
+                "title": result.get("title", ""),
+                "content": result.get("content", ""),
+                "link": result["url"],
+            }
+            print(result['engines'])
+            results.append(item)
+        return results
+    else:
+        if response.status_code == 429:
+            raise ValueError("Searxng(在线搜索服务)当前使用人数太多,请稍后。")
+        else:
+            raise ValueError("在线搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8'))
+
+res = searxng_request("vr environment", None, categories='science', searxng_url=None, engines=None)
+print(res)
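Reviewer note: in this test copy, when a caller passes engines explicitly, the 'general' branch reads the local name engine, which is only assigned when engines is None, so the call would raise UnboundLocalError; the plugin version committed above does not have this mismatch. A hedged fix sketch for the test:

    if engines is None:
        engines = 'bing,'
    if categories == 'general':
        params = {
            'q': query,
            'format': 'json',
            'language': 'zh',
            'engines': engines,  # use the parameter, not the stray local 'engine'
        }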