Add Support for Gemini 1.5 Pro & Gemini 1.5 Flash (#1926)

* Add support for Gemini 1.5 Pro & Gemini 1.5 Flash.

* Update bridge_all.py: fix a spelling error in the comments.

Author: FatShibaInu
Date: 2024-08-12 21:44:24 +08:00 (committed by GitHub)
Parent: 6fe5f6ee6e
Commit: f9384e4e5f
4 changed files with 86 additions and 33 deletions

File: config.py

@@ -36,7 +36,7 @@ AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-p
                     "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
                     "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
-                    "gemini-pro", "chatglm3"
+                    "gemini-1.5-pro", "chatglm3"
                     ]
 # --- --- --- ---
 # P.S. other available models also include
@@ -50,6 +50,7 @@ AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-p
 # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
 # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
 # "deepseek-chat" ,"deepseek-coder",
+# "gemini-1.5-flash",
 # "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
 # ]
 # --- --- --- ---
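
With this change, gemini-1.5-pro is enabled out of the box, while gemini-1.5-flash stays opt-in via the commented list above. A minimal sketch of opting in from a user config, assuming the project's usual convention that config_private.py overrides config.py and that every listed name has a matching model_info entry:

    # Sketch (hypothetical user override, e.g. in config_private.py):
    AVAIL_LLM_MODELS = [
        "gpt-3.5-turbo",
        "gemini-1.5-pro",     # enabled by default after this commit
        "gemini-1.5-flash",   # opt-in: copied out of the commented list above
    ]
    GEMINI_API_KEY = "your-api-key"  # the Gemini bridge refuses to run without this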

File: request_llms/bridge_all.py

@@ -407,22 +407,46 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    # Gemini
+    # Note: gemini-pro is now an alias of gemini-1.0-pro.
+    # Warning: gemini-pro-vision has been deprecated.
+    # Support for gemini-pro-vision has been removed.
     "gemini-pro": {
         "fn_with_ui": genai_ui,
         "fn_without_ui": genai_noui,
         "endpoint": gemini_endpoint,
+        "has_multimodal_capacity": False,
         "max_token": 1024 * 32,
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
-    "gemini-pro-vision": {
+    "gemini-1.0-pro": {
         "fn_with_ui": genai_ui,
         "fn_without_ui": genai_noui,
         "endpoint": gemini_endpoint,
+        "has_multimodal_capacity": False,
         "max_token": 1024 * 32,
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    "gemini-1.5-pro": {
+        "fn_with_ui": genai_ui,
+        "fn_without_ui": genai_noui,
+        "endpoint": gemini_endpoint,
+        "has_multimodal_capacity": True,
+        "max_token": 1024 * 204800,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+    "gemini-1.5-flash": {
+        "fn_with_ui": genai_ui,
+        "fn_without_ui": genai_noui,
+        "endpoint": gemini_endpoint,
+        "has_multimodal_capacity": True,
+        "max_token": 1024 * 204800,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
     # cohere
     "cohere-command-r-plus": {

File: request_llms/bridge_google_gemini.py

@@ -8,15 +8,15 @@ import os
 import time
 from request_llms.com_google import GoogleChatInit
 from toolbox import ChatBotWithCookies
-from toolbox import get_conf, update_ui, update_ui_lastest_msg, have_any_recent_upload_image_files, trimmed_format_exc, log_chat
+from toolbox import get_conf, update_ui, update_ui_lastest_msg, have_any_recent_upload_image_files, trimmed_format_exc, log_chat, encode_image

 proxies, TIMEOUT_SECONDS, MAX_RETRY = get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY')
 timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                   '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'

-def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None,
-                                  console_slience=False):
+def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="", observe_window:list=[],
+                                  console_slience:bool=False):
     # check the API key
     if get_conf("GEMINI_API_KEY") == "":
         raise ValueError(f"请配置 GEMINI_API_KEY。")
@@ -44,9 +44,20 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
         raise RuntimeError(f'{gpt_replying_buffer} 对话错误')
     return gpt_replying_buffer

+def make_media_input(inputs, image_paths):
+    image_base64_array = []
+    for image_path in image_paths:
+        path = os.path.abspath(image_path)
+        inputs = inputs + f'<br/><br/><div align="center"><img src="file={path}"></div>'
+        base64 = encode_image(path)
+        image_base64_array.append(base64)
+    return inputs, image_base64_array

 def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWithCookies,
             history:list=[], system_prompt:str='', stream:bool=True, additional_fn:str=None):
+    from .bridge_all import model_info
     # check the API key
     if get_conf("GEMINI_API_KEY") == "":
         yield from update_ui_lastest_msg(f"请配置 GEMINI_API_KEY。", chatbot=chatbot, history=history, delay=0)
@@ -57,18 +68,17 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
         from core_functional import handle_core_functionality
         inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

-    if "vision" in llm_kwargs["llm_model"]:
-        have_recent_file, image_paths = have_any_recent_upload_image_files(chatbot)
-        if not have_recent_file:
-            chatbot.append((inputs, "没有检测到任何近期上传的图像文件,请上传jpg格式的图片,此外,请注意拓展名需要小写"))
-            yield from update_ui(chatbot=chatbot, history=history, msg="等待图片")  # refresh the UI
-            return
-        def make_media_input(inputs, image_paths):
-            for image_path in image_paths:
-                inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
-            return inputs
-        if have_recent_file:
-            inputs = make_media_input(inputs, image_paths)
+    # multimodal capacity
+    # inspired by code in bridge_chatgpt
+    has_multimodal_capacity = model_info[llm_kwargs['llm_model']].get('has_multimodal_capacity', False)
+    if has_multimodal_capacity:
+        has_recent_image_upload, image_paths = have_any_recent_upload_image_files(chatbot, pop=True)
+    else:
+        has_recent_image_upload, image_paths = False, []
+    if has_recent_image_upload:
+        inputs, image_base64_array = make_media_input(inputs, image_paths)
+    else:
+        inputs, image_base64_array = inputs, []

     chatbot.append((inputs, ""))
     yield from update_ui(chatbot=chatbot, history=history)
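
Two behavior changes are folded into this hunk: capability now comes from the model_info registry instead of a substring check on the model name, and a capable model with no fresh upload silently degrades to a text-only request instead of aborting with an "upload a jpg" message. Note also pop=True, which consumes the pending uploads so the same images are not re-sent on the next turn.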
@@ -76,7 +86,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
     retry = 0
     while True:
         try:
-            stream_response = genai.generate_chat(inputs, llm_kwargs, history, system_prompt)
+            stream_response = genai.generate_chat(inputs, llm_kwargs, history, system_prompt, image_base64_array, has_multimodal_capacity)
             break
         except Exception as e:
             retry += 1
@@ -112,7 +122,6 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
         yield from update_ui(chatbot=chatbot, history=history)

 if __name__ == '__main__':
     import sys
     llm_kwargs = {'llm_model': 'gemini-pro'}
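
For reference, a minimal sketch of exercising the no-UI path with one of the new models, following the __main__ stanza above (assumes GEMINI_API_KEY is configured; llm_kwargs carries only the keys this bridge actually reads):

    # Sketch: one-shot request through the no-UI entry point.
    llm_kwargs = {'llm_model': 'gemini-1.5-flash', 'temperature': 0.7, 'top_p': 0.8}
    reply = predict_no_ui_long_connection("Summarize this repo in one sentence.",
                                          llm_kwargs, history=[], sys_prompt="")
    print(reply)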

File: request_llms/com_google.py

@@ -7,7 +7,7 @@ import os
 import re
 import requests
 from typing import List, Dict, Tuple
-from toolbox import get_conf, encode_image, get_pictures_list, to_markdown_tabs
+from toolbox import get_conf, update_ui, encode_image, get_pictures_list, to_markdown_tabs

 proxies, TIMEOUT_SECONDS = get_conf("proxies", "TIMEOUT_SECONDS")
@@ -112,6 +112,14 @@ def html_local_img(__file, layout="left", max_width=None, max_height=None, md=Tr
     return a

+def reverse_base64_from_input(inputs):
+    pattern = re.compile(r'<br/><br/><div align="center"><img[^<>]+base64="([^"]+)"></div>')
+    base64_strings = pattern.findall(inputs)
+    return base64_strings
+
+def contain_base64(inputs):
+    base64_strings = reverse_base64_from_input(inputs)
+    return len(base64_strings) > 0

 class GoogleChatInit:
     def __init__(self, llm_kwargs):
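
These helpers recover images that earlier turns embedded into the chat history as HTML: reverse_base64_from_input pulls every base64 payload out of a rendered turn, and contain_base64 is its boolean form. A usage sketch with a fabricated history entry (the base64 attribute is assumed to be written by the input handler when images are attached):

    # Sketch: detecting an embedded image in a stored chat turn.
    turn = ('What is this? <br/><br/><div align="center">'
            '<img src="file=/tmp/cat.jpg" base64="iVBORw0KGgo..."></div>')
    contain_base64(turn)              # True
    reverse_base64_from_input(turn)   # ['iVBORw0KGgo...']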
@@ -119,9 +127,9 @@ class GoogleChatInit:
         endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
         self.url_gemini = endpoint + "/%m:streamGenerateContent?key=%k"

-    def generate_chat(self, inputs, llm_kwargs, history, system_prompt):
+    def generate_chat(self, inputs, llm_kwargs, history, system_prompt, image_base64_array:list=[], has_multimodal_capacity:bool=False):
         headers, payload = self.generate_message_payload(
-            inputs, llm_kwargs, history, system_prompt
+            inputs, llm_kwargs, history, system_prompt, image_base64_array, has_multimodal_capacity
         )
         response = requests.post(
             url=self.url_gemini,
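
self.url_gemini is a template: %m and %k are placeholders that generate_message_payload later substitutes with the model name and the API key. A sketch of the resolved URL, assuming the stock Generative Language endpoint behind gemini_endpoint:

    # Sketch: template resolution for gemini-1.5-flash (key elided).
    url = "https://generativelanguage.googleapis.com/v1beta/models/%m:streamGenerateContent?key=%k"
    url = url.replace("%m", "gemini-1.5-flash").replace("%k", "<GEMINI_API_KEY>")
    # https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=<GEMINI_API_KEY>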
@@ -133,13 +141,16 @@ class GoogleChatInit:
         )
         return response.iter_lines()

-    def __conversation_user(self, user_input, llm_kwargs):
+    def __conversation_user(self, user_input, llm_kwargs, enable_multimodal_capacity):
         what_i_have_asked = {"role": "user", "parts": []}
-        if "vision" not in self.url_gemini:
+        from .bridge_all import model_info
+        if enable_multimodal_capacity:
+            input_, encode_img = input_encode_handler(user_input, llm_kwargs=llm_kwargs)
+        else:
             input_ = user_input
             encode_img = []
-        else:
-            input_, encode_img = input_encode_handler(user_input, llm_kwargs=llm_kwargs)
         what_i_have_asked["parts"].append({"text": input_})
         if encode_img:
             for data in encode_img:
@@ -153,12 +164,12 @@ class GoogleChatInit:
             )
         return what_i_have_asked

-    def __conversation_history(self, history, llm_kwargs):
+    def __conversation_history(self, history, llm_kwargs, enable_multimodal_capacity):
         messages = []
         conversation_cnt = len(history) // 2
         if conversation_cnt:
             for index in range(0, 2 * conversation_cnt, 2):
-                what_i_have_asked = self.__conversation_user(history[index], llm_kwargs)
+                what_i_have_asked = self.__conversation_user(history[index], llm_kwargs, enable_multimodal_capacity)
                 what_gpt_answer = {
                     "role": "model",
                     "parts": [{"text": history[index + 1]}],
@@ -168,7 +179,7 @@ class GoogleChatInit:
         return messages

     def generate_message_payload(
-        self, inputs, llm_kwargs, history, system_prompt
+        self, inputs, llm_kwargs, history, system_prompt, image_base64_array:list=[], has_multimodal_capacity:bool=False
     ) -> Tuple[Dict, Dict]:
         messages = [
             # {"role": "system", "parts": [{"text": system_prompt}]},  # Gemini does not allow an even number of conversation turns, so this is unused for now; revisit when supported.
@@ -179,21 +190,29 @@ class GoogleChatInit:
             "%m", llm_kwargs["llm_model"]
         ).replace("%k", get_conf("GEMINI_API_KEY"))
         header = {"Content-Type": "application/json"}

-        if "vision" not in self.url_gemini:  # only process history for non-vision models
+        if has_multimodal_capacity:
+            enable_multimodal_capacity = (len(image_base64_array) > 0) or any([contain_base64(h) for h in history])
+        else:
+            enable_multimodal_capacity = False
+
+        if not enable_multimodal_capacity:
             messages.extend(
-                self.__conversation_history(history, llm_kwargs)
+                self.__conversation_history(history, llm_kwargs, enable_multimodal_capacity)
             )  # process history
-        messages.append(self.__conversation_user(inputs, llm_kwargs))  # process the user turn
+        messages.append(self.__conversation_user(inputs, llm_kwargs, enable_multimodal_capacity))  # process the user turn

         payload = {
             "contents": messages,
             "generationConfig": {
-                # "maxOutputTokens": 800,
+                # "maxOutputTokens": llm_kwargs.get("max_token", 1024),
                 "stopSequences": str(llm_kwargs.get("stop", "")).split(" "),
                 "temperature": llm_kwargs.get("temperature", 1),
                 "topP": llm_kwargs.get("top_p", 0.8),
                 "topK": 10,
             },
         }
         return header, payload
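
To make the wire format concrete: stopSequences comes from splitting the configured stop string on spaces, topK is pinned at 10, and when multimodal capacity is enabled each user turn can carry inline image data alongside text. A sketch of the resulting streamGenerateContent body (field values illustrative; the inline_data shape mirrors what __conversation_user is assumed to append for each encoded image):

    # Sketch: request body produced by generate_message_payload.
    payload = {
        "contents": [
            {"role": "user", "parts": [
                {"text": "What is in this picture?"},
                {"inline_data": {"mime_type": "image/jpeg", "data": "<base64>"}},
            ]},
        ],
        "generationConfig": {
            "stopSequences": [""],   # str(llm_kwargs.get("stop", "")).split(" ")
            "temperature": 1,
            "topP": 0.8,
            "topK": 10,
        },
    }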