Disallow special tokens + limit the number of files to < 512

This commit is contained in:
505030475
2023-04-14 09:50:14 +08:00
parent a2002ebd85
commit dd648bd446
8 changed files with 10 additions and 9 deletions

View File

@@ -62,7 +62,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
# NOTE(review): this diff view lists two definitions of get_token_fn back to
# back; the first appears to be the line removed by the commit and the second
# the line that replaces it — confirm against the raw diff.
# Earlier form: returns the length of enc.encode(txt), calling encode with its
# default arguments.
def get_token_fn(txt): return len(enc.encode(txt))
# Later form: same length computation, but passes an empty tuple as
# disallowed_special to enc.encode.
# NOTE(review): per tiktoken's documentation an empty disallowed_special should
# stop encode() from raising when txt contains special-token strings and have
# them encoded as ordinary text — confirm for the installed tiktoken version.
def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
# 第6步任务函数