Compare commits
473 Commits
version2.7
...
version2.6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c917f6e6fe | ||
|
|
97d33e9413 | ||
|
|
76940a3041 | ||
|
|
de658b3104 | ||
|
|
35a5c97eca | ||
|
|
7b44010a71 | ||
|
|
e467ab2d3f | ||
|
|
4681ba5e3c | ||
|
|
7c494be291 | ||
|
|
4a96d165ca | ||
|
|
c151adf697 | ||
|
|
095d32df84 | ||
|
|
3084c03e55 | ||
|
|
37ffbacd08 | ||
|
|
8f8b1e9d10 | ||
|
|
c84753beb9 | ||
|
|
e356029248 | ||
|
|
db6137dd27 | ||
|
|
e9b4e0c75a | ||
|
|
df246195d3 | ||
|
|
7395290aaa | ||
|
|
87d7e320d3 | ||
|
|
8afcc522df | ||
|
|
38b0ce6805 | ||
|
|
04b280be1b | ||
|
|
fe7b2d763b | ||
|
|
90dcc8f1f2 | ||
|
|
225c731589 | ||
|
|
d9a92c9016 | ||
|
|
039405c3bb | ||
|
|
f0dbf33d06 | ||
|
|
53df6d5647 | ||
|
|
78f93c6eae | ||
|
|
bc8cda92ac | ||
|
|
caf1cba1a0 | ||
|
|
8e128300e0 | ||
|
|
7c94300bba | ||
|
|
b2b058fc27 | ||
|
|
4179495ac4 | ||
|
|
501093c201 | ||
|
|
931c941450 | ||
|
|
9a2a625910 | ||
|
|
c11e7f6e92 | ||
|
|
e56e625fca | ||
|
|
93ebd4be96 | ||
|
|
ebf1055ff4 | ||
|
|
115c035ff8 | ||
|
|
6eb397ead2 | ||
|
|
f8bd5fcd27 | ||
|
|
4641d0551e | ||
|
|
a9d6c933f4 | ||
|
|
587271ec42 | ||
|
|
34a678141a | ||
|
|
f9c4fb4b5d | ||
|
|
9f505fb4ca | ||
|
|
40804913ca | ||
|
|
bb3dd97cfe | ||
|
|
8ddc1adae4 | ||
|
|
4e3f759d0c | ||
|
|
94ff62bdaa | ||
|
|
2cbb5dbdaa | ||
|
|
3b85a29f91 | ||
|
|
166daa1ea7 | ||
|
|
5c3ecd7477 | ||
|
|
d5b03377ff | ||
|
|
7cd11f2bbd | ||
|
|
f65cc8deea | ||
|
|
48ee620524 | ||
|
|
8a5be8fb8d | ||
|
|
f26b8e28e1 | ||
|
|
b005b84ad6 | ||
|
|
1edf7ef80d | ||
|
|
3fed08f65e | ||
|
|
fa8603d745 | ||
|
|
6b5c2538cf | ||
|
|
7f1c7ebd68 | ||
|
|
ff87aebc29 | ||
|
|
2c746056ff | ||
|
|
0e4cac29f8 | ||
|
|
8513d46398 | ||
|
|
b2495a6f7e | ||
|
|
5603d33d67 | ||
|
|
d06d4f3a6f | ||
|
|
b2adc77a73 | ||
|
|
1f6e2547b2 | ||
|
|
fd0e3fb5c4 | ||
|
|
a0b7ae6674 | ||
|
|
8ca232cda3 | ||
|
|
34e983c7a5 | ||
|
|
c0d096726c | ||
|
|
969e8c1d89 | ||
|
|
d4e3082db4 | ||
|
|
777e56882b | ||
|
|
4da7d75ad4 | ||
|
|
1538acaa5a | ||
|
|
b47f69978e | ||
|
|
823c136de4 | ||
|
|
cede926b54 | ||
|
|
bb036d92c6 | ||
|
|
7d96a5753c | ||
|
|
179d87d3eb | ||
|
|
e42f84a812 | ||
|
|
8c21ada50c | ||
|
|
f99a7bb6f6 | ||
|
|
1c4cdde478 | ||
|
|
7b851ff6b4 | ||
|
|
f1b50dd5f5 | ||
|
|
d936800765 | ||
|
|
b6e05f93d1 | ||
|
|
2c9bf11464 | ||
|
|
e5d014ea2f | ||
|
|
cd89a4809e | ||
|
|
fe8a1ab590 | ||
|
|
f63100939a | ||
|
|
9587808c12 | ||
|
|
527ef979dc | ||
|
|
bb0b6a2f34 | ||
|
|
19302a33b4 | ||
|
|
03cf9fda6c | ||
|
|
ab35afd399 | ||
|
|
53d20b26d9 | ||
|
|
d5c1c150d2 | ||
|
|
3eb4bbb123 | ||
|
|
f4d252ebe2 | ||
|
|
ee7494c0b7 | ||
|
|
dcdc8351e7 | ||
|
|
0aeb5b28cd | ||
|
|
1dd1720d38 | ||
|
|
19be0490af | ||
|
|
0c9e18291a | ||
|
|
9f47d0f714 | ||
|
|
7a254c150f | ||
|
|
3648648b3d | ||
|
|
1da60b7a0c | ||
|
|
c40f6f00bb | ||
|
|
a239abac50 | ||
|
|
1042d28e1f | ||
|
|
7b75422c26 | ||
|
|
99817e9040 | ||
|
|
c9fa26405d | ||
|
|
005232afa6 | ||
|
|
5b8cc5a899 | ||
|
|
a4137e7170 | ||
|
|
aaf44750d9 | ||
|
|
bd6eb90449 | ||
|
|
b5a48369a4 | ||
|
|
6b5bdbe98a | ||
|
|
69624c66d7 | ||
|
|
69be335d22 | ||
|
|
bb1e410cb4 | ||
|
|
9ccc53fa96 | ||
|
|
4b83486b3d | ||
|
|
417c8325de | ||
|
|
d51ae6abb2 | ||
|
|
fff7b8ef91 | ||
|
|
c59eb8ff9e | ||
|
|
a74f0a9343 | ||
|
|
f4905a60e2 | ||
|
|
a562849e4c | ||
|
|
6105b7f73b | ||
|
|
3486fb5c10 | ||
|
|
5f7a1a3da3 | ||
|
|
8d086ce7c0 | ||
|
|
b188c4a2b5 | ||
|
|
10cf456aa8 | ||
|
|
4043db7f33 | ||
|
|
9a192fd473 | ||
|
|
9f91fca4d2 | ||
|
|
160b001bef | ||
|
|
1d912bc10d | ||
|
|
51b3f8adca | ||
|
|
16de1812d3 | ||
|
|
641b96548a | ||
|
|
34f4ba211d | ||
|
|
19aba350a3 | ||
|
|
ab57f4bfb0 | ||
|
|
b7e0a48cd2 | ||
|
|
58b051ead3 | ||
|
|
0c7378e096 | ||
|
|
a52ae14457 | ||
|
|
ce3e9b6289 | ||
|
|
9f07531a16 | ||
|
|
2f646b3199 | ||
|
|
30b1cbd95c | ||
|
|
6c32961211 | ||
|
|
ba7c7290c1 | ||
|
|
f245f94444 | ||
|
|
e86ffad6e7 | ||
|
|
706b2604d8 | ||
|
|
e35f7a7186 | ||
|
|
7c91cfebfa | ||
|
|
9d84ddbc62 | ||
|
|
3d3ffd6de2 | ||
|
|
82038a42da | ||
|
|
0ce2d423cf | ||
|
|
4d6bdca3fc | ||
|
|
c64dbb03fd | ||
|
|
8575c82ed7 | ||
|
|
bca61754e3 | ||
|
|
f61ea1559c | ||
|
|
12c36a68ce | ||
|
|
998e127b2f | ||
|
|
c9c9449a59 | ||
|
|
722b538261 | ||
|
|
29fb436e76 | ||
|
|
6789eaee45 | ||
|
|
937823ec64 | ||
|
|
3c95299f48 | ||
|
|
639e24fc82 | ||
|
|
364810983a | ||
|
|
cb9404c4de | ||
|
|
17a18e99fa | ||
|
|
30ea77a496 | ||
|
|
01931b0bd2 | ||
|
|
6a2c7db7c1 | ||
|
|
9c15c446a6 | ||
|
|
44ed8a46ad | ||
|
|
41801c017c | ||
|
|
3e88422ab6 | ||
|
|
7c8b8b95b2 | ||
|
|
65b1d78516 | ||
|
|
e815afb792 | ||
|
|
ac681d3201 | ||
|
|
ecebdf3ab5 | ||
|
|
8aea6536e0 | ||
|
|
9c90b28bed | ||
|
|
36ef2fa884 | ||
|
|
89ca010a11 | ||
|
|
60fc2bf4c1 | ||
|
|
838c3dc881 | ||
|
|
16c59e1bf6 | ||
|
|
fb889cb4ce | ||
|
|
c91cfc64d9 | ||
|
|
dcab956cff | ||
|
|
da55ae68f6 | ||
|
|
201f53c0f0 | ||
|
|
77a98f03d0 | ||
|
|
9ee5545ad5 | ||
|
|
d37b0ce447 | ||
|
|
0abb84ae4b | ||
|
|
e6034b6928 | ||
|
|
ac9e72b9f8 | ||
|
|
cb1ac5d13d | ||
|
|
3497addc7b | ||
|
|
f17c580fd9 | ||
|
|
808e23c98a | ||
|
|
e3e4fa19a2 | ||
|
|
3cc0635628 | ||
|
|
7149f9fe90 | ||
|
|
d2f009ba8d | ||
|
|
8f4f13efd5 | ||
|
|
fefe96144f | ||
|
|
5521f0e41c | ||
|
|
2d5d719696 | ||
|
|
2b339464ee | ||
|
|
43ad798c68 | ||
|
|
a441c90436 | ||
|
|
5ca623e9c5 | ||
|
|
c74a8b04b5 | ||
|
|
34c693a571 | ||
|
|
d4cd1877f4 | ||
|
|
57a668bf30 | ||
|
|
700d14be10 | ||
|
|
3fe96956ce | ||
|
|
c358c95630 | ||
|
|
70c02a08e9 | ||
|
|
27f7153f37 | ||
|
|
eb69704c20 | ||
|
|
d616915348 | ||
|
|
5381df8f35 | ||
|
|
a7c857d4d9 | ||
|
|
1c3ce18eff | ||
|
|
a689960e31 | ||
|
|
07eca9fe55 | ||
|
|
00f29f2120 | ||
|
|
10ea3daaa5 | ||
|
|
eb95eec122 | ||
|
|
e03634f9e2 | ||
|
|
77d4628877 | ||
|
|
8c3d37bbce | ||
|
|
28f6801870 | ||
|
|
6d7fb20b5a | ||
|
|
9dc0d3273b | ||
|
|
13e976774f | ||
|
|
35d3346a1c | ||
|
|
eaec1c3728 | ||
|
|
0140b0cda8 | ||
|
|
f98fe5b9ab | ||
|
|
932c430d5d | ||
|
|
0063f8dd82 | ||
|
|
23b8c47c54 | ||
|
|
5353a32345 | ||
|
|
d569d2f9c1 | ||
|
|
67e6070d80 | ||
|
|
01873f7811 | ||
|
|
c2318bc197 | ||
|
|
86dc4ca3af | ||
|
|
4a2f73d007 | ||
|
|
9c643e6d8c | ||
|
|
34323c9d36 | ||
|
|
48cc2349e3 | ||
|
|
7d57967519 | ||
|
|
68ffa54c84 | ||
|
|
02ddcdf731 | ||
|
|
14ea206a5a | ||
|
|
814c93bb75 | ||
|
|
fb953148b9 | ||
|
|
2bcd34360a | ||
|
|
b3211362f9 | ||
|
|
be1bf6cb77 | ||
|
|
5f771d8acf | ||
|
|
095579c323 | ||
|
|
f9308300f3 | ||
|
|
6ca28fbff2 | ||
|
|
11f619f76f | ||
|
|
39d0176673 | ||
|
|
043bd59ffc | ||
|
|
2ac602e0aa | ||
|
|
50ee37f23c | ||
|
|
f0d9098df5 | ||
|
|
2401cf2136 | ||
|
|
dd3c4f988c | ||
|
|
22e7dc617b | ||
|
|
a3c937c202 | ||
|
|
1b01d0fd8a | ||
|
|
0b018bf871 | ||
|
|
88d41ab4ca | ||
|
|
6ebadeb2a6 | ||
|
|
515045a8d1 | ||
|
|
74d5061969 | ||
|
|
0b7c1c50ca | ||
|
|
f45e0d3486 | ||
|
|
881132557e | ||
|
|
6d852d76b0 | ||
|
|
b588291cdf | ||
|
|
b4e0fe39ea | ||
|
|
7295978c93 | ||
|
|
f2359e0442 | ||
|
|
ced6898daa | ||
|
|
39ad492a65 | ||
|
|
dd8ec0d511 | ||
|
|
43d59d936f | ||
|
|
4bca8b4f82 | ||
|
|
46eba1f399 | ||
|
|
5a6877f9fa | ||
|
|
2c15b51ea4 | ||
|
|
9666b9b99b | ||
|
|
baf61477d2 | ||
|
|
00e411bd68 | ||
|
|
37bcdf684d | ||
|
|
c80808a0c5 | ||
|
|
b69313884d | ||
|
|
3b6675755e | ||
|
|
3e2e4937db | ||
|
|
e6f7e75e19 | ||
|
|
232d06a1ab | ||
|
|
22db1147db | ||
|
|
eb77532df5 | ||
|
|
9ee3d1390d | ||
|
|
141df08332 | ||
|
|
248bcb7095 | ||
|
|
7c7b1cb030 | ||
|
|
290a33ea74 | ||
|
|
aabe2818e7 | ||
|
|
c3a6a09c4c | ||
|
|
8c5332748f | ||
|
|
0b0860defc | ||
|
|
18894b04f1 | ||
|
|
8e75ad8134 | ||
|
|
7c33580bf6 | ||
|
|
223e747d57 | ||
|
|
8c7be26661 | ||
|
|
a1fceeae45 | ||
|
|
4b534400ea | ||
|
|
ac9b590660 | ||
|
|
695ed5e02d | ||
|
|
c063510442 | ||
|
|
bc6d0926b1 | ||
|
|
0f20ffeff4 | ||
|
|
9299c93b17 | ||
|
|
3463a1173a | ||
|
|
74eaff4919 | ||
|
|
dd51708309 | ||
|
|
25693c362c | ||
|
|
9eb15f5e68 | ||
|
|
7e195244f1 | ||
|
|
98d97ebbc8 | ||
|
|
6ab3672cfe | ||
|
|
6f1ad29e3f | ||
|
|
61c65d90fe | ||
|
|
8965706169 | ||
|
|
a63ae898ce | ||
|
|
186f10a3e3 | ||
|
|
67ed394484 | ||
|
|
2f4c46b22c | ||
|
|
2f5c977a27 | ||
|
|
f801cfbcf6 | ||
|
|
15693f3f8f | ||
|
|
8cbac095c1 | ||
|
|
289ace03cf | ||
|
|
cefc40aff8 | ||
|
|
15dd4b7a80 | ||
|
|
5f8473badb | ||
|
|
a8cfe2f113 | ||
|
|
27e056ea35 | ||
|
|
278f21fde0 | ||
|
|
121f288878 | ||
|
|
5a6b46f0b5 | ||
|
|
22766d3c12 | ||
|
|
4c72e6cc80 | ||
|
|
38705af90b | ||
|
|
35e15aab8c | ||
|
|
1cad53f641 | ||
|
|
818690037a | ||
|
|
8053f696d5 | ||
|
|
0b1afab5dd | ||
|
|
aa87d031f5 | ||
|
|
ba2dd3d0ff | ||
|
|
c19419e1fb | ||
|
|
031232c986 | ||
|
|
ea4cd5a75a | ||
|
|
3fb519bf83 | ||
|
|
1e4bcc0757 | ||
|
|
44b40ff726 | ||
|
|
791dbf8592 | ||
|
|
82fe0f758d | ||
|
|
8b1def3425 | ||
|
|
8e85e83cec | ||
|
|
ecdeda8e92 | ||
|
|
831009f027 | ||
|
|
c6da02422b | ||
|
|
2ad73e9ec3 | ||
|
|
a0f73a61ea | ||
|
|
1b6d611888 | ||
|
|
47d9ea4921 | ||
|
|
b1e33b0f7a | ||
|
|
1e125ca05f | ||
|
|
16253ed53c | ||
|
|
8beb75762c | ||
|
|
8da4a0af45 | ||
|
|
371bef6e1a | ||
|
|
2aaa836d81 | ||
|
|
14d53a288f | ||
|
|
b1d6e711c0 | ||
|
|
32b005199d | ||
|
|
a8886562a1 | ||
|
|
a60d881268 | ||
|
|
b9967686a1 | ||
|
|
dd3b3524bd | ||
|
|
a880819ffd | ||
|
|
b8d802a922 | ||
|
|
01aafd4df2 | ||
|
|
7b3b6f5241 | ||
|
|
7a50af1897 | ||
|
|
5dc406c641 | ||
|
|
3103817990 | ||
|
|
88aaf310c4 | ||
|
|
e8d86a3242 | ||
|
|
37fd1b1c97 | ||
|
|
e3e313beab | ||
|
|
380a8a9d4d | ||
|
|
d00f6bb1a6 | ||
|
|
f8a44a82a9 | ||
|
|
2be45c386d | ||
|
|
2b7e8b9278 | ||
|
|
8590014462 | ||
|
|
8215caddb2 | ||
|
|
93a9d27b1e | ||
|
|
e128523103 | ||
|
|
da0caab4cf | ||
|
|
6e9813565a | ||
|
|
01cc409f99 | ||
|
|
3438f8f291 |
12
.github/ISSUE_TEMPLATE/bug_report.md
vendored
12
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -7,17 +7,11 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
- **(1) Describe the bug 简述**
|
||||
**Describe the bug 简述**
|
||||
|
||||
**Screen Shot 截图**
|
||||
|
||||
- **(2) Screen Shot 截图**
|
||||
|
||||
|
||||
- **(3) Terminal Traceback 终端traceback(如有)**
|
||||
|
||||
|
||||
- **(4) Material to Help Reproduce Bugs 帮助我们复现的测试材料样本(如有)**
|
||||
|
||||
**Terminal Traceback 终端traceback(如果有)**
|
||||
|
||||
|
||||
Before submitting an issue 提交issue之前:
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -55,7 +55,6 @@ coverage.xml
|
||||
*.pot
|
||||
github
|
||||
.github
|
||||
.idea/
|
||||
TEMP
|
||||
TRASH
|
||||
|
||||
@@ -133,7 +132,6 @@ dmypy.json
|
||||
.pyre/
|
||||
|
||||
.vscode
|
||||
.idea
|
||||
|
||||
history
|
||||
ssr_conf
|
||||
|
||||
88
README.md
88
README.md
@@ -1,15 +1,23 @@
|
||||
# ChatGPT 学术优化
|
||||
|
||||
**如果喜欢这个项目,请给它一个Star;如果你发明了更好用的快捷键或函数插件,欢迎发issue或者pull requests**
|
||||
|
||||
If you like this project, please give it a Star. If you've come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request. We also have a [README in English](img/README_EN.md) translated by this project itself.
|
||||
|
||||
> **Note**
|
||||
>
|
||||
> 1.请注意只有**红颜色**标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR!
|
||||
> 紧急:很抱歉2.60版本的一部分代码重构出错,目前2.67及以上版本已经解决,请您及时更新。
|
||||
>
|
||||
|
||||
# ChatGPT 学术优化
|
||||
|
||||
**如果喜欢这个项目,请给它一个Star;如果你发明了更好用的快捷键或函数插件,欢迎发issue或者pull requests(dev分支)**
|
||||
|
||||
If you like this project, please give it a Star. If you've come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request (to `dev` branch).
|
||||
|
||||
> **Note**
|
||||
>
|
||||
> 1.请注意只有“红颜色”标识的函数插件(按钮)才支持读取文件。目前对pdf/word格式文件的支持插件正在逐步完善中,需要更多developer的帮助。
|
||||
>
|
||||
> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。
|
||||
>
|
||||
> 3.如果您不太习惯部分中文命名的函数、注释或者界面,您可以随时点击相关函数插件,调用ChatGPT一键生成纯英文的项目源代码。
|
||||
>
|
||||
|
||||
|
||||
<div align="center">
|
||||
@@ -22,40 +30,46 @@ If you like this project, please give it a Star. If you've come up with more use
|
||||
[自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键
|
||||
[配置代理服务器](https://www.bilibili.com/video/BV1rc411W7Dr) | 支持配置代理服务器
|
||||
模块化设计 | 支持自定义高阶的实验性功能与[函数插件],插件支持[热更新](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
|
||||
[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码
|
||||
[程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树
|
||||
[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键读懂本项目的源代码
|
||||
[程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java项目树
|
||||
读论文 | [函数插件] 一键解读latex论文全文并生成摘要
|
||||
Latex全文翻译、润色 | [函数插件] 一键翻译或润色latex论文
|
||||
批量注释生成 | [函数插件] 一键批量生成函数注释
|
||||
chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
|
||||
[arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
|
||||
[PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程)
|
||||
[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你选择有趣的文章
|
||||
公式/图片/表格显示 | 可以同时显示公式的tex形式和渲染形式,支持公式、代码高亮
|
||||
[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) (Version>=2.45) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你选择有趣的文章
|
||||
公式显示 | 可以同时显示公式的tex形式和渲染形式
|
||||
图片显示 | 可以在markdown中显示图片
|
||||
多线程函数插件支持 | 支持多线程调用chatgpt,一键处理海量文本或程序
|
||||
支持GPT输出的markdown表格 | 可以正确渲染GPT输出的markdown表格
|
||||
启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__dark-theme=true```可以切换dark主题
|
||||
[多LLM模型](https://www.bilibili.com/video/BV1EM411K7VH/)支持([v3.1分支](https://github.com/binary-husky/chatgpt_academic/tree/v3.1)) | 同时被ChatGPT和[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)伺候的感觉一定会很不错吧?
|
||||
兼容[TGUI](https://github.com/oobabooga/text-generation-webui)接入更多样的语言模型 | 接入opt-1.3b, galactica-1.3b等模型([v3.1分支](https://github.com/binary-husky/chatgpt_academic/tree/v3.1)测试中)
|
||||
huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/gpt-academic) | 登陆huggingface后复制[此空间](https://huggingface.co/spaces/qingxu98/gpt-academic)
|
||||
…… | ……
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
- 新界面(修改config.py中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
|
||||
<!-- - 新界面(左:master主分支, 右:dev开发前沿) -->
|
||||
- 新界面
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230361456-61078362-a966-4eb5-b49e-3c62ef18b860.gif" width="700" >
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
- 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放粘贴板
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
|
||||
<img src="img/公式.gif" width="700" >
|
||||
</div>
|
||||
|
||||
- 润色/纠错
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
|
||||
<img src="img/润色.gif" width="700" >
|
||||
</div>
|
||||
|
||||
|
||||
- 支持GPT输出的markdown表格
|
||||
<div align="center">
|
||||
<img src="img/demo2.jpg" width="500" >
|
||||
</div>
|
||||
|
||||
- 如果输出包含公式,会同时以tex形式和渲染形式显示,方便复制和阅读
|
||||
@@ -63,20 +77,13 @@ huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
- 懒得看项目代码?整个工程直接给chatgpt炫嘴里
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
|
||||
</div>
|
||||
|
||||
- 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + [API2D](https://api2d.com/)-GPT4, [v3.1分支](https://github.com/binary-husky/chatgpt_academic/tree/v3.1)测试中)
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/232537274-deca0563-7aa6-4b5d-94a2-b7c453c47794.png" width="700" >
|
||||
</div>
|
||||
|
||||
v3.1的[huggingface测试版](https://huggingface.co/spaces/qingxu98/academic-chatgpt-beta)(huggingface版不支持chatglm)
|
||||
|
||||
|
||||
|
||||
## 直接运行 (Windows, Linux or MacOS)
|
||||
|
||||
### 1. 下载项目
|
||||
@@ -158,15 +165,15 @@ input区域 输入 ./crazy_functions/test_project/python/dqn , 然后点击 "[
|
||||
```
|
||||
|
||||
## 其他部署方式
|
||||
|
||||
- 远程云服务器部署
|
||||
请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
|
||||
|
||||
- 使用WSL2(Windows Subsystem for Linux 子系统)
|
||||
请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
|
||||
|
||||
- nginx远程部署
|
||||
请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E7%9A%84%E6%8C%87%E5%AF%BC)
|
||||
|
||||
|
||||
## 自定义新的便捷按钮(学术快捷键自定义)
|
||||
任意文本编辑器打开`core_functional.py`,添加条目如下,然后重启程序即可。(如果按钮已经添加成功并可见,那么前缀、后缀都支持热修改,无需重启程序即可生效。)
|
||||
打开functional.py,添加条目如下,然后重启程序即可。(如果按钮已经添加成功并可见,那么前缀、后缀都支持热修改,无需重启程序即可生效。)
|
||||
例如
|
||||
```
|
||||
"超级英译中": {
|
||||
@@ -184,7 +191,7 @@ input区域 输入 ./crazy_functions/test_project/python/dqn , 然后点击 "[
|
||||
</div>
|
||||
|
||||
|
||||
如果你发明了更好用的快捷键,欢迎发issue或者pull requests!
|
||||
如果你发明了更好用的学术快捷键,欢迎发issue或者pull requests!
|
||||
|
||||
## 配置代理
|
||||
### 方法一:常规方法
|
||||
@@ -202,7 +209,7 @@ python check_proxy.py
|
||||
### 方法二:纯新手教程
|
||||
[纯新手教程](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BB%A3%E7%90%86%E8%BD%AF%E4%BB%B6%E9%97%AE%E9%A2%98%E7%9A%84%E6%96%B0%E6%89%8B%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95%EF%BC%88%E6%96%B9%E6%B3%95%E5%8F%AA%E9%80%82%E7%94%A8%E4%BA%8E%E6%96%B0%E6%89%8B%EF%BC%89)
|
||||
|
||||
## 功能测试
|
||||
## 兼容性测试
|
||||
|
||||
### 图片显示:
|
||||
|
||||
@@ -257,9 +264,13 @@ python check_proxy.py
|
||||
|
||||
## Todo 与 版本规划:
|
||||
|
||||
- version 3.0 (Todo): 优化对chatglm和其他小型llm的支持
|
||||
- version 2.6: 重构了插件结构,提高了交互性,加入更多插件
|
||||
- version 2.5: 自更新,解决总结大工程源代码时文本过长、token溢出的问题
|
||||
- version 3 (Todo):
|
||||
- - 支持gpt4和其他更多llm
|
||||
- version 2.4+ (Todo):
|
||||
- - 总结大工程源代码时文本过长、token溢出的问题
|
||||
- - 实现项目打包部署
|
||||
- - 函数插件参数接口优化
|
||||
- - 自更新
|
||||
- version 2.4: (1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。
|
||||
- version 2.3: 增强多线程交互性
|
||||
- version 2.2: 函数插件支持热重载
|
||||
@@ -276,7 +287,8 @@ python check_proxy.py
|
||||
# 借鉴项目1:借鉴了ChuanhuChatGPT中读取OpenAI json的方法、记录历史问询记录的方法以及gradio queue的使用技巧
|
||||
https://github.com/GaiZhenbiao/ChuanhuChatGPT
|
||||
|
||||
# 借鉴项目2:
|
||||
https://github.com/THUDM/ChatGLM-6B
|
||||
# 借鉴项目2:借鉴了mdtex2html中公式处理的方法
|
||||
https://github.com/polarwinkel/mdtex2html
|
||||
|
||||
|
||||
```
|
||||
|
||||
@@ -19,11 +19,6 @@ if USE_PROXY:
|
||||
else:
|
||||
proxies = None
|
||||
|
||||
# 多线程函数插件中,默认允许多少路线程同时访问OpenAI。
|
||||
# Free trial users的限制是每分钟3次,Pay-as-you-go users的限制是每分钟3500次。提高限制请查询:
|
||||
# https://platform.openai.com/docs/guides/rate-limits/overview
|
||||
DEFAULT_WORKER_NUM = 3
|
||||
|
||||
|
||||
# [step 3]>> 以下配置可以优化体验,但大部分场合下并不需要修改
|
||||
# 对话窗的高度
|
||||
|
||||
@@ -16,8 +16,7 @@ def get_crazy_functions():
|
||||
from crazy_functions.高级功能函数模板 import 高阶功能模板函数
|
||||
from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
|
||||
from crazy_functions.Latex全文润色 import Latex英文润色
|
||||
from crazy_functions.解析项目源代码 import 解析一个Lua项目
|
||||
from crazy_functions.解析项目源代码 import 解析一个CSharp项目
|
||||
|
||||
function_plugins = {
|
||||
|
||||
"解析整个Python项目": {
|
||||
@@ -48,16 +47,6 @@ def get_crazy_functions():
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(解析一个Rect项目)
|
||||
},
|
||||
"解析整个Lua项目": {
|
||||
"Color": "stop", # 按钮颜色
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(解析一个Lua项目)
|
||||
},
|
||||
"解析整个CSharp项目": {
|
||||
"Color": "stop", # 按钮颜色
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(解析一个CSharp项目)
|
||||
},
|
||||
"读Tex论文写摘要": {
|
||||
"Color": "stop", # 按钮颜色
|
||||
"Function": HotReload(读文章写摘要)
|
||||
@@ -87,12 +76,11 @@ def get_crazy_functions():
|
||||
from crazy_functions.总结word文档 import 总结word文档
|
||||
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
|
||||
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
|
||||
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
|
||||
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
|
||||
from crazy_functions.Latex全文润色 import Latex中文润色
|
||||
from crazy_functions.Latex全文翻译 import Latex中译英
|
||||
from crazy_functions.Latex全文翻译 import Latex英译中
|
||||
from crazy_functions.批量Markdown翻译 import Markdown中译英
|
||||
from crazy_functions.批量Markdown翻译 import Markdown英译中
|
||||
|
||||
function_plugins.update({
|
||||
"批量翻译PDF文档(多线程)": {
|
||||
@@ -120,7 +108,12 @@ def get_crazy_functions():
|
||||
"Color": "stop",
|
||||
"Function": HotReload(总结word文档)
|
||||
},
|
||||
"理解PDF文档内容 (模仿ChatPDF)": {
|
||||
# "[测试功能] 理解PDF文档内容(Tk文件选择接口,仅本地)": {
|
||||
# # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
# "AsButton": False, # 加入下拉菜单中
|
||||
# "Function": HotReload(理解PDF文档内容)
|
||||
# },
|
||||
"[测试功能] 理解PDF文档内容(通用接口,读取文件输入区)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
@@ -138,6 +131,7 @@ def get_crazy_functions():
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(Latex中文润色)
|
||||
},
|
||||
|
||||
"[测试功能] Latex项目全文中译英(输入路径或上传压缩包)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Color": "stop",
|
||||
@@ -150,18 +144,7 @@ def get_crazy_functions():
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(Latex英译中)
|
||||
},
|
||||
"[测试功能] 批量Markdown中译英(输入路径或上传压缩包)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(Markdown中译英)
|
||||
},
|
||||
"[测试功能] 批量Markdown英译中(输入路径或上传压缩包)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(Markdown英译中)
|
||||
},
|
||||
|
||||
|
||||
})
|
||||
|
||||
@@ -180,7 +163,5 @@ def get_crazy_functions():
|
||||
except Exception as err:
|
||||
print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')
|
||||
|
||||
|
||||
|
||||
###################### 第n组插件 ###########################
|
||||
return function_plugins
|
||||
|
||||
@@ -14,7 +14,7 @@ class PaperFileGroup():
|
||||
import tiktoken
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
def get_token_num(txt): return len(enc.encode(txt))
|
||||
self.get_token_num = get_token_num
|
||||
|
||||
def run_file_split(self, max_token_limit=1900):
|
||||
@@ -45,7 +45,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
pfg = PaperFileGroup()
|
||||
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
# 定义注释的正则表达式
|
||||
comment_pattern = r'%.*'
|
||||
@@ -92,7 +92,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
chatbot=chatbot,
|
||||
history_array=[[""] for _ in range(n_split)],
|
||||
sys_prompt_array=sys_prompt_array,
|
||||
# max_workers=5, # 并行任务数量限制,最多同时执行5个,其他的排队等待
|
||||
max_workers=10, # OpenAI所允许的最大并行过载
|
||||
scroller_max_len = 80
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ class PaperFileGroup():
|
||||
import tiktoken
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
def get_token_num(txt): return len(enc.encode(txt))
|
||||
self.get_token_num = get_token_num
|
||||
|
||||
def run_file_split(self, max_token_limit=1900):
|
||||
@@ -44,7 +44,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
pfg = PaperFileGroup()
|
||||
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
# 定义注释的正则表达式
|
||||
comment_pattern = r'%.*'
|
||||
@@ -80,7 +80,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
elif language == 'zh->en':
|
||||
inputs_array = [f"Below is a section from a Chinese academic paper, translate it into English, do not modify any latex command such as \section, \cite and equations:" +
|
||||
f"\n\n{frag}" for frag in pfg.sp_file_contents]
|
||||
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
|
||||
inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
|
||||
sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]
|
||||
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
@@ -90,7 +90,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
chatbot=chatbot,
|
||||
history_array=[[""] for _ in range(n_split)],
|
||||
sys_prompt_array=sys_prompt_array,
|
||||
# max_workers=5, # OpenAI所允许的最大并行过载
|
||||
max_workers=10, # OpenAI所允许的最大并行过载
|
||||
scroller_max_len = 80
|
||||
)
|
||||
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
import traceback
|
||||
from toolbox import update_ui, get_conf
|
||||
from toolbox import update_ui
|
||||
|
||||
def input_clipping(inputs, history, max_token_limit):
|
||||
import tiktoken
|
||||
import numpy as np
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
def get_token_num(txt): return len(enc.encode(txt))
|
||||
|
||||
mode = 'input-and-history'
|
||||
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
|
||||
@@ -22,7 +23,7 @@ def input_clipping(inputs, history, max_token_limit):
|
||||
|
||||
while n_token > max_token_limit:
|
||||
where = np.argmax(everything_token)
|
||||
encoded = enc.encode(everything[where], disallowed_special=())
|
||||
encoded = enc.encode(everything[where])
|
||||
clipped_encoded = encoded[:len(encoded)-delta]
|
||||
everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
|
||||
everything_token[where] = get_token_num(everything[where])
|
||||
@@ -64,6 +65,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
from request_llm.bridge_chatgpt import predict_no_ui_long_connection
|
||||
# 用户反馈
|
||||
chatbot.append([inputs_show_user, ""])
|
||||
msg = '正常'
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
executor = ThreadPoolExecutor(max_workers=16)
|
||||
mutable = ["", time.time()]
|
||||
@@ -71,9 +73,6 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
retry_op = retry_times_at_unknown_error
|
||||
exceeded_cnt = 0
|
||||
while True:
|
||||
# watchdog error
|
||||
if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
|
||||
raise RuntimeError("检测到程序终止。")
|
||||
try:
|
||||
# 【第一种情况】:顺利完成
|
||||
result = predict_no_ui_long_connection(
|
||||
@@ -100,20 +99,16 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
except:
|
||||
# 【第三种情况】:其他错误:重试几次
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
print(tb_str)
|
||||
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if retry_op > 0:
|
||||
retry_op -= 1
|
||||
mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
|
||||
if "Rate limit reached" in tb_str:
|
||||
time.sleep(30)
|
||||
mutable[0] += f"[Local Message] 重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
|
||||
time.sleep(5)
|
||||
continue # 返回重试
|
||||
else:
|
||||
time.sleep(5)
|
||||
return mutable[0] # 放弃
|
||||
|
||||
# 提交任务
|
||||
future = executor.submit(_req_gpt, inputs, history, sys_prompt)
|
||||
while True:
|
||||
# yield一次以刷新前端页面
|
||||
@@ -134,7 +129,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array, inputs_show_user_array, llm_kwargs,
|
||||
chatbot, history_array, sys_prompt_array,
|
||||
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
|
||||
refresh_interval=0.2, max_workers=10, scroller_max_len=30,
|
||||
handle_token_exceed=True, show_user_at_complete=False,
|
||||
retry_times_at_unknown_error=2,
|
||||
):
|
||||
@@ -155,7 +150,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
history_array (list): List of chat history (历史对话输入,双层列表,第一层列表是子任务分解,第二层列表是对话历史)
|
||||
sys_prompt_array (list): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
|
||||
refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
|
||||
max_workers (int, optional): Maximum number of threads (default: see config.py) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误)
|
||||
max_workers (int, optional): Maximum number of threads (default: 10) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误)
|
||||
scroller_max_len (int, optional): Maximum length for scroller (default: 30)(数据流的显示最后收到的多少个字符,仅仅服务于视觉效果)
|
||||
handle_token_exceed (bool, optional): (是否在输入过长时,自动缩减文本)
|
||||
handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
|
||||
@@ -170,28 +165,21 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
from request_llm.bridge_chatgpt import predict_no_ui_long_connection
|
||||
assert len(inputs_array) == len(history_array)
|
||||
assert len(inputs_array) == len(sys_prompt_array)
|
||||
if max_workers == -1: # 读取配置文件
|
||||
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
|
||||
except: max_workers = 8
|
||||
if max_workers <= 0 or max_workers >= 20: max_workers = 8
|
||||
executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||
n_frag = len(inputs_array)
|
||||
# 用户反馈
|
||||
chatbot.append(["请开始多线程操作。", ""])
|
||||
msg = '正常'
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
# 跨线程传递
|
||||
# 异步原子
|
||||
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
|
||||
|
||||
# 子线程任务
|
||||
def _req_gpt(index, inputs, history, sys_prompt):
|
||||
gpt_say = ""
|
||||
retry_op = retry_times_at_unknown_error
|
||||
exceeded_cnt = 0
|
||||
mutable[index][2] = "执行中"
|
||||
while True:
|
||||
# watchdog error
|
||||
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
|
||||
raise RuntimeError("检测到程序终止。")
|
||||
try:
|
||||
# 【第一种情况】:顺利完成
|
||||
# time.sleep(10); raise RuntimeError("测试")
|
||||
@@ -224,21 +212,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
except:
|
||||
# 【第三种情况】:其他错误
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
print(tb_str)
|
||||
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
||||
if retry_op > 0:
|
||||
retry_op -= 1
|
||||
wait = random.randint(5, 20)
|
||||
if "Rate limit reached" in tb_str:
|
||||
wait = wait * 3
|
||||
fail_info = "OpenAI请求速率限制 "
|
||||
else:
|
||||
fail_info = ""
|
||||
# 也许等待十几秒后,情况会好转
|
||||
for i in range(wait):
|
||||
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
|
||||
# 开始重试
|
||||
for i in range(wait):# 也许等待十几秒后,情况会好转
|
||||
mutable[index][2] = f"等待重试 {wait-i}"; time.sleep(1)
|
||||
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
|
||||
continue # 返回重试
|
||||
else:
|
||||
@@ -261,6 +241,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
break
|
||||
# 更好的UI视觉效果
|
||||
observe_win = []
|
||||
# print([mutable[thread_index][2] for thread_index, _ in enumerate(worker_done)])
|
||||
# 每个线程都要“喂狗”(看门狗)
|
||||
for thread_index, _ in enumerate(worker_done):
|
||||
mutable[thread_index][1] = time.time()
|
||||
@@ -270,30 +251,49 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
replace('\n', '').replace('```', '...').replace(
|
||||
' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
|
||||
observe_win.append(print_something_really_funny)
|
||||
# 在前端打印些好玩的东西
|
||||
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
|
||||
if not done else f'`{mutable[thread_index][2]}`\n\n'
|
||||
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
|
||||
# 在前端打印些好玩的东西
|
||||
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
|
||||
msg = "正常"
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
|
||||
# 异步任务结束
|
||||
gpt_response_collection = []
|
||||
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
||||
gpt_res = f.result()
|
||||
gpt_response_collection.extend([inputs_show_user, gpt_res])
|
||||
|
||||
# 是否在结束时,在界面上显示结果
|
||||
if show_user_at_complete:
|
||||
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
||||
gpt_res = f.result()
|
||||
chatbot.append([inputs_show_user, gpt_res])
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
time.sleep(0.3)
|
||||
time.sleep(1)
|
||||
return gpt_response_collection
|
||||
|
||||
|
||||
def WithRetry(f):
    """
    Decorator that adds automatic retry to a generator function.

    The wrapped callable is itself a generator function. It takes two extra
    leading arguments before the arguments of ``f``:

    - ``retry``: number of additional attempts allowed after the first
      failure (must be >= 0).
    - ``res_when_fail``: value returned once every attempt has failed.

    Everything ``f`` yields is forwarded to the caller; the return value of
    ``f`` becomes the return value of the wrapper.
    """
    def decorated(retry, res_when_fail, *args, **kwargs):
        assert retry >= 0
        while True:
            try:
                res = yield from f(*args, **kwargs)
                return res
            except Exception:
                # Only ordinary errors are retried. GeneratorExit (raised when
                # the consumer closes this generator) and KeyboardInterrupt
                # must propagate: restarting `f` and yielding again after a
                # GeneratorExit makes Python raise
                # "RuntimeError: generator ignored GeneratorExit".
                retry -= 1
                if retry < 0:
                    print("达到最大重试次数")
                    break
                print("重试中……")
        return res_when_fail
    return decorated
|
||||
|
||||
|
||||
def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
|
||||
def cut(txt_tocut, must_break_at_empty_line): # 递归
|
||||
if get_token_fn(txt_tocut) <= limit:
|
||||
@@ -312,6 +312,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
|
||||
if get_token_fn(prev) < limit:
|
||||
break
|
||||
if cnt == 0:
|
||||
print('what the fuck ?')
|
||||
raise RuntimeError("存在一行极长的文本!")
|
||||
# print(len(post))
|
||||
# 列表递归接龙
|
||||
@@ -324,18 +325,8 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
|
||||
return cut(txt, must_break_at_empty_line=False)
|
||||
|
||||
|
||||
def force_breakdown(txt, limit, get_token_fn):
    """
    Brute-force splitter used when the text cannot be cut at punctuation
    or empty lines.

    Walks the cut position backwards from ``len(txt) - 1`` down to 0 and
    returns ``(head, tail)`` for the first position whose head has fewer
    than ``limit`` tokens according to ``get_token_fn``. If no position
    qualifies, a pair of error-marker strings is returned instead.
    """
    cut_at = len(txt) - 1
    while cut_at >= 0:
        head = txt[:cut_at]
        if get_token_fn(head) < limit:
            return head, txt[cut_at:]
        cut_at -= 1
    return "Tiktoken未知错误", "Tiktoken未知错误"
|
||||
|
||||
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
|
||||
# 递归
|
||||
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
|
||||
def cut(txt_tocut, must_break_at_empty_line): # 递归
|
||||
if get_token_fn(txt_tocut) <= limit:
|
||||
return [txt_tocut]
|
||||
else:
|
||||
@@ -347,216 +338,25 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
|
||||
if must_break_at_empty_line:
|
||||
if lines[cnt] != "":
|
||||
continue
|
||||
print(cnt)
|
||||
prev = "\n".join(lines[:cnt])
|
||||
post = "\n".join(lines[cnt:])
|
||||
if get_token_fn(prev) < limit:
|
||||
break
|
||||
if cnt == 0:
|
||||
if break_anyway:
|
||||
prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
|
||||
else:
|
||||
raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
|
||||
# print('what the fuck ? 存在一行极长的文本!')
|
||||
raise RuntimeError("存在一行极长的文本!")
|
||||
# print(len(post))
|
||||
# 列表递归接龙
|
||||
result = [prev]
|
||||
result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
|
||||
result.extend(cut(post, must_break_at_empty_line))
|
||||
return result
|
||||
try:
|
||||
# 第1次尝试,将双空行(\n\n)作为切分点
|
||||
return cut(txt, must_break_at_empty_line=True)
|
||||
except RuntimeError:
|
||||
try:
|
||||
# 第2次尝试,将单空行(\n)作为切分点
|
||||
return cut(txt, must_break_at_empty_line=False)
|
||||
except RuntimeError:
|
||||
try:
|
||||
# 第3次尝试,将英文句号(.)作为切分点
|
||||
res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
|
||||
# 这个中文的句号是故意的,作为一个标识而存在
|
||||
res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)
|
||||
return [r.replace('。\n', '.') for r in res]
|
||||
except RuntimeError as e:
|
||||
try:
|
||||
# 第4次尝试,将中文句号(。)作为切分点
|
||||
res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False)
|
||||
return [r.replace('。。\n', '。') for r in res]
|
||||
except RuntimeError as e:
|
||||
# 第5次尝试,没办法了,随便切一下敷衍吧
|
||||
return cut(txt, must_break_at_empty_line=False, break_anyway=True)
|
||||
|
||||
|
||||
|
||||
def read_and_clean_pdf_text(fp):
    """
    Read a PDF with PyMuPDF (``fitz``) and return its cleaned-up text.

    **Parameters**
    - `fp`: path of the PDF file, passed to ``fitz.open``.

    **Returns**
    - `meta_txt`: the cleaned text as a single string; blocks are separated
      by a double newline.
    - `page_one_meta`: list with the text of every text block on the first
      page (lines joined with spaces, ``'- '`` removed).

    **Raises**
    - `ValueError`: when no text span can be extracted from the document.

    **Processing steps**
    1. Collect every non-empty line (text, dominant font size, bbox) and
       every span (text, size, length).
    2. Take the font size covering the most characters as the body font.
    3. Drop lines whose font is at most 95% of the body font (footnotes,
       captions, references), glue same-font lines together, and start a new
       section whenever a line with a font larger than the body font appears.
    4. Replace blocks shorter than 100 characters by a newline, collapse
       consecutive newline blocks, merge blocks that start with a lowercase
       word into the preceding block, then normalise newlines.
    """
    import fitz
    import re

    fc = 0  # meta_line index: line text
    fs = 1  # meta_line index: dominant font size of the line
    fb = 2  # meta_line index: bounding box of the line
    REMOVE_FOOT_NOTE = True  # drop content set in a smaller font than the body
    # Lines at or below this fraction of the body font size are treated as
    # non-body text (body fonts are not always perfectly uniform).
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95

    def primary_ffsize(l):
        """Return the font size that covers the most characters of line `l`."""
        fsize_statiscs = {}
        for wtf in l['spans']:
            fsize_statiscs[wtf['size']] = fsize_statiscs.get(wtf['size'], 0) + len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a, b):
        """Tell whether two font sizes differ by less than 2%."""
        return abs((a - b) / max(a, b)) < 0.02

    ############################## <Step 1: collect raw information> ##################################
    meta_line = []
    meta_span = []
    page_one_meta = []
    with fitz.open(fp) as doc:
        for index, page in enumerate(doc):
            text_areas = page.get_text("dict")  # structured text of the page
            for t in text_areas['blocks']:
                if 'lines' not in t:
                    continue
                for l in t['lines']:
                    txt_line = "".join([wtf['text'] for wtf in l['spans']])
                    if len(txt_line) == 0:
                        continue
                    meta_line.append([txt_line, primary_ffsize(l), l['bbox'], l])
                    for wtf in l['spans']:
                        meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
            if index == 0:
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

    ############################## <Step 2: find the body font size> ##################################
    fsize_statiscs = {}
    for span in meta_span:
        fsize_statiscs[span[1]] = fsize_statiscs.get(span[1], 0) + span[2]
    if not fsize_statiscs:
        # Same exception type max() would raise on an empty dict, but with a
        # message that tells the user what is wrong.
        raise ValueError(f"No extractable text found in PDF: {fp}")
    main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
    give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT

    ############################## <Step 3: split and regroup> ##################################
    mega_sec = []
    sec = []
    for index, line in enumerate(meta_line):
        if index == 0:
            sec.append(line[fc])
            continue
        if REMOVE_FOOT_NOTE and line[fs] <= give_up_fize_threshold:
            continue
        prev = meta_line[index - 1]
        if ffsize_same(line[fs], prev[fs]):
            # Paragraph heuristic: a line ending with '.' that is clearly
            # narrower than the previous line closes a paragraph.
            if line[fc].endswith('.') and \
                    (prev[fc] != 'NEW_BLOCK') and \
                    (line[fb][2] - line[fb][0]) < (prev[fb][2] - prev[fb][0]) * 0.7:
                sec[-1] += line[fc]
                sec[-1] += "\n\n"
            else:
                sec[-1] += " "
                sec[-1] += line[fc]
        elif (index + 1 < len(meta_line)) and line[fs] > main_fsize:
            # A line in a larger font that is not the last one: new section.
            mega_sec.append(list(sec))
            sec = ["# " + line[fc]]
        elif prev[fs] > line[fs]:
            # Font got smaller: probably the text following a heading.
            sec.append("\n" + line[fc])
        else:
            sec.append(line[fc])
    mega_sec.append(list(sec))

    blocks = [" ".join(ms).replace('- ', ' ') for ms in mega_sec]

    ############################## <Step 4: post-processing> ##################################
    def blank_out_short_blocks(blocks):
        """Replace every block shorter than 100 characters by a newline."""
        for index, block_txt in enumerate(blocks):
            if len(block_txt) < 100:
                blocks[index] = '\n'
        return blocks

    def drop_repeated_blank_blocks(blocks):
        """Collapse runs of consecutive newline blocks into one."""
        for index in reversed(range(1, len(blocks))):
            if blocks[index] == '\n' and blocks[index - 1] == '\n':
                blocks.pop(index)
        return blocks

    def merge_lowercase_blocks(blocks):
        """Append blocks that start with a lowercase word to their predecessor."""
        starts_with_lowercase_word = re.compile(r"^[a-z]+").match
        # A merge can make the predecessor start with a lowercase word too,
        # so repeat (at most 100 passes) until a pass changes nothing.
        for _ in range(100):
            merged_any = False
            for index, block_txt in enumerate(blocks):
                # index 0 has no predecessor; blocks[index-1] would wrap
                # around and glue the first block onto the last one.
                if index == 0 or not starts_with_lowercase_word(block_txt):
                    continue
                if blocks[index - 1] != '\n':
                    blocks[index - 1] += ' '
                else:
                    blocks[index - 1] = ''
                blocks[index - 1] += blocks[index]
                blocks[index] = '\n'
                merged_any = True
            if not merged_any:
                break
        return blocks

    blocks = blank_out_short_blocks(blocks)
    blocks = drop_repeated_blank_blocks(blocks)
    blocks = merge_lowercase_blocks(blocks)
    blocks = drop_repeated_blank_blocks(blocks)

    meta_txt = '\n'.join(blocks)
    # Collapse repeated newlines ...
    for _ in range(5):
        meta_txt = meta_txt.replace('\n\n', '\n')
    # ... then turn every newline into a double newline.
    meta_txt = meta_txt.replace('\n', '\n\n')

    return meta_txt, page_one_meta
|
||||
|
||||
@@ -49,7 +49,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
# 第4步:随便显示点什么防止卡顿的感觉
|
||||
for index, fp in enumerate(file_manifest):
|
||||
# if 'test_project' in fp: continue
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}'
|
||||
i_say_show_user_buffer.append(i_say_show_user)
|
||||
@@ -62,7 +62,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
import tiktoken
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
def get_token_fn(txt): return len(enc.encode(txt))
|
||||
|
||||
|
||||
# 第6步:任务函数
|
||||
@@ -72,7 +72,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
if index > 10:
|
||||
time.sleep(60)
|
||||
print('Openai 限制免费用户每分钟20次请求,降低请求频率中。')
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```'
|
||||
try:
|
||||
|
||||
@@ -1,162 +0,0 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_execption, write_results_to_file
|
||||
fast_debug = False
|
||||
|
||||
class PaperFileGroup():
    """
    Holds a set of files together with the token-limited fragments they
    are split into.

    `file_paths` / `file_contents` are filled by the caller; after
    `run_file_split`, `sp_file_contents`, `sp_file_index` and `sp_file_tag`
    hold, per fragment, its text, the index of the file it came from and a
    display tag.
    """
    def __init__(self):
        import tiktoken
        from toolbox import get_conf

        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # Token counter based on the encoding of the configured LLM model.
        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
        self.get_token_num = lambda txt: len(enc.encode(txt, disallowed_special=()))

    def run_file_split(self, max_token_limit=1900):
        """
        Split every file whose token count reaches `max_token_limit`.

        Files below the limit are recorded as-is and tagged with their path;
        longer files are cut into segments tagged `<path>.part-<j>.md`.
        """
        for idx, content in enumerate(self.file_contents):
            if self.get_token_num(content) >= max_token_limit:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                pieces = breakdown_txt_to_satisfy_token_limit_for_pdf(content, self.get_token_num, max_token_limit)
                for part_no, piece in enumerate(pieces):
                    self.sp_file_contents.append(piece)
                    self.sp_file_index.append(idx)
                    self.sp_file_tag.append(self.file_paths[idx] + f".part-{part_no}.md")
                continue
            self.sp_file_contents.append(content)
            self.sp_file_index.append(idx)
            self.sp_file_tag.append(self.file_paths[idx])

        print('Segmentation: done')
|
||||
|
||||
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
    """
    Translate a list of Markdown files with multi-threaded GPT requests.

    This is a generator: it yields UI refreshes while the work is running.

    Args:
        file_manifest: paths of the Markdown files to translate.
        project_folder, plugin_kwargs, history, system_prompt: accepted for
            signature compatibility with the plugin callers; not read here.
        llm_kwargs: forwarded to the request helper.
        chatbot: chat history list shown in the UI; a summary row is appended.
        language: translation direction, ``'en->zh'`` or ``'zh->en'``.

    Raises:
        ValueError: if `language` is not one of the supported directions
            (this includes the default ``'en'``, which previously failed
            later with an UnboundLocalError).
    """
    import time

    instructions = {
        'en->zh': "This is a Markdown file, translate it into Chinese, do not modify any existing Markdown commands:",
        'zh->en': "This is a Markdown file, translate it into English, do not modify any existing Markdown commands:",
    }
    # Fail fast, before any file is read or any request is sent.
    if language not in instructions:
        raise ValueError(f"Unsupported language {language!r}; expected one of {sorted(instructions)}")

    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # <-------- read the Markdown files ---------->
    pfg = PaperFileGroup()
    last_fp = ""  # last file read; used in the final chat message
    for fp in file_manifest:
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        pfg.file_paths.append(fp)
        pfg.file_contents.append(file_content)
        last_fp = fp

    # <-------- split Markdown files that are too long ---------->
    pfg.run_file_split(max_token_limit=2048)
    n_split = len(pfg.sp_file_contents)

    # <-------- multi-threaded translation ---------->
    inputs_array = [instructions[language] + f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5,  # maximum parallelism tolerated by OpenAI
        scroller_max_len = 80
    )

    # <-------- write the report and finish ---------->
    create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
    res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
    history = gpt_response_collection
    chatbot.append((f"{last_fp}完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@CatchException
def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate Markdown from English to Chinese.

    `txt` is either the path of a single ``.md`` file or a folder that is
    searched recursively for ``*.md`` files. Problems (missing dependency,
    path not found, no Markdown files) are reported in the chat and end the
    generator early.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Markdown项目进行翻译。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency and give installation advice if missing
    try:
        import tiktoken
    except Exception:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    history = []    # clear the history so the input does not overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    # A single .md file is translated directly; globbing it as if it were a
    # folder would find nothing.
    if txt.endswith('.md'):
        file_manifest = [txt]
    else:
        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@CatchException
def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate Markdown from Chinese to English.

    `txt` is either the path of a single ``.md`` file or a folder that is
    searched recursively for ``*.md`` files. Problems (missing dependency,
    path not found, no Markdown files) are reported in the chat and end the
    generator early.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Markdown项目进行翻译。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency and give installation advice if missing.
    # `except Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit are not mistaken for an import failure.
    try:
        import tiktoken
    except Exception:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    history = []    # clear the history so the input does not overflow
    import glob, os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt
    if txt.endswith('.md'):
        file_manifest = [txt]
    else:
        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
|
||||
@@ -68,7 +68,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
|
||||
print('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
if ".tex" in fp:
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
if ".pdf" in fp.lower():
|
||||
file_content = readPdf(fp)
|
||||
|
||||
@@ -2,9 +2,174 @@ from toolbox import CatchException, report_execption, write_results_to_file
|
||||
from toolbox import update_ui
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
from colorful import *
|
||||
|
||||
def read_and_clean_pdf_text(fp):
    """
    Read a PDF with PyMuPDF (``fitz``) and return its cleaned-up text.

    **Parameters**
    - `fp`: path of the PDF file, passed to ``fitz.open``.

    **Returns**
    - `meta_txt`: the cleaned text as a single string; blocks are separated
      by a double newline.
    - `page_one_meta`: list with the text of every text block on the first
      page (lines joined with spaces, ``'- '`` removed).

    **Raises**
    - `ValueError`: when no text span can be extracted from the document.

    **Processing steps**
    1. Collect every non-empty line (text, dominant font size, bbox) and
       every span (text, size, length).
    2. Take the font size covering the most characters as the body font.
    3. Drop lines whose font is at most 95% of the body font, glue same-font
       lines together, and start a new section whenever a line with a font
       larger than the body font appears.
    4. Replace blocks shorter than 100 characters by a newline, collapse
       consecutive newline blocks, merge blocks that start with a lowercase
       word into the preceding block, then normalise newlines.
    5. Print the resulting blocks for inspection.
    """
    import fitz
    import re

    fc = 0  # meta_line index: line text
    fs = 1  # meta_line index: dominant font size of the line
    fb = 2  # meta_line index: bounding box of the line
    REMOVE_FOOT_NOTE = True  # drop content set in a smaller font than the body
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95  # threshold as a fraction of the body font size

    def primary_ffsize(l):
        """Return the font size that covers the most characters of line `l`."""
        fsize_statiscs = {}
        for wtf in l['spans']:
            fsize_statiscs[wtf['size']] = fsize_statiscs.get(wtf['size'], 0) + len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a, b):
        """Tell whether two font sizes differ by less than 2%."""
        return abs((a - b) / max(a, b)) < 0.02

    # ---- Step 1: collect raw information ----
    meta_line = []
    meta_span = []
    page_one_meta = []
    with fitz.open(fp) as doc:
        for index, page in enumerate(doc):
            text_areas = page.get_text("dict")  # structured text of the page
            for t in text_areas['blocks']:
                if 'lines' not in t:
                    continue
                for l in t['lines']:
                    txt_line = "".join([wtf['text'] for wtf in l['spans']])
                    # Skip empty lines: a line without spans would make
                    # primary_ffsize() call max() on an empty dict.
                    if len(txt_line) == 0:
                        continue
                    meta_line.append([txt_line, primary_ffsize(l), l['bbox'], l])
                    for wtf in l['spans']:
                        meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
            if index == 0:
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

    # ---- Step 2: find the body font size ----
    fsize_statiscs = {}
    for span in meta_span:
        fsize_statiscs[span[1]] = fsize_statiscs.get(span[1], 0) + span[2]
    if not fsize_statiscs:
        # Same exception type max() would raise on an empty dict, but with a
        # message that tells the user what is wrong.
        raise ValueError(f"No extractable text found in PDF: {fp}")
    main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
    give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT

    # ---- Step 3: split and regroup ----
    mega_sec = []
    sec = []
    for index, line in enumerate(meta_line):
        if index == 0:
            sec.append(line[fc])
            continue
        if REMOVE_FOOT_NOTE and line[fs] <= give_up_fize_threshold:
            continue
        prev = meta_line[index - 1]
        if ffsize_same(line[fs], prev[fs]):
            # Paragraph heuristic: a line ending with '.' that is clearly
            # narrower than the previous line closes a paragraph.
            if line[fc].endswith('.') and \
                    (prev[fc] != 'NEW_BLOCK') and \
                    (line[fb][2] - line[fb][0]) < (prev[fb][2] - prev[fb][0]) * 0.7:
                sec[-1] += line[fc]
                sec[-1] += "\n\n"
            else:
                sec[-1] += " "
                sec[-1] += line[fc]
        elif (index + 1 < len(meta_line)) and line[fs] > main_fsize:
            # A line in a larger font that is not the last one: new section.
            mega_sec.append(list(sec))
            sec = ["# " + line[fc]]
        elif prev[fs] > line[fs]:
            # Font got smaller: probably the text following a heading.
            sec.append("\n" + line[fc])
        else:
            sec.append(line[fc])
    mega_sec.append(list(sec))

    blocks = [" ".join(ms).replace('- ', ' ') for ms in mega_sec]

    # ---- Step 4: post-processing ----
    def blank_out_short_blocks(blocks):
        """Replace every block shorter than 100 characters by a newline."""
        for index, block_txt in enumerate(blocks):
            if len(block_txt) < 100:
                blocks[index] = '\n'
        return blocks

    def drop_repeated_blank_blocks(blocks):
        """Collapse runs of consecutive newline blocks into one."""
        for index in reversed(range(1, len(blocks))):
            if blocks[index] == '\n' and blocks[index - 1] == '\n':
                blocks.pop(index)
        return blocks

    def merge_lowercase_blocks(blocks):
        """Append blocks that start with a lowercase word to their predecessor."""
        starts_with_lowercase_word = re.compile(r"^[a-z]+").match
        # A merge can make the predecessor start with a lowercase word too,
        # so repeat (at most 100 passes) until a pass changes nothing.
        for _ in range(100):
            merged_any = False
            for index, block_txt in enumerate(blocks):
                # index 0 has no predecessor; blocks[index-1] would wrap
                # around and glue the first block onto the last one.
                if index == 0 or not starts_with_lowercase_word(block_txt):
                    continue
                if blocks[index - 1] != '\n':
                    blocks[index - 1] += ' '
                else:
                    blocks[index - 1] = ''
                blocks[index - 1] += blocks[index]
                blocks[index] = '\n'
                merged_any = True
            if not merged_any:
                break
        return blocks

    blocks = blank_out_short_blocks(blocks)
    blocks = drop_repeated_blank_blocks(blocks)
    blocks = merge_lowercase_blocks(blocks)
    blocks = drop_repeated_blank_blocks(blocks)

    meta_txt = '\n'.join(blocks)
    # Collapse repeated newlines ...
    for _ in range(5):
        meta_txt = meta_txt.replace('\n\n', '\n')
    # ... then turn every newline into a double newline.
    meta_txt = meta_txt.replace('\n', '\n\n')

    # ---- Step 5: show the segmentation result (post-processed blocks) ----
    for f in blocks:
        print亮黄(f)
        print亮绿('***************************')

    return meta_txt, page_one_meta
|
||||
|
||||
|
||||
@CatchException
|
||||
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt, web_port):
|
||||
import glob
|
||||
@@ -13,7 +178,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
# 基本信息:功能、贡献者
|
||||
chatbot.append([
|
||||
"函数插件功能?",
|
||||
"批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
|
||||
"批量总结PDF文档。函数插件贡献者: Binary-Husky"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
@@ -59,7 +224,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt):
|
||||
import os
|
||||
import tiktoken
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 1280
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 1600
|
||||
generated_conclusion_files = []
|
||||
for index, fp in enumerate(file_manifest):
|
||||
|
||||
@@ -70,7 +235,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
def get_token_num(txt): return len(enc.encode(txt))
|
||||
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
|
||||
page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
@@ -91,14 +256,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
|
||||
# 多线,翻译
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array=[
|
||||
f"你需要翻译以下内容:\n{frag}" for frag in paper_fragments],
|
||||
f"以下是你需要翻译的论文片段:\n{frag}" for frag in paper_fragments],
|
||||
inputs_show_user_array=[f"\n---\n 原文: \n\n {frag.replace('#', '')} \n---\n 翻译:\n " for frag in paper_fragments],
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot,
|
||||
history_array=[[paper_meta] for _ in paper_fragments],
|
||||
sys_prompt_array=[
|
||||
"请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in paper_fragments],
|
||||
# max_workers=5 # OpenAI所允许的最大并行过载
|
||||
"请你作为一个学术翻译,负责把学术论文的片段准确翻译成中文。" for _ in paper_fragments],
|
||||
max_workers=16 # OpenAI所允许的最大并行过载
|
||||
)
|
||||
|
||||
# 整理报告的格式
|
||||
|
||||
@@ -1,68 +1,142 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_execption
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
import re
|
||||
import unicodedata
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
fast_debug = False
|
||||
|
||||
def is_paragraph_break(match):
    """
    Decide whether the newline captured by `match` separates paragraphs.

    `match` must expose two groups: the character before the newline and the
    character after it. The newline counts as a paragraph break when the
    previous character ends a sentence ('.', '!' or '?'), the next character
    is uppercase, and more than 140 characters of the searched string precede
    the match.

    Returns "\n\n" for a paragraph break, otherwise a single space.
    """
    before, after = match.groups()
    ends_sentence = before in ".!?"
    # Number of characters of the searched string that precede group 1.
    chars_before = len(match.string[:match.start(1)])
    long_enough = chars_before > 140
    if not (ends_sentence and after.isupper() and long_enough):
        return " "
    return "\n\n"
|
||||
|
||||
def normalize_text(text):
    """
    Normalise text by decomposing ligatures and similar special characters
    into their basic form (NFKD), then dropping whatever is still outside
    the ASCII range.

    For example the ligature "ﬁ" becomes the two letters "f" and "i".
    """
    # NFKD first, so that compatibility characters leave their ASCII base
    # letters behind before the non-ASCII remainder is stripped.
    return re.sub(r'[^\x00-\x7F]+', '', unicodedata.normalize("NFKD", text))
|
||||
|
||||
def clean_text(raw_text):
    """
    Clean and reformat raw text extracted from a PDF.

    1. Normalise the text with `normalize_text`.
    2. Re-join words hyphenated across a line break, e.g. "Espe-\ncially"
       becomes "Especially".
    3. Replace every single newline between two non-space characters by
       either a space or a paragraph separator, as decided by
       `is_paragraph_break`.

    Returns the cleaned text with leading/trailing whitespace stripped.
    """
    normalized_text = normalize_text(raw_text)

    # Re-join words split by a hyphen at the end of a line.
    text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)

    # The following character is captured inside a lookahead so that it is
    # not consumed: with r'(\S)\n(\S)' the "y" in "x\ny\nz" was swallowed by
    # the first match and the second newline was never processed. Both
    # groups are still available to is_paragraph_break via match.groups().
    newlines = re.compile(r'(\S)\n(?=(\S))')

    # Only group 1 and the newline are consumed, so only they are re-emitted.
    final_text = newlines.sub(lambda m: m.group(1) + is_paragraph_break(m), text)

    return final_text.strip()
|
||||
|
||||
def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import tiktoken
|
||||
import time, glob, os, fitz
|
||||
print('begin analysis on:', file_name)
|
||||
|
||||
############################## <第 0 步,切割PDF> ##################################
|
||||
# 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
|
||||
# 的长度必须小于 2500 个 Token
|
||||
file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF
|
||||
with fitz.open(file_name) as doc:
|
||||
file_content = ""
|
||||
for page in doc:
|
||||
file_content += page.get_text()
|
||||
file_content = clean_text(file_content)
|
||||
# print(file_content)
|
||||
split_number = 10000
|
||||
split_group = (len(file_content)//split_number)+1
|
||||
for i in range(0,split_group):
|
||||
if i==0:
|
||||
prefix = "接下来请你仔细分析下面的论文,学习里面的内容(专业术语、公式、数学概念).并且注意:由于论文内容较多,将分批次发送,每次发送完之后,你只需要回答“接受完成”"
|
||||
i_say = prefix + f'文件名是{file_name},文章内容第{i+1}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
||||
i_say_show_user = f'文件名是:\n{file_name},\n由于论文内容过长,将分批请求(共{len(file_content)}字符,将分为{split_group}批,每批{split_number}字符)。\n当前发送{i+1}/{split_group}部分'
|
||||
elif i==split_group-1:
|
||||
i_say = f'你只需要回答“所有论文接受完成,请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:]}```'
|
||||
i_say_show_user = f'当前发送{i+1}/{split_group}部分'
|
||||
else:
|
||||
i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
||||
i_say_show_user = f'当前发送{i+1}/{split_group}部分'
|
||||
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt="") # 带超时倒计时
|
||||
while "完成" not in gpt_say:
|
||||
i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
||||
i_say_show_user = f'出现error,重新发送{i+1}/{split_group}部分'
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt="") # 带超时倒计时
|
||||
time.sleep(1)
|
||||
chatbot[-1] = (i_say_show_user, gpt_say)
|
||||
history.append(i_say_show_user); history.append(gpt_say)
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
time.sleep(2)
|
||||
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 2500
|
||||
i_say = f'接下来,请你扮演一名专业的学术教授,利用你的所有知识并且结合这篇文章,回答我的问题。(请牢记:1.直到我说“退出”,你才能结束任务;2.所有问题需要紧密围绕文章内容;3.如果有公式,请使用tex渲染)'
|
||||
chatbot.append((i_say, "[Local Message] waiting gpt response."))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
from toolbox import get_conf
|
||||
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
|
||||
page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
|
||||
# 为了更好的效果,我们剥离Introduction之后的部分(如果有)
|
||||
paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
|
||||
# ** gpt request **
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt="") # 带超时倒计时
|
||||
chatbot[-1] = (i_say, gpt_say)
|
||||
history.append(i_say); history.append(gpt_say)
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
############################## <第 1 步,从摘要中提取高价值信息,放到history中> ##################################
|
||||
final_results = []
|
||||
final_results.append(paper_meta)
|
||||
|
||||
############################## <第 2 步,迭代地历遍整个文章,提取精炼信息> ##################################
|
||||
i_say_show_user = f'首先你在英文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。" # 用户提示
|
||||
chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[]) # 更新UI
|
||||
@CatchException
|
||||
def 理解PDF文档内容(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
import glob, os
|
||||
|
||||
iteration_results = []
|
||||
last_iteration_result = paper_meta # 初始值是摘要
|
||||
MAX_WORD_TOTAL = 4096
|
||||
n_fragment = len(paper_fragments)
|
||||
if n_fragment >= 20: print('文章极长,不能达到预期效果')
|
||||
for i in range(n_fragment):
|
||||
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
|
||||
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
|
||||
i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]}"
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问
|
||||
llm_kwargs, chatbot,
|
||||
history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果
|
||||
sys_prompt="Extract the main idea of this section." # 提示
|
||||
)
|
||||
iteration_results.append(gpt_say)
|
||||
last_iteration_result = gpt_say
|
||||
# 基本信息:功能、贡献者
|
||||
chatbot.append([
|
||||
"函数插件功能?",
|
||||
"理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
############################## <第 3 步,整理history> ##################################
|
||||
final_results.extend(iteration_results)
|
||||
final_results.append(f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。')
|
||||
# 接下来两句话只显示在界面上,不起实际作用
|
||||
i_say_show_user = f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。'; gpt_say = "[Local Message] 收到。"
|
||||
chatbot.append([i_say_show_user, gpt_say])
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
|
||||
root = tk.Tk()
|
||||
root.withdraw()
|
||||
txt = filedialog.askopenfilename()
|
||||
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
import fitz
|
||||
except:
|
||||
report_execption(chatbot, history,
|
||||
a = f"解析项目: {txt}",
|
||||
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
# 清空历史,以免输入溢出
|
||||
history = []
|
||||
|
||||
# 开始正式执行任务
|
||||
yield from 解析PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
||||
|
||||
############################## <第 4 步,设置一个token上限,防止回答时Token溢出> ##################################
|
||||
from .crazy_utils import input_clipping
|
||||
_, final_results = input_clipping("", final_results, max_token_limit=3200)
|
||||
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
|
||||
|
||||
|
||||
@CatchException
|
||||
@@ -72,7 +146,7 @@ def 理解PDF文档内容标准文件输入(txt, llm_kwargs, plugin_kwargs, chat
|
||||
# 基本信息:功能、贡献者
|
||||
chatbot.append([
|
||||
"函数插件功能?",
|
||||
"理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe, binary-husky"])
|
||||
"理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
|
||||
@@ -7,7 +7,7 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
import time, os
|
||||
print('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
|
||||
i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
|
||||
|
||||
@@ -12,11 +12,9 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
sys_prompt_array = []
|
||||
report_part_1 = []
|
||||
|
||||
assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
|
||||
############################## <第一步,逐个文件分析,多线程> ##################################
|
||||
for index, fp in enumerate(file_manifest):
|
||||
# 读取文件
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
|
||||
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
|
||||
@@ -27,7 +25,6 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
history_array.append([])
|
||||
sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")
|
||||
|
||||
# 文件读取完成,对每一个源代码文件,生成一个请求线程,发送到chatgpt进行分析
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array = inputs_array,
|
||||
inputs_show_user_array = inputs_show_user_array,
|
||||
@@ -38,13 +35,28 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
show_user_at_complete = True
|
||||
)
|
||||
|
||||
# 全部文件解析完成,结果写入文件,准备对工程源代码进行汇总分析
|
||||
report_part_1 = copy.deepcopy(gpt_response_collection)
|
||||
history_to_return = report_part_1
|
||||
res = write_results_to_file(report_part_1)
|
||||
chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
|
||||
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
|
||||
|
||||
############################## <存储中间数据进行调试> ##################################
|
||||
|
||||
# def objdump(obj):
|
||||
# import pickle
|
||||
# with open('objdump.tmp', 'wb+') as f:
|
||||
# pickle.dump(obj, f)
|
||||
# return
|
||||
|
||||
# def objload():
|
||||
# import pickle, os
|
||||
# if not os.path.exists('objdump.tmp'):
|
||||
# return
|
||||
# with open('objdump.tmp', 'rb') as f:
|
||||
# return pickle.load(f)
|
||||
# objdump([report_part_1, gpt_response_collection, history_to_return, file_manifest, project_folder, fp, llm_kwargs, chatbot])
|
||||
|
||||
############################## <第二步,综合,单线程,分组+迭代处理> ##################################
|
||||
batchsize = 16 # 10个文件为一组
|
||||
report_part_2 = []
|
||||
@@ -213,54 +225,9 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
||||
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.go', recursive=True)] + \
|
||||
[f for f in glob.glob(f'{project_folder}/**/go.mod', recursive=True)] + \
|
||||
[f for f in glob.glob(f'{project_folder}/**/go.sum', recursive=True)] + \
|
||||
[f for f in glob.glob(f'{project_folder}/**/go.work', recursive=True)]
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.go', recursive=True)]
|
||||
if len(file_manifest) == 0:
|
||||
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
||||
|
||||
|
||||
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect the files of a Lua project and hand them to the source analyser.

    Args:
        txt: Path typed into the input box; must exist on disk.
        llm_kwargs: Forwarded unchanged to the analyser.
        plugin_kwargs: Forwarded unchanged to the analyser.
        chatbot: Chat display handle; receives error reports.
        history: Incoming chat history; discarded and replaced by an empty list.
        system_prompt: Forwarded unchanged to the analyser.
        web_port: Not used by this function.

    Yields:
        Whatever ``update_ui`` and the analyser generator yield.
    """
    import glob, os

    # Start from an empty history so earlier conversation cannot overflow the input.
    history = []

    # Guard: the path must exist, otherwise report and stop.
    if not os.path.exists(txt):
        shown_input = '空空如也的输入栏' if txt == "" else txt
        report_execption(chatbot, history, a = f"解析项目: {shown_input}", b = f"找不到本地项目或无权访问: {shown_input}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Gather Lua sources plus the xml/json/toml files, searched recursively, in this order.
    file_manifest = []
    for pattern in ('*.lua', '*.xml', '*.json', '*.toml'):
        file_manifest.extend(glob.glob(f'{project_folder}/**/{pattern}', recursive=True))

    # Guard: nothing matched, so report and stop.
    if not file_manifest:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
||||
|
||||
|
||||
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect the files of a C# project and hand them to the source analyser.

    Args:
        txt: Path typed into the input box; must exist on disk.
        llm_kwargs: Forwarded unchanged to the analyser.
        plugin_kwargs: Forwarded unchanged to the analyser.
        chatbot: Chat display handle; receives error reports.
        history: Incoming chat history; discarded and replaced by an empty list.
        system_prompt: Forwarded unchanged to the analyser.
        web_port: Not used by this function.

    Yields:
        Whatever ``update_ui`` and the analyser generator yield.
    """
    import glob, os

    # Start from an empty history so earlier conversation cannot overflow the input.
    history = []

    # Guard: the path must exist, otherwise report and stop.
    if not os.path.exists(txt):
        shown_input = '空空如也的输入栏' if txt == "" else txt
        report_execption(chatbot, history, a = f"解析项目: {shown_input}", b = f"找不到本地项目或无权访问: {shown_input}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Gather .cs sources first, then .csproj project files, searched recursively.
    source_files = glob.glob(f'{project_folder}/**/*.cs', recursive=True)
    project_files = glob.glob(f'{project_folder}/**/*.csproj', recursive=True)
    file_manifest = source_files + project_files

    # Guard: nothing matched, so report and stop.
    if not file_manifest:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
||||
|
||||
@@ -8,7 +8,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
|
||||
import time, glob, os
|
||||
print('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
with open(fp, 'r', encoding='utf-8') as f:
|
||||
file_content = f.read()
|
||||
|
||||
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
|
||||
|
||||
@@ -6,7 +6,7 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
||||
"""
|
||||
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
|
||||
llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
|
||||
plugin_kwargs 插件模型的参数,暂时没有用武之地
|
||||
plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行
|
||||
chatbot 聊天显示框的句柄,用于显示给用户
|
||||
history 聊天历史,前情提要
|
||||
system_prompt 给gpt的静默提醒
|
||||
|
||||
294
img/README_EN.md
294
img/README_EN.md
@@ -1,294 +0,0 @@
|
||||
# ChatGPT Academic Optimization
|
||||
> **Note**
|
||||
>
|
||||
> This English readme is automatically generated by the markdown translation plugin in this project, and may not be 100% correct.
|
||||
>
|
||||
|
||||
|
||||
**If you like this project, please give it a star. If you have come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request (to the `dev` branch).**
|
||||
|
||||
> **Note**
|
||||
>
|
||||
> 1. Please note that only function plugins (buttons) marked in **red** support reading files, and some plugins are located in the **dropdown menu** in the plugin area. Additionally, we welcome and process PRs for any new plugins with the **highest priority**!
|
||||
>
|
||||
> 2. The functions of each file in this project are detailed in the self-translation report [self_analysis.md](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A). With the version iteration, you can click on a relevant function plugin at any time to call GPT to regenerate the self-analysis report for the project. Commonly asked questions are summarized in the [`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98).
|
||||
>
|
||||
> 3. If you are not used to the function, comments or interface with some Chinese names, you can click on the relevant function plugin at any time to call ChatGPT to generate the source code of the project in English.
|
||||
|
||||
<div align="center">
|
||||
|
||||
Function | Description
|
||||
--- | ---
|
||||
One-click refinement | Supports one-click refinement, one-click searching for grammatical errors in papers.
|
||||
One-click translation between Chinese and English | One-click translation between Chinese and English.
|
||||
One-click code interpretation | Can correctly display and interpret the code.
|
||||
[Custom shortcuts](https://www.bilibili.com/video/BV14s4y1E7jN) | Supports custom shortcuts.
|
||||
[Configure proxy server](https://www.bilibili.com/video/BV1rc411W7Dr) | Supports configuring proxy server.
|
||||
Modular design | Supports custom high-order experimental features and [function plug-ins], and plug-ins support [hot update](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97).
|
||||
[Self-program analysis](https://www.bilibili.com/video/BV1cj411A7VW) | [Function Plug-in] [One-Key Understanding](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A) the source code of this project.
|
||||
[Program analysis](https://www.bilibili.com/video/BV1cj411A7VW) | [Function Plug-in] One-click can analyze other Python/C/C++/Java/Golang/Lua/React project trees.
|
||||
Read papers | [Function Plug-in] One-click reads the full text of a latex paper and generates an abstract.
|
||||
Latex full-text translation/refinement | [Function Plug-in] One-click translates or refines a latex paper.
|
||||
Batch annotation generation | [Function Plug-in] One-click generates function annotations in batches.
|
||||
Chat analysis report generation | [Function Plug-in] Automatically generate summary reports after running.
|
||||
[Arxiv assistant](https://www.bilibili.com/video/BV1LM4y1279X) | [Function Plug-in] Enter the arxiv paper url and you can translate the abstract and download the PDF with one click.
|
||||
[PDF paper full-text translation function](https://www.bilibili.com/video/BV1KT411x7Wn) | [Function Plug-in] Extract title and abstract of PDF papers + translate full text (multi-threaded).
|
||||
[Google Scholar integration assistant](https://www.bilibili.com/video/BV19L411U7ia) (Version>=2.45) | [Function Plug-in] Given any Google Scholar search page URL, let GPT help you choose interesting articles.
|
||||
Formula display | Can simultaneously display the tex form and rendering form of formulas.
|
||||
Image display | Can display images in Markdown.
|
||||
Multithreaded function plug-in support | Supports multi-threaded calling of chatgpt, one-click processing of massive texts or programs.
|
||||
Support for markdown tables output by GPT | Can render the markdown tables that GPT outputs.
|
||||
Start dark gradio [theme](https://github.com/binary-husky/chatgpt_academic/issues/173) | Add ```/?__dark-theme=true``` to the end of the browser URL to switch to the dark theme.
|
||||
[Huggingface free scientific online experience](https://huggingface.co/spaces/qingxu98/gpt-academic) | After logging in to Huggingface, copy [this space](https://huggingface.co/spaces/qingxu98/gpt-academic).
|
||||
[Mixed support for multiple LLM models](https://www.bilibili.com/video/BV1EM411K7VH/) ([v3.0 branch](https://github.com/binary-husky/chatgpt_academic/tree/v3.0) in testing) | It must feel great to be served by both ChatGPT and [Tsinghua ChatGLM](https://github.com/THUDM/ChatGLM-6B)!
|
||||
Compatible with [TGUI](https://github.com/oobabooga/text-generation-webui) to access more language models | Access to opt-1.3b, galactica-1.3b and other models ([v3.0 branch](https://github.com/binary-husky/chatgpt_academic/tree/v3.0) under testing).
|
||||
… | ...
|
||||
|
||||
</div>
|
||||
|
||||
<!-- - New interface (left: master branch, right: dev development frontier) -->
|
||||
- New interface (modify the `LAYOUT` option in `config.py` to switch between "left and right layout" and "up and down layout").
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230361456-61078362-a966-4eb5-b49e-3c62ef18b860.gif" width="700" >
|
||||
</div>
|
||||
|
||||
- All buttons are dynamically generated by reading `functional.py`, and custom functions can be added freely, freeing up the clipboard.
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
|
||||
</div>
|
||||
|
||||
- Refinement/Correction
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
|
||||
</div>
|
||||
|
||||
- Supports markdown tables output by GPT.
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
|
||||
</div>
|
||||
|
||||
- If the output contains formulas, both the tex form and the rendering form are displayed simultaneously for easy copying and reading.
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
|
||||
</div>
|
||||
|
||||
- Don't want to read project code? Let chatgpt boast about the whole project.
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
|
||||
</div>
|
||||
|
||||
- Multiple large language models mixed calling. ([v3.0 branch](https://github.com/binary-husky/chatgpt_academic/tree/v3.0) in testing)
|
||||
|
||||
|
||||
## Running Directly (Windows, Linux or MacOS)
|
||||
|
||||
### 1. Download the Project
|
||||
```sh
|
||||
git clone https://github.com/binary-husky/chatgpt_academic.git
|
||||
cd chatgpt_academic
|
||||
```
|
||||
|
||||
### 2. Configure API_KEY and Proxy Settings
|
||||
|
||||
In `config.py`, configure the overseas Proxy and OpenAI API KEY, as follows:
|
||||
```
|
||||
1. If you are in China, you need to set an overseas proxy to use the OpenAI API smoothly. Please read the instructions in config.py carefully (1. Modify the USE_PROXY to True; 2. Modify the proxies according to the instructions).
|
||||
2. Configure OpenAI API KEY. You need to register on the OpenAI official website and obtain an API KEY. Once you get the API KEY, configure it in the config.py file.
|
||||
3. Issues related to proxy network (network timeout, proxy not working) are summarized to https://github.com/binary-husky/chatgpt_academic/issues/1
|
||||
```
|
||||
(Note: When the program is running, it will first check whether there is a private configuration file named `config_private.py`, and use the configuration in it to overwrite the same name configuration in `config.py`. Therefore, if you can understand our configuration reading logic, we strongly recommend that you create a new configuration file next to `config.py` named `config_private.py` and transfer (copy) the configuration in `config.py` to `config_private.py`. `config_private.py` is not managed by Git, which can make your privacy information more secure.)
|
||||
|
||||
### 3. Install Dependencies
|
||||
```sh
|
||||
# (Option 1) Recommended
|
||||
python -m pip install -r requirements.txt
|
||||
|
||||
# (Option 2) If you use anaconda, the steps are also similar:
|
||||
# (Option 2.1) conda create -n gptac_venv python=3.11
|
||||
# (Option 2.2) conda activate gptac_venv
|
||||
# (Option 2.3) python -m pip install -r requirements.txt
|
||||
|
||||
# Note: Use the official pip source or the Ali pip source. Other pip sources (such as some university pips) may have problems. Temporary substitution method:
|
||||
# python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
|
||||
```
|
||||
|
||||
### 4. Run
|
||||
```sh
|
||||
python main.py
|
||||
```
|
||||
|
||||
### 5. Test Experimental Features
|
||||
```
|
||||
- Test C++ Project Header Analysis
|
||||
In the input area, enter `./crazy_functions/test_project/cpp/libJPG` , and then click "[Experiment] Parse the entire C++ project (input inputs the root path of the project)"
|
||||
- Test Writing Abstracts for Latex Projects
|
||||
In the input area, enter `./crazy_functions/test_project/latex/attention` , and then click "[Experiment] Read the tex paper and write an abstract (input inputs the root path of the project)"
|
||||
- Test Python Project Analysis
|
||||
In the input area, enter `./crazy_functions/test_project/python/dqn` , and then click "[Experiment] Parse the entire py project (input inputs the root path of the project)"
|
||||
- Test Self-code Interpretation
|
||||
Click "[Experiment] Please analyze and deconstruct this project itself"
|
||||
- Test Experimental Function Template (asking GPT what happened in history today), you can implement more complex functions based on this template function
|
||||
Click "[Experiment] Experimental function template"
|
||||
```
|
||||
|
||||
## Use Docker (Linux)
|
||||
|
||||
``` sh
|
||||
# Download Project
|
||||
git clone https://github.com/binary-husky/chatgpt_academic.git
|
||||
cd chatgpt_academic
|
||||
# Configure Overseas Proxy and OpenAI API KEY
|
||||
Configure config.py with any text editor
|
||||
# Installation
|
||||
docker build -t gpt-academic .
|
||||
# Run
|
||||
docker run --rm -it --net=host gpt-academic
|
||||
|
||||
# Test Experimental Features
|
||||
## Test Self-code Interpretation
|
||||
Click "[Experiment] Please analyze and deconstruct this project itself"
|
||||
## Test Experimental Function Template (asking GPT what happened in history today), you can implement more complex functions based on this template function
|
||||
Click "[Experiment] Experimental function template"
|
||||
## (Please note that when running in docker, you need to pay extra attention to file access rights issues of the program.)
|
||||
## Test C++ Project Header Analysis
|
||||
In the input area, enter ./crazy_functions/test_project/cpp/libJPG , and then click "[Experiment] Parse the entire C++ project (input inputs the root path of the project)"
|
||||
## Test Writing Abstracts for Latex Projects
|
||||
In the input area, enter ./crazy_functions/test_project/latex/attention , and then click "[Experiment] Read the tex paper and write an abstract (input inputs the root path of the project)"
|
||||
## Test Python Project Analysis
|
||||
In the input area, enter ./crazy_functions/test_project/python/dqn , and then click "[Experiment] Parse the entire py project (input inputs the root path of the project)"
|
||||
|
||||
```
|
||||
|
||||
## Other Deployment Methods
|
||||
- Use WSL2 (Windows Subsystem for Linux)
|
||||
Please visit [Deploy Wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
|
||||
|
||||
- nginx remote deployment
|
||||
Please visit [Deploy Wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E7%9A%84%E6%8C%87%E5%AF%BC)
|
||||
|
||||
|
||||
## Customizing New Convenient Buttons (Academic Shortcut Key Customization)
|
||||
Open functional.py and add the entry as follows, and then restart the program. (If the button has been successfully added and is visible, both the prefix and suffix support hot modification and take effect without restarting the program.)
|
||||
|
||||
For example,
|
||||
```
|
||||
"Super English to Chinese Translation": {
|
||||
|
||||
# Prefix, which will be added before your input. For example, it is used to describe your requirements, such as translation, code interpretation, polishing, etc.
|
||||
"Prefix": "Please translate the following content into Chinese, and then use a markdown table to explain each proprietary term in the text:\n\n",
|
||||
|
||||
# Suffix, which will be added after your input. For example, in conjunction with the prefix, you can bracket your input in quotes.
|
||||
"Suffix": "",
|
||||
|
||||
},
|
||||
```
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
|
||||
</div>
|
||||
|
||||
|
||||
If you come up with a more user-friendly academic shortcut, you are welcome to open an issue or pull request!
|
||||
|
||||
## Configure Proxy
|
||||
### Method 1: General Method
|
||||
In ```config.py```, set the proxy port to match the one used by your proxy software.
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226571294-37a47cd9-4d40-4c16-97a2-d360845406f7.png" width="500" >
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226838985-e5c95956-69c2-4c23-a4dd-cd7944eeb451.png" width="500" >
|
||||
</div>
|
||||
|
||||
|
||||
After configuring, you can use the following command to test whether the proxy works. If everything is normal, the code below will output the location of your proxy server:
|
||||
|
||||
```
|
||||
python check_proxy.py
|
||||
```
|
||||
|
||||
### Method Two: Pure Beginner Tutorial
|
||||
[Pure Beginner Tutorial](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BB%A3%E7%90%86%E8%BD%AF%E4%BB%B6%E9%97%AE%E9%A2%98%E7%9A%84%E6%96%B0%E6%89%8B%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95%EF%BC%88%E6%96%B9%E6%B3%95%E5%8F%AA%E9%80%82%E7%94%A8%E4%BA%8E%E6%96%B0%E6%89%8B%EF%BC%89)
|
||||
|
||||
## Compatibility Testing
|
||||
|
||||
### Image Display:
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/228737599-bf0a9d9c-1808-4f43-ae15-dfcc7af0f295.png" width="800" >
|
||||
</div>
|
||||
|
||||
|
||||
### If the program can read and analyze itself:
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226936850-c77d7183-0749-4c1c-9875-fd4891842d0c.png" width="800" >
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226936618-9b487e4b-ab5b-4b6e-84c6-16942102e917.png" width="800" >
|
||||
</div>
|
||||
|
||||
### Any other Python/Cpp project analysis:
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="800" >
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" width="800" >
|
||||
</div>
|
||||
|
||||
### Latex paper reading comprehension and abstract generation with one click
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/227504406-86ab97cd-f208-41c3-8e4a-7000e51cf980.png" width="800" >
|
||||
</div>
|
||||
|
||||
### Automatic Report Generation
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/227503770-fe29ce2c-53fd-47b0-b0ff-93805f0c2ff4.png" height="300" >
|
||||
<img src="https://user-images.githubusercontent.com/96192199/227504617-7a497bb3-0a2a-4b50-9a8a-95ae60ea7afd.png" height="300" >
|
||||
<img src="https://user-images.githubusercontent.com/96192199/227504005-efeaefe0-b687-49d0-bf95-2d7b7e66c348.png" height="300" >
|
||||
</div>
|
||||
|
||||
### Modular Function Design
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/229288270-093643c1-0018-487a-81e6-1d7809b6e90f.png" height="400" >
|
||||
<img src="https://user-images.githubusercontent.com/96192199/227504931-19955f78-45cd-4d1c-adac-e71e50957915.png" height="400" >
|
||||
</div>
|
||||
|
||||
|
||||
### Translating source code to English
|
||||
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/229720562-fe6c3508-6142-4635-a83d-21eb3669baee.png" height="400" >
|
||||
</div>
|
||||
|
||||
## Todo and Version Planning:
|
||||
|
||||
- version 3 (Todo):
|
||||
- - Support for gpt4 and other llm
|
||||
- version 2.4+ (Todo):
|
||||
- - Summary of long text and token overflow problems in large project source code
|
||||
- - Implementation of project packaging and deployment
|
||||
- - Function plugin parameter interface optimization
|
||||
- - Self-updating
|
||||
- version 2.4: (1) Added PDF full-text translation function; (2) Added input area switching function; (3) Added vertical layout option; (4) Optimized multi-threaded function plugin.
|
||||
- version 2.3: Enhanced multi-threaded interactivity
|
||||
- version 2.2: Function plug-in supports hot reloading
|
||||
- version 2.1: Collapsible layout
|
||||
- version 2.0: Introduction of modular function plugins
|
||||
- version 1.0: Basic functions
|
||||
|
||||
## References and Learning
|
||||
|
||||
|
||||
```
|
||||
The code refers to the design of many other excellent projects, mainly including:
|
||||
|
||||
# Reference Project 1: Referenced the method of reading OpenAI json, recording historical inquiry records, and using gradio queue in ChuanhuChatGPT
|
||||
https://github.com/GaiZhenbiao/ChuanhuChatGPT
|
||||
|
||||
# Reference Project 2:
|
||||
https://github.com/THUDM/ChatGLM-6B
|
||||
|
||||
```
|
||||
|
||||
|
||||
BIN
img/公式.gif
Normal file
BIN
img/公式.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 11 MiB |
BIN
img/润色.gif
Normal file
BIN
img/润色.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 12 MiB |
BIN
objdump.tmp
Normal file
BIN
objdump.tmp
Normal file
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
# 如何使用其他大语言模型(v3.0分支测试中)
|
||||
# 如何使用其他大语言模型(dev分支测试中)
|
||||
|
||||
## 1. 先运行text-generation
|
||||
``` sh
|
||||
|
||||
@@ -96,7 +96,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
# 看门狗,如果超过期限没有喂狗,则终止
|
||||
if len(observe_window) >= 2:
|
||||
if (time.time()-observe_window[1]) > watch_dog_patience:
|
||||
raise RuntimeError("用户取消了程序。")
|
||||
raise RuntimeError("程序终止。")
|
||||
else: raise RuntimeError("意外Json结构:"+delta)
|
||||
if json_data['finish_reason'] == 'length':
|
||||
raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
gradio==3.25.0
|
||||
tiktoken>=0.3.3
|
||||
gradio>=3.23
|
||||
requests[socks]
|
||||
transformers
|
||||
python-markdown-math
|
||||
@@ -8,6 +7,7 @@ latex2mathml
|
||||
python-docx
|
||||
mdtex2html
|
||||
colorama
|
||||
tiktoken
|
||||
Markdown
|
||||
pygments
|
||||
pymupdf
|
||||
|
||||
@@ -455,7 +455,7 @@ def on_file_uploaded(files, chatbot, txt):
|
||||
chatbot.append(['我上传了文件,请查收',
|
||||
f'[Local Message] 收到以下文件: \n\n{moved_files_str}' +
|
||||
f'\n\n调用路径参数已自动修正到: \n\n{txt}' +
|
||||
f'\n\n现在您点击任意“红颜色”标识的函数插件时,以上文件将被作为输入参数'+err_msg])
|
||||
f'\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'+err_msg])
|
||||
return chatbot, txt
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user