	unify tiktoken model
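Seven of the files below build a tiktoken encoder for counting tokens; after this commit each of those call sites constructs it the same way, with the model name hard-coded to "gpt-3.5-turbo" (the eighth file, request_llm/bridge_chatgpt.py, only adjusts how its configuration tuple is unpacked). A minimal, self-contained sketch of the shared token-counting pattern; the sample sentence and the printed count are illustrative only:

import tiktoken

# Build the encoder once for the hard-coded model, as the changed call sites now do.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

# disallowed_special=() lets text that happens to contain special-token markers
# (such as "<|endoftext|>") be encoded as ordinary text instead of raising an error.
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

print(get_token_num("How many tokens is this sentence?"))  # prints the token count as a small integer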
    	
crazy_functions/Latex全文润色.py
CHANGED

@@ -13,7 +13,7 @@ class PaperFileGroup():
         # count_token
         import tiktoken
         from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(
+        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
    	
crazy_functions/Latex全文翻译.py
CHANGED

@@ -13,7 +13,7 @@ class PaperFileGroup():
         # count_token
         import tiktoken
         from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(
+        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
    	
crazy_functions/crazy_utils.py
CHANGED

@@ -4,7 +4,7 @@ from toolbox import update_ui, get_conf
 def input_clipping(inputs, history, max_token_limit):
     import tiktoken
     import numpy as np
-    enc = tiktoken.encoding_for_model(
+    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
 
     mode = 'input-and-history'
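input_clipping uses that encoder to measure inputs and history and trim them to max_token_limit. Its body lies outside this hunk, so the following is only a rough sketch of that style of clipping, not the repository's implementation; the clip_oldest_first name and the drop-from-the-front policy are assumptions:

def clip_oldest_first(history, get_token_num, max_token_limit):
    # Drop the oldest history entries until the remaining text fits the token budget.
    clipped = list(history)
    while clipped and sum(get_token_num(h) for h in clipped) > max_token_limit:
        clipped.pop(0)
    return clipped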
    	
crazy_functions/代码重写为全英文_多线程.py
CHANGED

@@ -61,7 +61,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
     MAX_TOKEN = 3000
     import tiktoken
     from toolbox import get_conf
-    enc = tiktoken.encoding_for_model(
+    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
     def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
 
 
    	
crazy_functions/批量Markdown翻译.py
CHANGED

@@ -13,7 +13,7 @@ class PaperFileGroup():
         # count_token
         import tiktoken
         from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(
+        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
    	
crazy_functions/批量翻译PDF文档_多线程.py
CHANGED

@@ -69,7 +69,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
         # 递归地切割PDF文件
         from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
         from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(
+        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
             txt=file_content,  get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
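breakdown_txt_to_satisfy_token_limit_for_pdf splits the extracted PDF text into fragments whose token counts stay under TOKEN_LIMIT_PER_FRAGMENT, measured with the get_token_fn it receives. Its implementation is not part of this diff; below is a minimal sketch of one way such a splitter can work, by recursive halving. The split_to_fit name and the midpoint split are assumptions; the real helper likely prefers natural boundaries such as line breaks:

def split_to_fit(txt, get_token_fn, limit):
    # Return a list of fragments, each measuring within the token limit.
    if get_token_fn(txt) <= limit or len(txt) <= 1:
        return [txt]
    mid = len(txt) // 2  # naive midpoint split, purely for illustration
    return split_to_fit(txt[:mid], get_token_fn, limit) + split_to_fit(txt[mid:], get_token_fn, limit)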
    	
crazy_functions/理解PDF文档内容.py
CHANGED

@@ -18,7 +18,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
 
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
     from toolbox import get_conf
-    enc = tiktoken.encoding_for_model(
+    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
         txt=file_content,  get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
    	
request_llm/bridge_chatgpt.py
CHANGED

@@ -22,8 +22,8 @@ import importlib
 # config_private.py放自己的秘密如API和代理网址
 # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
 from toolbox import get_conf, update_ui
-proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY
-    get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY'
+proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
+    get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY')
 
 timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                  '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
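The repaired assignment depends on toolbox.get_conf returning one value per requested key, so the result can be tuple-unpacked directly. A minimal sketch of that assumed contract; the stand-in values are placeholders, not the project's defaults:

# Assumed behaviour of toolbox.get_conf: look up each named setting and return them in request order.
def get_conf(*keys):
    config = {
        'proxies': None,  # or a requests-style proxy dict when a proxy is configured
        'API_URL': 'https://api.openai.com/v1/chat/completions',
        'API_KEY': 'sk-placeholder',
        'TIMEOUT_SECONDS': 30,
        'MAX_RETRY': 2,
    }
    return tuple(config[k] for k in keys)

proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
    get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY')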