Spaces:
Running
Running
feihu.hf
committed on
Commit
·
602373e
1
Parent(s):
7f72bb4
update file types
Browse files
- app.py +1 -1
- patching.py +32 -3
- web_ui.py +2 -1
app.py
CHANGED
@@ -71,7 +71,7 @@ def app_gui():
|
|
71 |
'max_retries': 10,
|
72 |
}},
|
73 |
name='Qwen-Turbo-1M',
|
74 |
-
description='Qwen-Turbo natively supports input length of up to 1M tokens. You can upload documents for Q&A
|
75 |
rag_cfg={'max_ref_token': 1000000, 'rag_searchers': ['no_search']},
|
76 |
)
|
77 |
chatbot_config = {
|
|
|
71 |
'max_retries': 10,
|
72 |
}},
|
73 |
name='Qwen-Turbo-1M',
|
74 |
+
description='Qwen-Turbo natively supports input length of up to 1M tokens. You can upload documents for Q&A (eg., pdf/docx/pptx/txt/html).',
|
75 |
rag_cfg={'max_ref_token': 1000000, 'rag_searchers': ['no_search']},
|
76 |
)
|
77 |
chatbot_config = {
|
patching.py
CHANGED
@@ -68,6 +68,35 @@ def memory_run(self, messages: List[Message], lang: str = 'en', **kwargs) -> Ite
|
|
68 |
|
69 |
Memory._run = memory_run
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
def SimpleDocParser_call(self, params: Union[str, dict], **kwargs) -> Union[str, list]:
|
73 |
params = self._verify_json_format_args(params)
|
@@ -88,7 +117,7 @@ def SimpleDocParser_call(self, params: Union[str, dict], **kwargs) -> Union[str,
|
|
88 |
time1 = time.time()
|
89 |
|
90 |
f_type = get_file_type(path)
|
91 |
-
if f_type in PARSER_SUPPORTED_FILE_TYPES:
|
92 |
if path.startswith('https://') or path.startswith('http://') or re.match(
|
93 |
r'^[A-Za-z]:\\', path) or re.match(r'^[A-Za-z]:/', path):
|
94 |
path = path
|
@@ -108,7 +137,7 @@ def SimpleDocParser_call(self, params: Union[str, dict], **kwargs) -> Union[str,
|
|
108 |
parsed_file = parse_word(path, self.extract_image)
|
109 |
elif f_type == 'pptx':
|
110 |
parsed_file = parse_ppt(path, self.extract_image)
|
111 |
-
elif f_type == 'txt':
|
112 |
parsed_file = parse_txt(path)
|
113 |
elif f_type == 'html':
|
114 |
parsed_file = parse_html_bs(path, self.extract_image)
|
@@ -120,7 +149,7 @@ def SimpleDocParser_call(self, params: Union[str, dict], **kwargs) -> Union[str,
|
|
120 |
parsed_file = parse_excel(path, self.extract_image)
|
121 |
else:
|
122 |
raise ValueError(
|
123 |
-
f'Failed: The current parser does not support this file type! Supported types: {"/".join(PARSER_SUPPORTED_FILE_TYPES)}'
|
124 |
)
|
125 |
for page in parsed_file:
|
126 |
for para in page['content']:
|
|
|
68 |
|
69 |
Memory._run = memory_run
|
70 |
|
71 |
+
common_programming_language_extensions = [
|
72 |
+
"py", # Python
|
73 |
+
"java", # Java
|
74 |
+
"cpp", # C++
|
75 |
+
"c", # C
|
76 |
+
"h", # C/C++ 头文件
|
77 |
+
"cs", # C#
|
78 |
+
"js", # JavaScript
|
79 |
+
"ts", # TypeScript
|
80 |
+
"rb", # Ruby
|
81 |
+
"php", # PHP
|
82 |
+
"swift", # Swift
|
83 |
+
"go", # Go
|
84 |
+
"rs", # Rust
|
85 |
+
"kt", # Kotlin
|
86 |
+
"scala", # Scala
|
87 |
+
"m", # Objective-C
|
88 |
+
"css", # CSS
|
89 |
+
"sql", # SQL
|
90 |
+
"sh", # Shell
|
91 |
+
"pl", # Perl
|
92 |
+
"r", # R
|
93 |
+
"jl", # Julia
|
94 |
+
"dart", # Dart
|
95 |
+
"json", # JSON
|
96 |
+
"xml", # XML
|
97 |
+
"yml", # YAML
|
98 |
+
"toml", # TOML
|
99 |
+
]
|
100 |
|
101 |
def SimpleDocParser_call(self, params: Union[str, dict], **kwargs) -> Union[str, list]:
|
102 |
params = self._verify_json_format_args(params)
|
|
|
117 |
time1 = time.time()
|
118 |
|
119 |
f_type = get_file_type(path)
|
120 |
+
if f_type in PARSER_SUPPORTED_FILE_TYPES + common_programming_language_extensions:
|
121 |
if path.startswith('https://') or path.startswith('http://') or re.match(
|
122 |
r'^[A-Za-z]:\\', path) or re.match(r'^[A-Za-z]:/', path):
|
123 |
path = path
|
|
|
137 |
parsed_file = parse_word(path, self.extract_image)
|
138 |
elif f_type == 'pptx':
|
139 |
parsed_file = parse_ppt(path, self.extract_image)
|
140 |
+
elif f_type == 'txt' or f_type in common_programming_language_extensions:
|
141 |
parsed_file = parse_txt(path)
|
142 |
elif f_type == 'html':
|
143 |
parsed_file = parse_html_bs(path, self.extract_image)
|
|
|
149 |
parsed_file = parse_excel(path, self.extract_image)
|
150 |
else:
|
151 |
raise ValueError(
|
152 |
+
f'Failed: The current parser does not support this file type! Supported types: {"/".join(PARSER_SUPPORTED_FILE_TYPES + common_programming_language_extensions)}'
|
153 |
)
|
154 |
for page in parsed_file:
|
155 |
for para in page['content']:
|
web_ui.py
CHANGED
@@ -10,6 +10,7 @@ from qwen_agent.gui.utils import convert_fncall_to_text, convert_history_to_chat
|
|
10 |
from qwen_agent.llm.schema import CONTENT, FILE, IMAGE, NAME, ROLE, USER, Message
|
11 |
from qwen_agent.log import logger
|
12 |
from qwen_agent.utils.utils import print_traceback
|
|
|
13 |
|
14 |
class WebUI:
|
15 |
"""A Common chatbot application for agent."""
|
@@ -129,7 +130,7 @@ class WebUI:
|
|
129 |
'display': True
|
130 |
}])
|
131 |
|
132 |
-
input = mgr.MultimodalInput(placeholder=self.input_placeholder, upload_button_props=dict(file_types=[".pdf", ".
|
133 |
|
134 |
with gr.Column(scale=1):
|
135 |
if len(self.agent_list) > 1:
|
|
|
10 |
from qwen_agent.llm.schema import CONTENT, FILE, IMAGE, NAME, ROLE, USER, Message
|
11 |
from qwen_agent.log import logger
|
12 |
from qwen_agent.utils.utils import print_traceback
|
13 |
+
from patching import common_programming_language_extensions
|
14 |
|
15 |
class WebUI:
|
16 |
"""A Common chatbot application for agent."""
|
|
|
130 |
'display': True
|
131 |
}])
|
132 |
|
133 |
+
input = mgr.MultimodalInput(placeholder=self.input_placeholder, upload_button_props=dict(file_types=[".pdf", ".docx", ".pptx", ".txt", ".html", ".csv", ".tsv", ".xlsx", ".xls"] + ["." + file_type for file_type in common_programming_language_extensions]))
|
134 |
|
135 |
with gr.Column(scale=1):
|
136 |
if len(self.agent_list) > 1:
|