", "", h["content"], flags=re.DOTALL)
clean_content = self._strip_html(raw).strip()
if clean_content:
msgs.append({"role": "assistant", "content": self._wrap_text(clean_content)})
return msgs
def stream_generate(self, raw_hist, sys_prompt: str, *, skip_special_tokens: bool = False):
    """Stream the model's reply, yielding a rendered fragment after each delta.

    When iteration starts, the module-level ``stop_generation`` flag is reset
    to ``False`` and the message list is built with ``self._build_messages``.
    Deltas from ``stream_from_vllm`` are then accumulated into two running
    buffers (reasoning text and answer text); after every delta both buffers
    are passed to ``self._stream_fragment`` and the result is yielded.

    Args:
        raw_hist: Conversation history handed to ``self._build_messages``.
        sys_prompt: System prompt handed to ``self._build_messages``.
        skip_special_tokens: Accepted for interface compatibility; this
            method does not read it.

    Yields:
        The value of ``self._stream_fragment(reasoning, content)`` for the
        text accumulated so far. If streaming raises, a single fragment
        carrying only the error message is yielded instead of propagating
        the exception.
    """
    global stop_generation
    stop_generation = False

    def _text(delta, field: str) -> str:
        # Deltas may be attribute-style objects or plain dicts; a missing or
        # empty field contributes nothing.
        value = getattr(delta, field, None)
        if not value and isinstance(delta, dict):
            value = delta.get(field)
        return value or ""

    msgs = self._build_messages(raw_hist, sys_prompt)
    reasoning_buffer = ""
    content_buffer = ""
    try:
        for delta in stream_from_vllm(msgs):
            # The flag is re-checked on every delta so that setting it from
            # outside this generator ends the stream early.
            if stop_generation:
                break
            # Read both fields independently. Previously an attribute-style
            # delta that carried reasoning AND content lost its content
            # (if/elif), while dict deltas kept both.
            reasoning_buffer += _text(delta, "reasoning_content")
            content_buffer += _text(delta, "content")
            yield self._stream_fragment(reasoning_buffer, content_buffer)
    except Exception as e:
        # Boundary handler: surface the failure to the consumer as a fragment.
        # Text accumulated so far is intentionally replaced by the error.
        error_msg = f"Error during streaming: {e}"
        yield self._stream_fragment("", error_msg)
# Module-level model wrapper instance; extract_table_from_file calls its
# _files_to_content and stream_generate methods.
glm4v = GLM4VModel()
# System prompt passed to glm4v.stream_generate on every extraction request.
# It asks for only the bill-of-material table, returned as a markdown table
# with the listed columns.
# NOTE(review): "METERIAL" and "colums" look like typos, and the parenthesis
# opened after "colums:" is never closed. The text is runtime data, so confirm
# against the drawings' actual table heading before changing it.
sys_prompt = """Instructions:
Extract only "BILL OF METERIAL" table containing columns same as it is!
colums: (POSITION, DESCRIPTION, N PIECES, MATERIAL (like SA 516 Gr.70N or SA 105 N), DIMENSIONS(like 1700 I.D. X 2045H 50 THK.), WT.Kgs
Ignore title blocks, revision notes, drawing numbers, and general annotations outside the "BILL OF METERIAL".
If a page contains multiple tables, extract only those explicitly related to BILL OF METERIAL.
Preserve the row and column's order and structure as it is!
Do not include any surrounding decorative lines or borders—only.
give clean tabular data.
output format: markdown table format with following columns (POSITION, DESCRIPTION, N PIECES, MATERIAL, DIMENSIONS(like 1700 I.D. X 2045H 50 THK.) and WT.Kgs)"""
def extract_table_from_file(file):
    """Stream the model's table extraction for an uploaded file.

    This is a generator: every status line and every partial response is
    yielded so the consumer can display output as it arrives.

    Args:
        file: Uploaded file object exposing a ``name`` attribute, or ``None``
            when nothing was uploaded.

    Yields:
        ``"Please upload a file."`` when ``file`` is ``None``. Otherwise a
        processing notice followed by each chunk produced by
        ``glm4v.stream_generate``; if that raises, one HTML-escaped error
        line is yielded instead of propagating the exception.
    """
    if file is None:
        # In a generator, ``return <value>`` is never delivered to the
        # consumer, so the message has to be yielded before stopping.
        yield "Please upload a file."
        return

    payload = glm4v._files_to_content([file.name])
    raw_hist = [{"role": "user", "content": payload}]

    # NOTE(review): this literal and the error literal below were split across
    # two physical lines in the source (a syntax error, possibly a stripped
    # markup tag). The fragments are joined here — confirm the intended text.
    yield "🌀 Processing...\n"
    try:
        # Each chunk is already the full response so far; pass it through.
        yield from glm4v.stream_generate(raw_hist, sys_prompt)
    except Exception as e:
        # Escape the message because it may end up rendered as HTML.
        yield f"Error: {html.escape(str(e))}"
# UI theme: the Ocean preset from `gr.themes` with its primary hue set to gray.
theme = gr.themes.Ocean(primary_hue="gray")
with gr.Blocks(title="demo", theme=theme) as demo:
gr.Markdown(
" PDF Extraction Demo