akhaliq (HF Staff) committed
Commit 0182706 · Parent: 6039a02

add new glm model
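
The change registers GLM-4.5 as a selectable model in AVAILABLE_MODELS and adds a dedicated code path for it: a stream_from_vllm helper that streams chat completions from an OpenAI-compatible GLM endpoint (configured via the OPENAI_API_KEY and GLM_BASE_URL environment variables, with a vendor-specific "thinking" option), a GLM45Model class that accumulates streamed content and reasoning and renders them as HTML, and a GLM-4.5 branch in generation_code that reuses the existing per-language post-processing.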

Files changed (1): app.py (+374 -1)
app.py CHANGED
@@ -18,6 +18,8 @@ import json
 import time
 import webbrowser
 import urllib.parse
+import copy
+import html
 
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -359,6 +361,11 @@ AVAILABLE_MODELS = [
         "id": "HuggingFaceTB/SmolLM3-3B",
         "description": "SmolLM3-3B model for code generation and general tasks"
     },
+    {
+        "name": "GLM-4.5",
+        "id": "GLM-4.5",
+        "description": "GLM-4.5 model with thinking capabilities for advanced code generation"
+    },
     {
         "name": "GLM-4.1V-9B-Thinking",
         "id": "THUDM/GLM-4.1V-9B-Thinking",
@@ -1358,6 +1365,226 @@ The HTML code above contains the complete original website structure with all im
     except Exception as e:
         return f"Error extracting website content: {str(e)}"
 
+
+# GLM-4.5 Model Implementation
+stop_generation = False
+
+def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
+    global stop_generation
+
+    # Get GLM API configuration from environment variables
+    glm_api_key = os.getenv('OPENAI_API_KEY')
+    glm_base_url = os.getenv('GLM_BASE_URL', 'https://open.bigmodel.cn/api/paas/v4/')
+
+    if not glm_api_key:
+        # Return a configuration error if no API key is set
+        error_msg = """
+GLM-4.5 API Key Not Configured
+
+To use GLM-4.5, please:
+1. Get your API key from: https://open.bigmodel.cn/
+2. Set environment variable: OPENAI_API_KEY=your_api_key_here
+3. Optionally set GLM_BASE_URL if using a different endpoint
+
+Example HTML code generation with Gradio:
+```html
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Sample App</title>
+    <style>
+        body { font-family: Arial, sans-serif; margin: 40px; }
+        .container { max-width: 600px; margin: 0 auto; }
+        h1 { color: #333; }
+        .button {
+            background: #007acc;
+            color: white;
+            padding: 10px 20px;
+            border: none;
+            border-radius: 5px;
+            cursor: pointer;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>GLM-4.5 Configuration Required</h1>
+        <p>Please configure your GLM-4.5 API key to use this model.</p>
+        <button class="button" onclick="alert('Configure OPENAI_API_KEY environment variable')">Get Started</button>
+    </div>
+</body>
+</html>
+```
+"""
+        yield type('Delta', (), {'content': error_msg, 'reasoning_content': None})()
+        return
+
+    # Configure OpenAI client for GLM-4.5
+    try:
+        client = OpenAI(
+            base_url=glm_base_url,
+            api_key=glm_api_key,
+        )
+
+        response = client.chat.completions.create(
+            model="GLM-4.5",
+            messages=messages,
+            temperature=temperature,
+            stream=True,
+            max_tokens=65536,
+            extra_body={
+                "thinking": {
+                    "type": "enabled" if thinking_enabled else "disabled",
+                }
+            }
+        )
+
+        for chunk in response:
+            if stop_generation:
+                break
+            if chunk.choices and chunk.choices[0].delta:
+                yield chunk.choices[0].delta
+
+    except Exception as e:
+        # Fallback: if the GLM-4.5 API fails, yield the error with sample code
+        error_msg = f"""Error connecting to GLM-4.5: {str(e)}
+
+Please check:
+1. OPENAI_API_KEY environment variable is set correctly
+2. API key is valid and has credits
+3. Network connection is working
+4. GLM_BASE_URL is correct (current: {glm_base_url})
+
+Here's sample HTML code to test the UI:
+```html
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>GLM-4.5 Error - Sample Output</title>
+    <style>
+        body {{
+            font-family: Arial, sans-serif;
+            margin: 40px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+        }}
+        .container {{
+            max-width: 600px;
+            margin: 0 auto;
+            background: rgba(255,255,255,0.1);
+            padding: 30px;
+            border-radius: 15px;
+            backdrop-filter: blur(10px);
+        }}
+        h1 {{ color: #fff; text-align: center; }}
+        .error {{ background: rgba(255,0,0,0.2); padding: 15px; border-radius: 8px; margin: 20px 0; }}
+        .button {{
+            background: rgba(255,255,255,0.2);
+            color: white;
+            padding: 12px 24px;
+            border: 1px solid rgba(255,255,255,0.3);
+            border-radius: 8px;
+            cursor: pointer;
+            display: block;
+            margin: 20px auto;
+        }}
+        .button:hover {{ background: rgba(255,255,255,0.3); }}
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>🤖 GLM-4.5 Configuration Error</h1>
+        <div class="error">
+            <strong>Error:</strong> {str(e)}
+        </div>
+        <p>This is a sample HTML output to demonstrate the UI while you configure GLM-4.5.</p>
+        <button class="button" onclick="window.open('https://open.bigmodel.cn/', '_blank')">Configure GLM-4.5 API</button>
+    </div>
+    <script>
+        console.log('GLM-4.5 API Error: {str(e)}');
+    </script>
+</body>
+</html>
+```"""
+        print(f"GLM-4.5 API Error: {e}")
+        yield type('Delta', (), {'content': error_msg, 'reasoning_content': None})()
+
+
+class GLM45Model:
+    def __init__(self):
+        self.accumulated_content = ""
+        self.accumulated_reasoning = ""
+
+    def reset_state(self):
+        self.accumulated_content = ""
+        self.accumulated_reasoning = ""
+
+    def _render_response(self, reasoning_content, regular_content, skip_think=False):
+        html_parts = []
+
+        if reasoning_content and not skip_think:
+            reasoning_escaped = html.escape(reasoning_content).replace("\n", "<br>")
+            think_html = (
+                "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>Thinking</summary>"
+                "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
+                + reasoning_escaped +
+                "</div></details>"
+            )
+            html_parts.append(think_html)
+
+        if regular_content:
+            content_escaped = html.escape(regular_content).replace("\n", "<br>")
+            content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_escaped}</div>"
+            html_parts.append(content_html)
+
+        return "".join(html_parts)
+
+    def _build_messages(self, raw_hist, sys_prompt):
+        msgs = []
+        if sys_prompt.strip():
+            msgs.append({"role": "system", "content": sys_prompt.strip()})
+
+        for h in raw_hist:
+            if h["role"] == "user":
+                msgs.append({"role": "user", "content": h["content"]})
+            else:
+                msg = {"role": "assistant", "content": h.get("content", "")}
+                if h.get("reasoning_content"):
+                    msg["reasoning_content"] = h.get("reasoning_content")
+                msgs.append(msg)
+        return msgs
+
+    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=1.0):
+        global stop_generation
+        stop_generation = False
+        msgs = self._build_messages(raw_hist, sys_prompt)
+        self.reset_state()
+
+        try:
+            for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
+                if stop_generation:
+                    break
+
+                if hasattr(delta, 'content') and delta.content:
+                    self.accumulated_content += delta.content
+
+                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                    self.accumulated_reasoning += delta.reasoning_content
+
+                yield self._render_response(self.accumulated_reasoning, self.accumulated_content, not thinking_enabled)
+
+        except Exception as e:
+            yield self._render_response("", f"Error: {str(e)}")
+
+
+# Global GLM-4.5 instance
+glm45 = GLM45Model()
+
+
 def generation_code(query: Optional[str], image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto"):
     if query is None:
         query = ''
@@ -1439,7 +1666,153 @@ This will help me create a better design for you."""
     # Enhance query with search if enabled
     enhanced_query = enhance_query_with_search(query, enable_search)
 
-    # Use dynamic client based on selected model
+    # Check if this is the GLM-4.5 model and handle it differently
+    if _current_model["id"] == "GLM-4.5":
+        # For GLM-4.5, use the specialized implementation with simpler streaming
+        if image is not None:
+            messages.append(create_multimodal_message(enhanced_query, image))
+        else:
+            messages.append({'role': 'user', 'content': enhanced_query})
+
+        content = ""
+        reasoning_content = ""
+
+        try:
+            # Use GLM-4.5 streaming directly
+            for delta in stream_from_vllm(messages, True, 1.0):
+                if stop_generation:
+                    break
+
+                if hasattr(delta, 'content') and delta.content:
+                    content += delta.content
+
+                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                    reasoning_content += delta.reasoning_content
+
+                # Show streaming content (extract just the code part)
+                clean_code = remove_code_block(content)
+                search_status = " (with web search)" if enable_search and tavily_client else ""
+
+                # Handle different language outputs for GLM-4.5 during streaming
+                if language == "transformers.js":
+                    files = parse_transformers_js_output(clean_code)
+                    if files['index.html'] and files['index.js'] and files['style.css']:
+                        formatted_output = format_transformers_js_output(files)
+                        yield {
+                            code_output: gr.update(value=formatted_output, language="html"),
+                            history_output: history_to_chatbot_messages(_history),
+                            sandbox: send_to_sandbox(files['index.html']) if files['index.html'] else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                        }
+                    else:
+                        yield {
+                            code_output: gr.update(value=clean_code, language="html"),
+                            history_output: history_to_chatbot_messages(_history),
+                            sandbox: "<div style='padding:1em;color:#888;text-align:center;'>Generating transformers.js app...</div>",
+                        }
+                elif language == "svelte":
+                    yield {
+                        code_output: gr.update(value=clean_code, language="html"),
+                        history_output: history_to_chatbot_messages(_history),
+                        sandbox: "<div style='padding:1em;color:#888;text-align:center;'>Generating Svelte app...</div>",
+                    }
+                else:
+                    if has_existing_content:
+                        if clean_code.strip().startswith("<!DOCTYPE html>") or clean_code.strip().startswith("<html"):
+                            yield {
+                                code_output: gr.update(value=clean_code, language=get_gradio_language(language)),
+                                history_output: history_to_chatbot_messages(_history),
+                                sandbox: send_to_sandbox(clean_code) if language == "html" else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                            }
+                        else:
+                            last_content = _history[-1][1] if _history and len(_history[-1]) > 1 else ""
+                            modified_content = apply_search_replace_changes(last_content, clean_code)
+                            clean_content = remove_code_block(modified_content)
+                            yield {
+                                code_output: gr.update(value=clean_content, language=get_gradio_language(language)),
+                                history_output: history_to_chatbot_messages(_history),
+                                sandbox: send_to_sandbox(clean_content) if language == "html" else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                            }
+                    else:
+                        yield {
+                            code_output: gr.update(value=clean_code, language=get_gradio_language(language)),
+                            history_output: history_to_chatbot_messages(_history),
+                            sandbox: send_to_sandbox(clean_code) if language == "html" else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                        }
+
+        except Exception as e:
+            content = f"Error: {str(e)}"
+            print(f"GLM-4.5 Error: {e}")
+
+        # Final processing for GLM-4.5
+        clean_code = remove_code_block(content)
+
+        # Store content with thinking information if available
+        if reasoning_content:
+            full_response = f"**Thinking:**\n{reasoning_content}\n\n**Code:**\n{content}"
+        else:
+            full_response = content
+
+        if language == "transformers.js":
+            files = parse_transformers_js_output(clean_code)
+            if files['index.html'] and files['index.js'] and files['style.css']:
+                formatted_output = format_transformers_js_output(files)
+                _history.append([query, full_response])
+                yield {
+                    code_output: formatted_output,
+                    history: _history,
+                    sandbox: send_to_sandbox(files['index.html']),
+                    history_output: history_to_chatbot_messages(_history),
+                }
+            else:
+                _history.append([query, full_response])
+                yield {
+                    code_output: clean_code,
+                    history: _history,
+                    sandbox: "<div style='padding:1em;color:#888;text-align:center;'>Error parsing transformers.js output. Please try again.</div>",
+                    history_output: history_to_chatbot_messages(_history),
+                }
+        elif language == "svelte":
+            files = parse_svelte_output(clean_code)
+            if files['src/App.svelte'] and files['src/app.css']:
+                formatted_output = format_svelte_output(files)
+                _history.append([query, full_response])
+                yield {
+                    code_output: formatted_output,
+                    history: _history,
+                    sandbox: "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your Svelte code using the download button above.</div>",
+                    history_output: history_to_chatbot_messages(_history),
+                }
+            else:
+                _history.append([query, full_response])
+                yield {
+                    code_output: clean_code,
+                    history: _history,
+                    sandbox: "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your Svelte code using the download button above.</div>",
+                    history_output: history_to_chatbot_messages(_history),
+                }
+        else:
+            if has_existing_content and not (clean_code.strip().startswith("<!DOCTYPE html>") or clean_code.strip().startswith("<html")):
+                last_content = _history[-1][1] if _history and len(_history[-1]) > 1 else ""
+                modified_content = apply_search_replace_changes(last_content, clean_code)
+                clean_content = remove_code_block(modified_content)
+                _history.append([query, full_response])
+                yield {
+                    code_output: clean_content,
+                    history: _history,
+                    sandbox: send_to_sandbox(clean_content) if language == "html" else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                    history_output: history_to_chatbot_messages(_history),
+                }
+            else:
+                _history.append([query, full_response])
+                yield {
+                    code_output: clean_code,
+                    history: _history,
+                    sandbox: send_to_sandbox(clean_code) if language == "html" else "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML. Please download your code using the download button above.</div>",
+                    history_output: history_to_chatbot_messages(_history),
+                }
+        return
+
+    # Use dynamic client based on selected model (for non-GLM-4.5 models)
     client = get_inference_client(_current_model["id"], provider)
 
     if image is not None:
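
For reference, here is a minimal sketch (not part of the commit) of how the new streaming path can be exercised on its own. It assumes `from openai import OpenAI` is already available in app.py (the diff calls `OpenAI(...)` without adding that import), that `OPENAI_API_KEY` and optionally `GLM_BASE_URL` are set as the diff's error message describes, and that the endpoint accepts the vendor `thinking` extra_body field used in the commit; the prompt string is illustrative only.

# Sketch: drive the same OpenAI-compatible GLM-4.5 streaming call that
# stream_from_vllm makes, accumulating both delta fields the way
# GLM45Model.stream_generate does.
import os
from openai import OpenAI  # assumed to be imported in app.py already

client = OpenAI(
    base_url=os.getenv("GLM_BASE_URL", "https://open.bigmodel.cn/api/paas/v4/"),
    api_key=os.getenv("OPENAI_API_KEY"),
)

stream = client.chat.completions.create(
    model="GLM-4.5",
    messages=[{"role": "user", "content": "Generate a minimal HTML page."}],  # illustrative prompt
    temperature=1.0,
    stream=True,
    extra_body={"thinking": {"type": "enabled"}},  # vendor extension used by the commit
)

content, reasoning = "", ""
for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # reasoning_content is a non-standard field surfaced by the GLM endpoint;
    # getattr keeps the sketch safe on providers that omit it.
    reasoning += getattr(delta, "reasoning_content", None) or ""
    content += getattr(delta, "content", None) or ""

print(content)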