Semnykcz committed (verified)
Commit ac5ebc8 · 1 Parent(s): 646a901

Upload 8 files

Files changed (8)
  1. app.py +277 -314
  2. public/app.js +228 -0
  3. public/index.html +18 -0
  4. public/styles.css +335 -0
  5. readme.md +124 -0
  6. requirements.txt +10 -8
  7. utils/__init__.py +0 -0
  8. utils/model_utils.py +94 -0
app.py CHANGED
@@ -1,330 +1,293 @@
1
  #!/usr/bin/env python3
2
  """
3
- Qwen3 Coder FastAPI server (OpenAI-compatible /v1/chat/completions)
4
-
5
- Refactored into a clean, readable structure and merged with the configuration from config.py.
6
- - Configuration via env vars with sensible defaults (see the AppConfig class)
7
- - Deterministic model/tokenizer loading with optional prewarm via snapshot_download
8
- - Separate sections: configuration, model, API schemas, routes
9
  """
10
 
11
- from __future__ import annotations
12
-
13
  import os
14
- import time
 
15
  import logging
16
- from dataclasses import dataclass
17
- from typing import List, Optional, Dict, Any
18
-
19
  import torch
20
- from fastapi import FastAPI
21
- from fastapi.responses import FileResponse, HTMLResponse
22
- from fastapi.staticfiles import StaticFiles
 
23
  from pydantic import BaseModel
24
- from transformers import AutoTokenizer, AutoModelForCausalLM
25
-
26
- # =============================
27
- # Configuration
28
- # =============================
29
-
30
- @dataclass(frozen=True)
31
- class AppConfig:
32
- """Application configuration with env fallbacks.
33
-
34
- Default values come from the original config.py:
35
- - APP_NAME = "Qwen3 Coder"
36
- - APP_LANG = "en"
37
- - MODEL_ID = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
38
- - MODEL_ALIAS = "qwen3"
39
- - PERSISTENT_DIR = "data"
40
- Additionally:
41
- - SNAPSHOT_DOWNLOAD ("1" / "0")
42
- - PORT (default 7860)
43
- """
44
-
45
- app_name: str
46
- app_lang: str
47
- model_id: str
48
- model_alias: str
49
- persistent_dir: str
50
- snapshot_download: bool
51
- port: int
52
-
53
-
54
- def _env(key: str, default: Optional[str] = None) -> str:
55
- v = os.getenv(key)
56
- return v if v is not None else (default or "")
57
-
58
-
59
- def make_config() -> AppConfig:
60
- # Support both variable names for backward compatibility: PERSISTENT_HOME and PERSISTENT_DIR
61
- persistent_dir = (
62
- os.getenv("PERSISTENT_HOME")
63
- or os.getenv("PERSISTENT_DIR")
64
- or "data"  # default from config.py
65
- )
66
-
67
- return AppConfig(
68
- app_name=_env("APP_NAME", "Qwen3 Coder"),
69
- app_lang=_env("APP_LANG", "en"),
70
- model_id=_env("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct"),
71
- model_alias=_env("MODEL_ALIAS", "qwen3"),
72
- persistent_dir=persistent_dir,
73
- snapshot_download=_env("SNAPSHOT_DOWNLOAD", "0") == "1",
74
- port=int(_env("PORT", "7860") or 7860),
75
- )
76
-
77
-
78
- CONFIG: AppConfig = make_config()
79
-
80
- # Logging
81
- logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
82
- LOGGER = logging.getLogger("qwen3-coder")
83
-
84
- # Absolute paths
85
- PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
86
- FRONTEND_DIR = os.path.join(PROJECT_DIR, "app")
87
- CACHE_DIR = os.path.abspath(CONFIG.persistent_dir)
88
- os.makedirs(CACHE_DIR, exist_ok=True)
89
-
90
- # =============================
91
- # Model and tokenizer loading
92
- # =============================
93
-
94
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
95
-
96
-
97
- def maybe_snapshot_download(model_id: str, cache_dir: str, enabled: bool) -> None:
98
- """Optionally download the model snapshot into the cache (prewarm)."""
99
- if not enabled:
100
- return
101
- try:
102
- from huggingface_hub import snapshot_download # import on-demand
103
-
104
- LOGGER.info("Prewarming model cache via snapshot_download …")
105
- snapshot_download(repo_id=model_id, cache_dir=cache_dir, local_files_only=False)
106
- LOGGER.info("Snapshot download completed")
107
- except Exception as e:
108
- LOGGER.warning("snapshot_download failed: %s", e)
109
-
110
-
111
- def load_models(model_id: str, cache_dir: str):
112
- """Load the tokenizer and model. The 30B variants need a powerful GPU; CPU is very slow."""
113
- LOGGER.info("Loading tokenizer '%s' (cache: %s)", model_id, cache_dir)
114
- tokenizer = AutoTokenizer.from_pretrained(
115
- model_id,
116
- cache_dir=cache_dir,
117
- trust_remote_code=True,
118
- )
119
-
120
- LOGGER.info("Loading model '%s' on device=%s", model_id, DEVICE)
121
- model = AutoModelForCausalLM.from_pretrained(
122
- model_id,
123
- cache_dir=cache_dir,
124
- device_map="auto" if DEVICE == "cuda" else None,
125
- torch_dtype=(torch.float16 if DEVICE == "cuda" else torch.float32),
126
- trust_remote_code=True,
127
- )
128
- if DEVICE != "cuda":
129
- model.to(DEVICE)
130
- model.eval()
131
-
132
- return tokenizer, model
133
-
134
-
135
- # Prewarm (optional)
136
- maybe_snapshot_download(CONFIG.model_id, CACHE_DIR, CONFIG.snapshot_download)
137
-
138
- # Load model/tokenizer (synchronously at startup – kept as the original behavior)
139
- TOKENIZER, MODEL = load_models(CONFIG.model_id, CACHE_DIR)
140
-
141
- # =============================
142
- # API schemas (OpenAI-compatible)
143
- # =============================
144
-
145
- class Message(BaseModel):
146
- role: str # "system" | "user" | "assistant"
147
  content: str
148
 
149
-
150
- class ChatCompletionsRequest(BaseModel):
151
- model: Optional[str] = None
152
- messages: List[Message]
153
- temperature: Optional[float] = 0.2
154
- top_p: Optional[float] = 0.95
155
- max_tokens: Optional[int] = 1024
156
- stream: Optional[bool] = False  # streaming is not implemented
157
- stop: Optional[List[str]] = None
158
-
159
-
160
- # =============================
161
- # FastAPI application and routes
162
- # =============================
163
-
164
- app = FastAPI(title=f"{CONFIG.app_name} ({CONFIG.model_alias})")
165
-
166
- # Static files and frontend in the ./app directory
167
- if os.path.isdir(FRONTEND_DIR):
168
- app.mount("/app", StaticFiles(directory=FRONTEND_DIR), name="app")
169
-
170
-
171
- @app.get("/", response_class=HTMLResponse)
172
- def serve_index():
173
- """Return app/index.html if it exists; otherwise show a simple info page."""
174
- index_path = os.path.join(FRONTEND_DIR, "index.html")
175
- if os.path.exists(index_path):
176
- return FileResponse(index_path)
177
- return HTMLResponse(
178
- """
179
- <h1>Qwen3 Coder</h1>
180
- <p>Please place the frontend in the <code>/app</code> directory (file <code>index.html</code>).</p>
181
- """,
182
- status_code=200,
183
- )
184
-
185
-
186
- @app.get("/healthz")
187
- def healthz() -> Dict[str, Any]:
188
- return {
189
- "ok": True,
190
- "app_name": CONFIG.app_name,
191
- "lang": CONFIG.app_lang,
192
- "model": CONFIG.model_id,
193
- "alias": CONFIG.model_alias,
194
- "device": DEVICE,
195
- "cache_dir": CACHE_DIR,
196
- }
197
-
198
-
199
- @app.get("/v1/models")
200
- def list_models():
201
- return {"object": "list", "data": [{"id": CONFIG.model_id, "object": "model"}]}
202
-
203
-
204
- @app.post("/v1/chat/completions")
205
- def chat_completions(req: ChatCompletionsRequest):
206
- """OpenAI-compatible Chat Completions (non-streaming)."""
207
- # Convert messages to the format expected by the chat template
208
- msgs = [{"role": m.role, "content": m.content} for m in req.messages]
209
-
210
- if req.stream:
211
- def event_gen():
212
- try:
213
- input_ids = TOKENIZER.apply_chat_template(
214
- msgs,
215
- tokenize=True,
216
- add_generation_prompt=True,
217
- return_tensors="pt",
218
- ).to(MODEL.device)
219
-
220
- streamer = TextIteratorStreamer(
221
- TOKENIZER,
222
- skip_prompt=True,
223
- skip_special_tokens=True,
224
- )
225
-
226
- gen_kwargs = dict(
227
- input_ids=input_ids,
228
- max_new_tokens=req.max_tokens or 1024,
229
- do_sample=(req.temperature or 0) > 0,
230
- temperature=req.temperature or 0.2,
231
- top_p=req.top_p or 0.95,
232
- pad_token_id=TOKENIZER.eos_token_id,
233
- eos_token_id=TOKENIZER.eos_token_id,
234
- use_cache=True,
235
- streamer=streamer,
236
- )
237
-
238
- thread = threading.Thread(target=MODEL.generate, kwargs=gen_kwargs, daemon=True)
239
- thread.start()
240
-
241
- started = False
242
- for piece in streamer:
243
- # first token => "typing" indication
244
- if not started:
245
- started = True
246
- now = int(time.time())
247
- chunk = {
248
- "id": f"chatcmpl-{now}",
249
- "object": "chat.completion.chunk",
250
- "created": now,
251
- "model": req.model or CONFIG.model_id,
252
- "choices": [
253
- {"index": 0, "delta": {"content": piece}, "finish_reason": None}
254
- ],
255
- }
256
- yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
257
-
258
- # final chunk
259
- now = int(time.time())
260
- done_chunk = {
261
- "id": f"chatcmpl-{now}",
262
- "object": "chat.completion.chunk",
263
- "created": now,
264
- "model": req.model or CONFIG.model_id,
265
- "choices": [
266
- {"index": 0, "delta": {}, "finish_reason": "stop"}
267
- ],
268
- }
269
- yield f"data: {json.dumps(done_chunk, ensure_ascii=False)}\n\n"
270
- yield "data: [DONE]\n\n"
271
- except Exception as e:
272
- err = {"error": str(e)}
273
- yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
274
- yield "data: [DONE]\n\n"
275
-
276
- return StreamingResponse(event_gen(), media_type="text/event-stream")
277
-
278
- # Non-streaming variant
279
- input_ids = TOKENIZER.apply_chat_template(
280
- msgs,
281
- tokenize=True,
282
- add_generation_prompt=True,
283
- return_tensors="pt",
284
- ).to(MODEL.device)
285
-
286
- outputs = MODEL.generate(
287
- input_ids=input_ids,
288
- max_new_tokens=req.max_tokens or 1024,
289
- do_sample=(req.temperature or 0) > 0,
290
- temperature=req.temperature or 0.2,
291
- top_p=req.top_p or 0.95,
292
- pad_token_id=TOKENIZER.eos_token_id,
293
- eos_token_id=TOKENIZER.eos_token_id,
294
- use_cache=True,
295
- )
296
-
297
- # Newly generated part after the prompt
298
- gen_ids = outputs[0][input_ids.shape[-1] :]
299
- text = TOKENIZER.decode(gen_ids, skip_special_tokens=True).strip()
300
-
301
- now = int(time.time())
302
- usage = {
303
- "prompt_tokens": int(input_ids.numel()),
304
- "completion_tokens": int(gen_ids.numel()),
305
- "total_tokens": int(input_ids.numel() + gen_ids.numel()),
306
- }
307
-
308
- return {
309
- "id": f"chatcmpl-{now}",
310
- "object": "chat.completion",
311
- "created": now,
312
- "model": req.model or CONFIG.model_id,
313
- "choices": [
314
- {
315
  "index": 0,
316
- "message": {"role": "assistant", "content": text},
317
- "finish_reason": "stop",
318
  }
319
- ],
320
- "usage": usage,
321
- }
 
322
 
323
 
324
- # =============================
325
- # Local run (HF Spaces launches this automatically)
326
- # =============================
327
- if __name__ == "__main__":
328
  import uvicorn
 
329
 
330
- uvicorn.run(app, host="0.0.0.0", port=CONFIG.port)
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ AI Chat Application for HuggingFace Spaces
4
+ Integration with Qwen/Qwen3-Coder-30B-A3B-Instruct model
5
+ OpenAI API compatibility features
 
 
 
6
  """
7
 
 
 
8
  import os
9
+ import sys
10
+ import json
11
  import logging
12
+ import time
13
+ from typing import Optional, Dict, Any, Generator
 
14
  import torch
15
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
16
+ import gradio as gr
17
+ from fastapi import FastAPI, HTTPException, Response
18
+ from fastapi.responses import StreamingResponse
19
  from pydantic import BaseModel
20
+ import redis
21
+ import asyncio
22
+ import threading
23
+ from threading import Thread
24
+
25
+ # Configure logging
26
+ logging.basicConfig(level=logging.INFO)
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # Model configuration
30
+ MODEL_NAME = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
31
+ DEFAULT_MAX_TOKENS = 1024
32
+ DEFAULT_TEMPERATURE = 0.7
33
+
34
+ class ConversationManager:
35
+ """Manage conversation history and caching"""
36
+
37
+ def __init__(self):
38
+ self.redis_client = None
39
+ try:
40
+ self.redis_client = redis.Redis(host='localhost', port=6379, db=0)
41
+ self.redis_client.ping()
42
+ except Exception:
+ self.redis_client = None  # fall back to the in-memory dict set below
43
+ logger.warning("Redis not available, using in-memory storage")
44
+ self.conversations = {}
45
+
46
+ def save_conversation(self, conv_id: str, messages: list) -> None:
47
+ """Save conversation to cache"""
48
+ try:
49
+ if self.redis_client:
50
+ self.redis_client.setex(conv_id, 86400, json.dumps(messages)) # 24 hours expiry
51
+ else:
52
+ self.conversations[conv_id] = messages
53
+ except Exception as e:
54
+ logger.error(f"Error saving conversation: {e}")
55
+
56
+ def load_conversation(self, conv_id: str) -> list:
57
+ """Load conversation from cache"""
58
+ try:
59
+ if self.redis_client:
60
+ data = self.redis_client.get(conv_id)
61
+ if data:
62
+ return json.loads(data)
63
+ else:
64
+ return self.conversations.get(conv_id, [])
65
+ except Exception as e:
66
+ logger.error(f"Error loading conversation: {e}")
67
+ return []
68
+
69
+ class ModelManager:
70
+ """Manage Qwen model loading and inference"""
71
+
72
+ def __init__(self):
73
+ self.model = None
74
+ self.tokenizer = None
75
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
76
+ self.load_model()
77
+
78
+ def load_model(self) -> None:
79
+ """Load the Qwen model"""
80
+ try:
81
+ logger.info(f"Loading model {MODEL_NAME} on {self.device}")
82
+ self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
83
+ self.model = AutoModelForCausalLM.from_pretrained(
84
+ MODEL_NAME,
85
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
86
+ low_cpu_mem_usage=True,
87
+ device_map="auto"
88
+ )
89
+ logger.info("Model loaded successfully")
90
+ except Exception as e:
91
+ logger.error(f"Error loading model: {e}")
92
+ raise
93
+
94
+ def generate_response(self, prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS, temperature: float = DEFAULT_TEMPERATURE) -> str:
95
+ """Generate response from the model"""
96
+ try:
97
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
98
+
99
+ # Generate without streaming for simple response
100
+ generated = self.model.generate(
101
+ **inputs,
102
+ max_new_tokens=max_tokens,
103
+ temperature=temperature,
104
+ do_sample=True,
105
+ pad_token_id=self.tokenizer.eos_token_id
106
+ )
107
+
108
+ response = self.tokenizer.decode(generated[0], skip_special_tokens=True)
109
+ # Remove the prompt from the response
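+ # (Assumes the decoded text begins with the exact prompt string; slicing the generated token ids after the input length would be more robust.)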
110
+ response = response[len(prompt):].strip()
111
+ return response
112
+ except Exception as e:
113
+ logger.error(f"Error generating response: {e}")
114
+ raise
115
+
116
+ def generate_streaming_response(self, prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS, temperature: float = DEFAULT_TEMPERATURE) -> Generator[str, None, None]:
117
+ """Generate streaming response from the model"""
118
+ try:
119
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
120
+
121
+ # Create streamer for streaming response
122
+ streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
123
+
124
+ # Start generation in a separate thread
125
+ generation_kwargs = dict(
126
+ inputs,
127
+ streamer=streamer,
128
+ max_new_tokens=max_tokens,
129
+ temperature=temperature,
130
+ do_sample=True,
131
+ pad_token_id=self.tokenizer.eos_token_id
132
+ )
133
+
134
+ thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
135
+ thread.start()
136
+
137
+ # Yield tokens as they are generated
138
+ for new_text in streamer:
139
+ yield new_text
140
+
141
+ except Exception as e:
142
+ logger.error(f"Error generating streaming response: {e}")
143
+ yield f"Error: {str(e)}"
144
+
145
+ # Initialize managers
146
+ conversation_manager = ConversationManager()
147
+ model_manager = ModelManager()
148
+
149
+ # FastAPI app for OPENAI API compatibility
150
+ app = FastAPI(title="AI Chat API", description="OpenAI API-compatible interface for the Qwen model")
151
+
152
+ class ChatMessage(BaseModel):
153
+ role: str
154
  content: str
155
 
156
+ class ChatRequest(BaseModel):
157
+ messages: list[ChatMessage]
158
+ model: str = MODEL_NAME
159
+ max_tokens: Optional[int] = DEFAULT_MAX_TOKENS
160
+ temperature: Optional[float] = DEFAULT_TEMPERATURE
161
+
162
+ class ChatResponse(BaseModel):
163
+ id: str
164
+ object: str = "chat.completion"
165
+ created: int
166
+ model: str
167
+ choices: list
168
+ usage: Dict[str, int]
169
+
170
+ @app.post("/v1/chat/completions", response_model=ChatResponse)
171
+ async def chat_completion(request: ChatRequest):
172
+ """OpenAI API-compatible chat completion endpoint"""
173
+ try:
174
+ # Convert messages to prompt
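+ # Note: this builds a plain "Role: text" prompt; the tokenizer's chat template (tokenizer.apply_chat_template) is an alternative.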
175
+ prompt = ""
176
+ for msg in request.messages:
177
+ if msg.role == "system":
178
+ prompt += f"System: {msg.content}\n"
179
+ elif msg.role == "user":
180
+ prompt += f"User: {msg.content}\n"
181
+ elif msg.role == "assistant":
182
+ prompt += f"Assistant: {msg.content}\n"
183
+
184
+ # Generate response
185
+ response_text = model_manager.generate_response(
186
+ prompt,
187
+ request.max_tokens or DEFAULT_MAX_TOKENS,
188
+ request.temperature or DEFAULT_TEMPERATURE
189
+ )
190
+
191
+ # Return in OPENAI format
192
+ return ChatResponse(
193
+ id="chatcmpl-" + str(hash(prompt))[:10],
194
+ created=int(time.time()),
195
+ model=request.model,
196
+ choices=[{
197
  "index": 0,
198
+ "message": {
199
+ "role": "assistant",
200
+ "content": response_text
201
+ },
202
+ "finish_reason": "stop"
203
+ }],
204
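+ # Whitespace-split word counts below are only a rough proxy for real token counts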
+ usage={
205
+ "prompt_tokens": len(prompt.split()),
206
+ "completion_tokens": len(response_text.split()),
207
+ "total_tokens": len(prompt.split()) + len(response_text.split())
208
  }
209
+ )
210
+ except Exception as e:
211
+ logger.error(f"Error in chat completion: {e}")
212
+ raise HTTPException(status_code=500, detail=str(e))
213
 
214
+ @app.post("/chat")
215
+ async def chat_endpoint(request: dict):
216
+ """Endpoint for frontend chat interface"""
217
+ try:
218
+ message = request.get("message", "")
219
+ history = request.get("history", [])
220
+
221
+ # Convert history to prompt
222
+ prompt = ""
223
+ for msg in history:
224
+ if msg["role"] == "user":
225
+ prompt += f"User: {msg['content']}\n"
226
+ elif msg["role"] == "assistant":
227
+ prompt += f"Assistant: {msg['content']}\n"
228
+ prompt += f"User: {message}\nAssistant:"
229
+
230
+ # Return streaming response
231
+ return StreamingResponse(
232
+ model_manager.generate_streaming_response(prompt),
233
+ media_type="text/plain"
234
+ )
235
+ except Exception as e:
236
+ logger.error(f"Error in chat endpoint: {e}")
237
+ raise HTTPException(status_code=500, detail=str(e))
238
+
239
+ # Gradio interface
240
+ def predict(message, history):
241
+ """Gradio prediction function"""
242
+ # Convert history to prompt
243
+ prompt = ""
244
+ for human, ai in history:
245
+ prompt += f"User: {human}\nAssistant: {ai}\n"
246
+ prompt += f"User: {message}\nAssistant:"
247
+
248
+ # Generate response
249
+ response = model_manager.generate_response(prompt)
250
+ return response
251
+
252
+ # Create Gradio interface
253
+ gradio_interface = gr.ChatInterface(
254
+ fn=predict,
255
+ title="AI Chat with Qwen Coder",
256
+ description="Chat with Qwen/Qwen3-Coder-30B-A3B-Instruct model",
257
+ examples=[
258
+ ["Hello, how are you today?"],
259
+ ["Can you explain quantum computing in simple terms?"],
260
+ ["Write a Python function to calculate Fibonacci numbers"]
261
+ ],
262
+ cache_examples=False
263
+ )
264
+
265
+ # Serve static files
266
+ from fastapi.staticfiles import StaticFiles
267
 
268
+ # Combine FastAPI and Gradio
269
+ def launch_app():
270
+ """Launch the combined FastAPI and Gradio app"""
271
+ from fastapi.middleware.cors import CORSMiddleware
272
+
273
+ # Add CORS middleware
274
+ app.add_middleware(
275
+ CORSMiddleware,
276
+ allow_origins=["*"],
277
+ allow_credentials=True,
278
+ allow_methods=["*"],
279
+ allow_headers=["*"],
280
+ )
281
+
282
+ # Mount static files
283
+ app.mount("/public", StaticFiles(directory="public"), name="public")
284
+
285
+ # Mount Gradio interface
286
+ app = gr.mount_gradio_app(app, gradio_interface, path="/")  # ChatInterface is a Blocks; mount it onto the FastAPI app
287
+
288
+ # Run the app
289
  import uvicorn
290
+ uvicorn.run(app, host="0.0.0.0", port=7860)
291
 
292
+ if __name__ == "__main__":
293
+ launch_app()
public/app.js ADDED
@@ -0,0 +1,228 @@
1
+ // AI Chat Application JavaScript Logic
2
+ // This file contains the React component for the chat interface
3
+
4
+ // Main App component
5
+ function App() {
6
+ const [messages, setMessages] = React.useState([]);
7
+ const [inputValue, setInputValue] = React.useState('');
8
+ const [isLoading, setIsLoading] = React.useState(false);
9
+ const [darkMode, setDarkMode] = React.useState(false);
10
+ const messagesEndRef = React.useRef(null);
11
+
12
+ // Scroll to bottom of messages
13
+ const scrollToBottom = () => {
14
+ messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
15
+ };
16
+
17
+ // Scroll to bottom when messages change
18
+ React.useEffect(() => {
19
+ scrollToBottom();
20
+ }, [messages]);
21
+
22
+ // Toggle dark mode
23
+ const toggleDarkMode = () => {
24
+ setDarkMode(!darkMode);
25
+ document.documentElement.classList.toggle('dark', !darkMode);
26
+ };
27
+
28
+ // Handle input change
29
+ const handleInputChange = (e) => {
30
+ setInputValue(e.target.value);
31
+ };
32
+
33
+ // Handle form submission
34
+ const handleSubmit = async (e) => {
35
+ e.preventDefault();
36
+ if (!inputValue.trim() || isLoading) return;
37
+
38
+ // Add user message to chat
39
+ const userMessage = { id: Date.now(), text: inputValue, sender: 'user' };
40
+ setMessages(prev => [...prev, userMessage]);
41
+ setInputValue('');
42
+ setIsLoading(true);
43
+
+ // Declare the AI message id before the try block so the catch handler can reference it
+ const aiMessageId = Date.now() + 1;
+
+ try {
+ // Add temporary AI message placeholder
47
+ setMessages(prev => [...prev, { id: aiMessageId, text: '', sender: 'ai', isLoading: true }]);
48
+
49
+ // Send request to backend
50
+ const response = await fetch('/chat', {
51
+ method: 'POST',
52
+ headers: {
53
+ 'Content-Type': 'application/json',
54
+ },
55
+ body: JSON.stringify({
56
+ message: inputValue,
57
+ history: messages.filter(m => !m.isLoading).map(m => ({
58
+ role: m.sender === 'user' ? 'user' : 'assistant',
59
+ content: m.text
60
+ }))
61
+ })
62
+ });
63
+
64
+ if (!response.ok) {
65
+ throw new Error(`HTTP error! status: ${response.status}`);
66
+ }
67
+
68
+ // Process streaming response
69
+ const reader = response.body.getReader();
70
+ const decoder = new TextDecoder();
71
+ let aiResponse = '';
72
+
73
+ while (true) {
74
+ const { done, value } = await reader.read();
75
+ if (done) break;
76
+
77
+ const chunk = decoder.decode(value);
78
+ aiResponse += chunk;
79
+
80
+ // Update AI message with new content
81
+ setMessages(prev => prev.map(msg =>
82
+ msg.id === aiMessageId
83
+ ? { ...msg, text: aiResponse, isLoading: false }
84
+ : msg
85
+ ));
86
+ }
87
+ } catch (error) {
88
+ console.error('Error sending message:', error);
89
+ setMessages(prev => prev.map(msg =>
90
+ msg.id === aiMessageId
91
+ ? { ...msg, text: 'Sorry, I encountered an error. Please try again.', isLoading: false, error: true }
92
+ : msg
93
+ ));
94
+ } finally {
95
+ setIsLoading(false);
96
+ }
97
+ };
98
+
99
+ // Copy message to clipboard
100
+ const copyToClipboard = (text) => {
101
+ navigator.clipboard.writeText(text).then(() => {
102
+ // Show success message (could be a toast notification)
103
+ console.log('Copied to clipboard');
104
+ }).catch(err => {
105
+ console.error('Failed to copy: ', err);
106
+ });
107
+ };
108
+
109
+ // Clear chat history
110
+ const clearChat = () => {
111
+ setMessages([]);
112
+ };
113
+
114
+ return (
115
+ <div className="chat-container">
116
+ {/* Header */}
117
+ <div className="chat-header flex justify-between items-center">
118
+ <h1 className="text-2xl font-bold">AI Chat with Qwen Coder</h1>
119
+ <div className="flex gap-2">
120
+ <button
121
+ onClick={toggleDarkMode}
122
+ className="btn btn-secondary"
123
+ aria-label="Toggle dark mode"
124
+ >
125
+ {darkMode ? (
126
+ <i className="fas fa-sun"></i>
127
+ ) : (
128
+ <i className="fas fa-moon"></i>
129
+ )}
130
+ </button>
131
+ <button
132
+ onClick={clearChat}
133
+ className="btn btn-secondary"
134
+ aria-label="Clear chat"
135
+ >
136
+ <i className="fas fa-trash"></i>
137
+ </button>
138
+ </div>
139
+ </div>
140
+
141
+ {/* Chat messages area */}
142
+ <div className="chat-messages">
143
+ {messages.length === 0 ? (
144
+ <div className="flex flex-col items-center justify-center h-full text-center">
145
+ <h2 className="text-2xl font-bold mb-4">Welcome to AI Chat</h2>
146
+ <p className="text-lg mb-8">Start a conversation with Qwen Coder by typing a message below</p>
147
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-4 w-full max-w-2xl">
148
+ <div className="bg-gray-100 dark:bg-gray-800 p-4 rounded-lg">
149
+ <h3 className="font-bold mb-2">Examples</h3>
150
+ <ul className="text-left">
151
+ <li>"Explain quantum computing in simple terms"</li>
152
+ <li>"Write a Python function to calculate Fibonacci numbers"</li>
153
+ <li>"How do I make an HTTP request in JavaScript?"</li>
154
+ </ul>
155
+ </div>
156
+ <div className="bg-gray-100 dark:bg-gray-800 p-4 rounded-lg">
157
+ <h3 className="font-bold mb-2">Capabilities</h3>
158
+ <ul className="text-left">
159
+ <li>Remembers previous conversation</li>
160
+ <li>Understands complex instructions</li>
161
+ <li>Generates code and explanations</li>
162
+ </ul>
163
+ </div>
164
+ </div>
165
+ </div>
166
+ ) : (
167
+ messages.map((message) => (
168
+ <div
169
+ key={message.id}
170
+ className={`message-bubble relative ${message.sender === 'user' ? 'user' : 'ai'}`}
171
+ >
172
+ {message.sender === 'ai' && !message.isLoading && (
173
+ <button
174
+ onClick={() => copyToClipboard(message.text)}
175
+ className="copy-button"
176
+ aria-label="Copy message"
177
+ >
178
+ <i className="fas fa-copy"></i>
179
+ </button>
180
+ )}
181
+ {message.isLoading ? (
182
+ <div className="typing-indicator">
183
+ <div className="typing-dot"></div>
184
+ <div className="typing-dot"></div>
185
+ <div className="typing-dot"></div>
186
+ </div>
187
+ ) : (
188
+ <div>{message.text}</div>
189
+ )}
190
+ </div>
191
+ ))
192
+ )}
193
+ <div ref={messagesEndRef} />
194
+ </div>
195
+
196
+ {/* Input area */}
197
+ <div className="chat-input-area">
198
+ <form onSubmit={handleSubmit} className="flex gap-2">
199
+ <input
200
+ type="text"
201
+ value={inputValue}
202
+ onChange={handleInputChange}
203
+ placeholder="Type your message here..."
204
+ className="chat-input"
205
+ disabled={isLoading}
206
+ />
207
+ <button
208
+ type="submit"
209
+ className="btn"
210
+ disabled={isLoading || !inputValue.trim()}
211
+ >
212
+ {isLoading ? (
213
+ <i className="fas fa-spinner fa-spin"></i>
214
+ ) : (
215
+ <i className="fas fa-paper-plane"></i>
216
+ )}
217
+ </button>
218
+ </form>
219
+ <div className="text-xs text-center mt-2 text-gray-500 dark:text-gray-400">
220
+ Qwen Coder can make mistakes. Consider checking important information.
221
+ </div>
222
+ </div>
223
+ </div>
224
+ );
225
+ }
226
+
227
+ // Render the app
228
+ ReactDOM.render(<App />, document.getElementById('root'));
public/index.html ADDED
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>AI Chat with Qwen Coder</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
+ <script>tailwind.config = { darkMode: 'class' };</script>
8
+ <script src="https://unpkg.com/react@18/umd/react.development.js"></script>
9
+ <script src="https://unpkg.com/react-dom@18/umd/react-dom.development.js"></script>
10
+ <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
11
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
12
+ <link rel="stylesheet" href="styles.css">
13
+ </head>
14
+ <body class="bg-gray-50 dark:bg-gray-900 text-gray-900 dark:text-gray-100">
15
+ <div id="root"></div>
16
+ <script type="text/babel" src="app.js"></script>
17
+ </body>
18
+ </html>
public/styles.css ADDED
@@ -0,0 +1,335 @@
1
+ /* Custom CSS variables for theming */
2
+ :root {
3
+ /* Primary color palette */
4
+ --primary-50: 240 249 255;
5
+ --primary-100: 224 242 254;
6
+ --primary-200: 186 230 253;
7
+ --primary-300: 125 211 252;
8
+ --primary-400: 56 189 248;
9
+ --primary-500: 14 165 233;
10
+ --primary-600: 2 132 199;
11
+ --primary-700: 3 105 161;
12
+ --primary-800: 7 89 133;
13
+ --primary-900: 12 74 110;
14
+
15
+ /* Secondary color palette */
16
+ --secondary-50: 248 250 252;
17
+ --secondary-100: 241 245 249;
18
+ --secondary-200: 226 232 240;
19
+ --secondary-300: 203 213 225;
20
+ --secondary-400: 148 163 184;
21
+ --secondary-500: 100 116 139;
22
+ --secondary-600: 71 85 105;
23
+ --secondary-700: 51 65 85;
24
+ --secondary-800: 30 41 59;
25
+ --secondary-900: 15 23 42;
26
+
27
+ /* Accent colors */
28
+ --accent-50: 254 249 195;
29
+ --accent-100: 254 240 138;
30
+ --accent-200: 253 230 138;
31
+ --accent-300: 252 211 77;
32
+ --accent-400: 251 191 36;
33
+ --accent-500: 245 158 11;
34
+ --accent-600: 217 119 6;
35
+ --accent-700: 180 83 9;
36
+ --accent-800: 146 64 14;
37
+ --accent-900: 120 53 15;
38
+
39
+ /* Gradient definitions */
40
+ --gradient-primary: linear-gradient(135deg, hsl(var(--primary-500)), hsl(var(--accent-500)));
41
+ --gradient-secondary: linear-gradient(135deg, hsl(var(--secondary-700)), hsl(var(--secondary-900)));
42
+
43
+ /* Shadows */
44
+ --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
45
+ --shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px -1px rgba(0, 0, 0, 0.1);
46
+ --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.1);
47
+ --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -4px rgba(0, 0, 0, 0.1);
48
+ --shadow-xl: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 8px 10px -6px rgba(0, 0, 0, 0.1);
49
+ --shadow-2xl: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
50
+
51
+ /* Transitions */
52
+ --transition-fast: all 0.15s cubic-bezier(0.4, 0, 0.2, 1);
53
+ --transition-normal: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
54
+ --transition-slow: all 0.5s cubic-bezier(0.4, 0, 0.2, 1);
55
+ }
56
+
57
+ /* Dark mode variables */
58
+ .dark {
59
+ --primary-50: 236 254 255;
60
+ --primary-100: 207 250 254;
61
+ --primary-200: 165 243 252;
62
+ --primary-300: 103 232 249;
63
+ --primary-400: 34 211 238;
64
+ --primary-500: 6 182 212;
65
+ --primary-600: 8 145 178;
66
+ --primary-700: 14 116 144;
67
+ --primary-800: 21 94 117;
68
+ --primary-900: 22 78 99;
69
+ }
70
+
71
+ /* Base styles */
72
+ body {
73
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
74
+ -webkit-font-smoothing: antialiased;
75
+ -moz-osx-font-smoothing: grayscale;
76
+ background-color: hsl(var(--secondary-50));
77
+ transition: background-color var(--transition-normal);
78
+ }
79
+
80
+ .dark body {
81
+ background-color: hsl(var(--secondary-900));
82
+ }
83
+
84
+ /* Chat container */
85
+ .chat-container {
86
+ max-width: 1200px;
87
+ margin: 0 auto;
88
+ height: 100vh;
89
+ display: flex;
90
+ flex-direction: column;
91
+ background-color: hsl(var(--secondary-50));
92
+ transition: background-color var(--transition-normal);
93
+ }
94
+
95
+ .dark .chat-container {
96
+ background-color: hsl(var(--secondary-900));
97
+ }
98
+
99
+ /* Header */
100
+ .chat-header {
101
+ padding: 1rem;
102
+ border-bottom: 1px solid hsl(var(--secondary-200));
103
+ background-color: hsl(var(--secondary-50));
104
+ transition: all var(--transition-normal);
105
+ }
106
+
107
+ .dark .chat-header {
108
+ border-bottom: 1px solid hsl(var(--secondary-800));
109
+ background-color: hsl(var(--secondary-900));
110
+ }
111
+
112
+ /* Chat messages area */
113
+ .chat-messages {
114
+ flex: 1;
115
+ overflow-y: auto;
116
+ padding: 1rem;
117
+ display: flex;
118
+ flex-direction: column;
119
+ gap: 1rem;
120
+ background-color: hsl(var(--secondary-50));
121
+ transition: background-color var(--transition-normal);
122
+ }
123
+
124
+ .dark .chat-messages {
125
+ background-color: hsl(var(--secondary-900));
126
+ }
127
+
128
+ /* Message bubble */
129
+ .message-bubble {
130
+ max-width: 80%;
131
+ padding: 1rem 1.5rem;
132
+ border-radius: 1rem;
133
+ box-shadow: var(--shadow);
134
+ transition: all var(--transition-normal);
135
+ }
136
+
137
+ .message-bubble.user {
138
+ align-self: flex-end;
139
+ background-color: hsl(var(--primary-500));
140
+ color: white;
141
+ }
142
+
143
+ .message-bubble.ai {
144
+ align-self: flex-start;
145
+ background-color: hsl(var(--secondary-100));
146
+ color: hsl(var(--secondary-900));
147
+ }
148
+
149
+ .dark .message-bubble.ai {
150
+ background-color: hsl(var(--secondary-800));
151
+ color: hsl(var(--secondary-100));
152
+ }
153
+
154
+ /* Input area */
155
+ .chat-input-area {
156
+ padding: 1rem;
157
+ border-top: 1px solid hsl(var(--secondary-200));
158
+ background-color: hsl(var(--secondary-50));
159
+ transition: all var(--transition-normal);
160
+ }
161
+
162
+ .dark .chat-input-area {
163
+ border-top: 1px solid hsl(var(--secondary-800));
164
+ background-color: hsl(var(--secondary-900));
165
+ }
166
+
167
+ /* Input field */
168
+ .chat-input {
169
+ width: 100%;
170
+ padding: 0.75rem 1rem;
171
+ border-radius: 0.5rem;
172
+ border: 1px solid hsl(var(--secondary-300));
173
+ background-color: hsl(var(--secondary-100));
174
+ color: hsl(var(--secondary-900));
175
+ transition: all var(--transition-normal);
176
+ }
177
+
178
+ .dark .chat-input {
179
+ border: 1px solid hsl(var(--secondary-700));
180
+ background-color: hsl(var(--secondary-800));
181
+ color: hsl(var(--secondary-100));
182
+ }
183
+
184
+ .chat-input:focus {
185
+ outline: none;
186
+ border-color: hsl(var(--primary-500));
187
+ box-shadow: 0 0 0 3px hsla(var(--primary-500), 0.2);
188
+ }
189
+
190
+ /* Buttons */
191
+ .btn {
192
+ padding: 0.5rem 1rem;
193
+ border-radius: 0.5rem;
194
+ font-weight: 500;
195
+ transition: all var(--transition-normal);
196
+ cursor: pointer;
197
+ border: none;
198
+ background-color: hsl(var(--primary-500));
199
+ color: white;
200
+ }
201
+
202
+ .btn:hover {
203
+ background-color: hsl(var(--primary-600));
204
+ }
205
+
206
+ .btn-secondary {
207
+ background-color: hsl(var(--secondary-200));
208
+ color: hsl(var(--secondary-900));
209
+ }
210
+
211
+ .dark .btn-secondary {
212
+ background-color: hsl(var(--secondary-700));
213
+ color: hsl(var(--secondary-100));
214
+ }
215
+
216
+ .btn-secondary:hover {
217
+ background-color: hsl(var(--secondary-300));
218
+ }
219
+
220
+ .dark .btn-secondary:hover {
221
+ background-color: hsl(var(--secondary-600));
222
+ }
223
+
224
+ /* Copy button */
225
+ .copy-button {
226
+ position: absolute;
227
+ top: 0.5rem;
228
+ right: 0.5rem;
229
+ padding: 0.25rem;
230
+ border-radius: 0.25rem;
231
+ background-color: hsl(var(--secondary-200));
232
+ color: hsl(var(--secondary-700));
233
+ opacity: 0;
234
+ transition: all var(--transition-normal);
235
+ }
236
+
237
+ .message-bubble:hover .copy-button {
238
+ opacity: 1;
239
+ }
240
+
241
+ .dark .copy-button {
242
+ background-color: hsl(var(--secondary-700));
243
+ color: hsl(var(--secondary-200));
244
+ }
245
+
246
+ /* Typing indicator */
247
+ .typing-indicator {
248
+ display: flex;
249
+ align-items: center;
250
+ gap: 0.25rem;
251
+ padding: 1rem 1.5rem;
252
+ background-color: hsl(var(--secondary-100));
253
+ border-radius: 1rem;
254
+ width: fit-content;
255
+ max-width: 80%;
256
+ align-self: flex-start;
257
+ }
258
+
259
+ .dark .typing-indicator {
260
+ background-color: hsl(var(--secondary-800));
261
+ }
262
+
263
+ .typing-dot {
264
+ width: 0.5rem;
265
+ height: 0.5rem;
266
+ border-radius: 50%;
267
+ background-color: hsl(var(--secondary-500));
268
+ animation: typing 1.4s infinite ease-in-out;
269
+ }
270
+
271
+ .typing-dot:nth-child(1) {
272
+ animation-delay: 0s;
273
+ }
274
+
275
+ .typing-dot:nth-child(2) {
276
+ animation-delay: 0.2s;
277
+ }
278
+
279
+ .typing-dot:nth-child(3) {
280
+ animation-delay: 0.4s;
281
+ }
282
+
283
+ @keyframes typing {
284
+ 0%, 60%, 100% {
285
+ transform: translateY(0);
286
+ }
287
+ 30% {
288
+ transform: translateY(-5px);
289
+ }
290
+ }
291
+
292
+ /* Responsive design */
293
+ @media (max-width: 768px) {
294
+ .message-bubble {
295
+ max-width: 90%;
296
+ }
297
+
298
+ .chat-header, .chat-input-area {
299
+ padding: 0.75rem;
300
+ }
301
+
302
+ .chat-messages {
303
+ padding: 0.75rem;
304
+ }
305
+ }
306
+
307
+ /* Scrollbar styling */
308
+ ::-webkit-scrollbar {
309
+ width: 8px;
310
+ }
311
+
312
+ ::-webkit-scrollbar-track {
313
+ background: hsl(var(--secondary-100));
314
+ }
315
+
316
+ .dark ::-webkit-scrollbar-track {
317
+ background: hsl(var(--secondary-800));
318
+ }
319
+
320
+ ::-webkit-scrollbar-thumb {
321
+ background: hsl(var(--secondary-300));
322
+ border-radius: 4px;
323
+ }
324
+
325
+ .dark ::-webkit-scrollbar-thumb {
326
+ background: hsl(var(--secondary-600));
327
+ }
328
+
329
+ ::-webkit-scrollbar-thumb:hover {
330
+ background: hsl(var(--secondary-400));
331
+ }
332
+
333
+ .dark ::-webkit-scrollbar-thumb:hover {
334
+ background: hsl(var(--secondary-500));
335
+ }
readme.md ADDED
@@ -0,0 +1,124 @@
1
+ # AI Chat Application for HuggingFace Spaces
2
+
3
+ A fully functional AI chat application for HuggingFace Spaces that integrates the Qwen3 Coder model and exposes an OpenAI-compatible API.
4
+
5
+ ## Features
6
+
7
+ - Integration with Qwen/Qwen3-Coder-30B-A3B-Instruct model
8
+ - OpenAI API compatibility (`/v1/chat/completions` endpoint)
9
+ - Professional web interface replicating Perplexity AI design
10
+ - Responsive layout with TailwindCSS styling
11
+ - Dark/light mode support
12
+ - Real-time streaming responses
13
+ - Conversation history management
14
+ - Copy response functionality
15
+ - Typing indicators
16
+ - Full GPU optimization
17
+ - Robust error handling and automatic connection recovery
18
+ - Caching mechanisms
19
+ - Ready for immediate deployment on HuggingFace Spaces
20
+
21
+ ## Technology Stack
22
+
23
+ - **Backend**: Python, Gradio, FastAPI, Transformers, PyTorch
24
+ - **Frontend**: TailwindCSS, JavaScript, HTML5
25
+ - **Infrastructure**: Redis for caching, HuggingFace Spaces deployment
26
+
27
+ ## Requirements
28
+
29
+ - Python 3.9+ (built-in generic annotations such as `list[ChatMessage]` are used)
30
+ - GPU with roughly 60 GB of VRAM for Qwen/Qwen3-Coder-30B-A3B-Instruct in float16 (about 2 bytes per parameter); smaller cards need quantization or offloading
31
+ - Redis server (optional, for conversation caching)
32
+
33
+ ## Installation
34
+
35
+ 1. Clone this repository:
36
+ ```bash
37
+ git clone <repository-url>
38
+ cd ai-chat-app
39
+ ```
40
+
41
+ 2. Install dependencies:
42
+ ```bash
43
+ pip install -r requirements.txt
44
+ ```
45
+
46
+ 3. Run the application:
47
+ ```bash
48
+ python app.py
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ ### Web Interface
54
+
55
+ The application provides a web interface accessible at `http://localhost:7860` when running locally. The interface features:
56
+
57
+ - Chat interface similar to Perplexity AI
58
+ - Dark/light mode toggle
59
+ - Conversation history sidebar
60
+ - Copy buttons for responses
61
+ - Typing indicators during response generation
62
+
63
+ ### API Endpoints
64
+
65
+ The application exposes OpenAI API-compatible endpoints:
66
+
67
+ - `POST /v1/chat/completions` - OpenAI-style chat completion endpoint
+ - `POST /chat` - plain-text streaming endpoint used by the bundled web frontend
68
+
69
+ Example request:
70
+ ```json
71
+ {
72
+ "messages": [
73
+ {"role": "user", "content": "Hello, how are you?"}
74
+ ],
75
+ "model": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
76
+ "max_tokens": 1024,
77
+ "temperature": 0.7
78
+ }
79
+ ```
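
A minimal Python client sketch for this endpoint (assumes the server is running locally on port 7860; the `requests` package is not part of requirements.txt):

```python
import requests

payload = {
    "messages": [{"role": "user", "content": "Write a Python function to reverse a string"}],
    "model": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
    "max_tokens": 256,
    "temperature": 0.7,
}

# Call the OpenAI-compatible endpoint and print the assistant's reply
resp = requests.post("http://localhost:7860/v1/chat/completions", json=payload, timeout=600)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```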
80
+
81
+ ## Deployment to HuggingFace Spaces
82
+
83
+ 1. Create a new Space on HuggingFace with the following configuration:
84
+ - SDK: Gradio
85
+ - Hardware: GPU (recommended)
86
+
87
+ 2. Upload all files to your Space repository
88
+
89
+ 3. The application will automatically start and be accessible through your Space URL
90
+
91
+ ## Configuration
92
+
93
+ The application can be configured through environment variables (a usage sketch follows the list):
94
+
95
+ - `MODEL_NAME`: The HuggingFace model identifier (default: Qwen/Qwen3-Coder-30B-A3B-Instruct)
96
+ - `MAX_TOKENS`: Default maximum tokens for responses (default: 1024)
97
+ - `TEMPERATURE`: Default temperature for generation (default: 0.7)
98
+ - `REDIS_URL`: Redis connection URL for caching (optional)
99
+
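
Note that in the current app.py these values are hard-coded rather than read from the environment; a minimal sketch of how the variables above could be wired in:

```python
import os

MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
DEFAULT_MAX_TOKENS = int(os.getenv("MAX_TOKENS", "1024"))
DEFAULT_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
REDIS_URL = os.getenv("REDIS_URL")  # e.g. "redis://localhost:6379/0"; optional

# ConversationManager could then use redis.Redis.from_url(REDIS_URL) instead of the
# hard-coded localhost host/port when REDIS_URL is set.
```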
100
+ ## Troubleshooting
101
+
102
+ ### GPU Memory Issues
103
+
104
+ If you encounter GPU memory issues:
105
+
106
+ 1. Ensure your GPU has enough VRAM (roughly 60 GB for the 30B model in float16)
107
+ 2. Try reducing the `max_tokens` parameter
108
+ 3. Use quantization techniques for model loading (see the sketch below this list)
109
+
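
A 4-bit loading sketch using bitsandbytes (already listed in requirements.txt); the parameters shown are illustrative, not the project's tested configuration:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "Qwen/Qwen3-Coder-30B-A3B-Instruct"

# Quantize weights to 4-bit at load time to cut VRAM use to roughly a quarter of float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
```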
110
+ ### Model Loading Errors
111
+
112
+ If the model fails to load:
113
+
114
+ 1. Check your internet connection
115
+ 2. Ensure you have sufficient disk space
116
+ 3. Verify the model identifier is correct
117
+
118
+ ## Contributing
119
+
120
+ Contributions are welcome! Please fork the repository and submit a pull request with your changes.
121
+
122
+ ## License
123
+
124
+ This project is licensed under the MIT License - see the LICENSE file for details.
requirements.txt CHANGED
@@ -1,8 +1,10 @@
1
- fastapi==0.116.1
2
- uvicorn==0.35.0
3
- transformers>=4.55.3
4
- torch==2.4.0
5
- accelerate>=0.33.0
6
- einops
7
- safetensors
8
- # bitsandbytes>=0.43.1  # only if you want 4-bit quantization
1
+ gradio>=3.0.0
2
+ transformers>=4.30.0
3
+ torch>=2.0.0
4
+ fastapi>=0.68.0
5
+ uvicorn>=0.15.0
6
+ redis>=3.5.0
7
+ aiohttp>=3.7.0
8
+ pydantic>=1.8.0
9
+ accelerate>=0.20.0
10
+ bitsandbytes>=0.39.0
utils/__init__.py ADDED
File without changes
utils/model_utils.py ADDED
@@ -0,0 +1,94 @@
1
+ """
2
+ Model utilities for working with Qwen/Qwen3-Coder-30B-A3B-Instruct model
3
+ """
4
+
5
+ import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
7
+ from threading import Thread
8
+ import logging
9
+ from typing import Generator, Optional
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Model configuration
16
+ MODEL_NAME = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
17
+ DEFAULT_MAX_TOKENS = 1024
18
+ DEFAULT_TEMPERATURE = 0.7
19
+
20
+ class ModelManager:
21
+ """Manage Qwen model loading and inference"""
22
+
23
+ def __init__(self):
24
+ self.model = None
25
+ self.tokenizer = None
26
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
27
+ self.load_model()
28
+
29
+ def load_model(self) -> None:
30
+ """Load the Qwen model"""
31
+ try:
32
+ logger.info(f"Loading model {MODEL_NAME} on {self.device}")
33
+ self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
34
+ self.model = AutoModelForCausalLM.from_pretrained(
35
+ MODEL_NAME,
36
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
37
+ low_cpu_mem_usage=True,
38
+ device_map="auto"
39
+ )
40
+ logger.info("Model loaded successfully")
41
+ except Exception as e:
42
+ logger.error(f"Error loading model: {e}")
43
+ raise
44
+
45
+ def generate_response(self, prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS, temperature: float = DEFAULT_TEMPERATURE) -> str:
46
+ """Generate response from the model"""
47
+ try:
48
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
49
+
50
+ # Generate without streaming for simple response
51
+ generated = self.model.generate(
52
+ **inputs,
53
+ max_new_tokens=max_tokens,
54
+ temperature=temperature,
55
+ do_sample=True,
56
+ pad_token_id=self.tokenizer.eos_token_id
57
+ )
58
+
59
+ response = self.tokenizer.decode(generated[0], skip_special_tokens=True)
60
+ # Remove the prompt from the response
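+ # (Assumes the decoded text begins with the exact prompt string; slicing the generated token ids after the input length would be more robust.)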
61
+ response = response[len(prompt):].strip()
62
+ return response
63
+ except Exception as e:
64
+ logger.error(f"Error generating response: {e}")
65
+ raise
66
+
67
+ def generate_streaming_response(self, prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS, temperature: float = DEFAULT_TEMPERATURE) -> Generator[str, None, None]:
68
+ """Generate streaming response from the model"""
69
+ try:
70
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
71
+
72
+ # Create streamer for streaming response
73
+ streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
74
+
75
+ # Start generation in a separate thread
76
+ generation_kwargs = dict(
77
+ inputs,
78
+ streamer=streamer,
79
+ max_new_tokens=max_tokens,
80
+ temperature=temperature,
81
+ do_sample=True,
82
+ pad_token_id=self.tokenizer.eos_token_id
83
+ )
84
+
85
+ thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
86
+ thread.start()
87
+
88
+ # Yield tokens as they are generated
89
+ for new_text in streamer:
90
+ yield new_text
91
+
92
+ except Exception as e:
93
+ logger.error(f"Error generating streaming response: {e}")
94
+ yield f"Error: {str(e)}"