Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -141,20 +141,19 @@ def do_web_search(query: str) -> str:
        summary_lines.append(
            f"### Result {idx}: {title}\n\n"
            f"{snippet}\n\n"
-           f"…
+           f"**Source**: [{displayed_link}]({link})\n\n"
            f"---\n"
        )

    instructions = """
-   # …
+   # Web Search Results

-   아래는 검색 결과입니다. 질문에 답변할 때 이 정보를 활용하세요:
    Below are the search results. Use this information when answering the question:

-   1. …
-   2. …
-   3. …
-   4. …
+   1. Reference the title, content, and source links from each result
+   2. Explicitly cite relevant sources in your response
+   3. Include actual source links in your response
+   4. Synthesize information from multiple sources when answering
    """

    search_results = instructions + "\n".join(summary_lines)
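For context, a minimal sketch of how the modified formatting loop could sit inside do_web_search. The fetch_results helper and its result keys are illustrative assumptions; only the summary assembly and the instruction block mirror the diff.

from typing import Dict, List

def fetch_results(query: str) -> List[Dict[str, str]]:
    # Hypothetical stand-in for the Space's real search backend.
    return [{"title": "Example", "snippet": "…",
             "link": "https://example.com", "displayed_link": "example.com"}]

def do_web_search(query: str) -> str:
    summary_lines = []
    for idx, item in enumerate(fetch_results(query), start=1):
        summary_lines.append(
            f"### Result {idx}: {item['title']}\n\n"
            f"{item['snippet']}\n\n"
            f"**Source**: [{item['displayed_link']}]({item['link']})\n\n"
            f"---\n"
        )
    instructions = """
# Web Search Results

Below are the search results. Use this information when answering the question:

1. Reference the title, content, and source links from each result
2. Explicitly cite relevant sources in your response
3. Include actual source links in your response
4. Synthesize information from multiple sources when answering
"""
    # Prepend the instruction block so the model sees the citation
    # guidance before the results themselves.
    return instructions + "\n".join(summary_lines)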
@@ -364,8 +363,8 @@ def _model_gen_with_oom_catch(**kwargs):
        model.generate(**kwargs)
    except torch.cuda.OutOfMemoryError:
        raise RuntimeError(
-           "[OutOfMemoryError] GPU …
-           "Max New Tokens …
+           "[OutOfMemoryError] GPU memory insufficient. "
+           "Please reduce Max New Tokens or shorten the prompt length."
        )
    finally:
        clear_cuda_cache()
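The surrounding wrapper follows a catch-and-clean pattern. A self-contained sketch, assuming model is the module-level transformers model loaded elsewhere in app.py and that clear_cuda_cache drains the CUDA allocator:

import gc
import torch

def clear_cuda_cache():
    # Release Python garbage and cached CUDA blocks after each generation,
    # so a retry with smaller settings has a chance to succeed.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

def _model_gen_with_oom_catch(**kwargs):
    # `model` is assumed to be the module-level model loaded elsewhere in app.py.
    try:
        model.generate(**kwargs)
    except torch.cuda.OutOfMemoryError:
        # Re-raise CUDA OOM as a user-readable error for the UI.
        raise RuntimeError(
            "[OutOfMemoryError] GPU memory insufficient. "
            "Please reduce Max New Tokens or shorten the prompt length."
        )
    finally:
        clear_cuda_cache()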
@@ -479,7 +478,7 @@ def run(

    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
-       yield f"…
+       yield f"Sorry, an error occurred: {str(e)}"

    finally:
        # memory cleanup
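A minimal sketch of the generator's error path, with generate_stream as a hypothetical stand-in for the app's actual token-streaming loop and clear_cuda_cache as the helper shown above:

import logging

logger = logging.getLogger(__name__)

def run(message, history, *args):
    try:
        # generate_stream is a hypothetical placeholder for the app's
        # tokenizer/streamer loop; each chunk is yielded to the Gradio UI.
        for chunk in generate_stream(message, history, *args):
            yield chunk
    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
        # Surface the failure in the chat window instead of dying silently.
        yield f"Sorry, an error occurred: {str(e)}"
    finally:
        clear_cuda_cache()  # memory cleanup between requests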
@@ -496,7 +495,7 @@ examples = [

    [
        {
-           "text": "…
+           "text": "Please compare and analyze the content of these two PDF files.",
            "files": [
                "assets/additional-examples/before.pdf",
                "assets/additional-examples/after.pdf",
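Each example entry is a multimodal message dict. A hedged sketch of how such entries could feed a multimodal gr.ChatInterface, with run as the handler defined earlier in app.py; the actual wiring in the Space may differ:

import gradio as gr

examples = [
    [
        {
            "text": "Please compare and analyze the content of these two PDF files.",
            "files": [
                "assets/additional-examples/before.pdf",
                "assets/additional-examples/after.pdf",
            ],
        }
    ],
]

# A multimodal ChatInterface accepts {"text": ..., "files": [...]} messages,
# so these examples load directly into the input box when clicked.
demo = gr.ChatInterface(fn=run, multimodal=True, examples=examples)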
@@ -545,43 +544,43 @@ button:hover, .btn:hover {
"""

title_html = """
-<h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;"> 🤖 Gemma3-R1984-1B (…
+<h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;"> 🤖 Gemma3-R1984-1B (Text-Only) </h1>
<p align="center" style="font-size:1.1em; color:#555;">
-   …
-   …
-   ✅ … 'NVIDIA L40s / A100(ZeroGPU) GPU' …
-   …
+   ✅ Agentic AI Platform ✅ Reasoning & Analysis ✅ Text Analysis ✅ Deep Research & RAG <br>
+   ✅ Document Processing (PDF, CSV, TXT) ✅ Web Search Integration ✅ Korean/English Support<br>
+   ✅ Running on Independent Local Server with 'NVIDIA L40s / A100(ZeroGPU) GPU'<br>
+   @Model Repository: VIDraft/Gemma-3-R1984-1B, @Based on: 'Google Gemma-3-1b'
</p>
"""

with gr.Blocks(css=css, title="Gemma3-R1984-1B") as demo:
    gr.Markdown(title_html)

-   with gr.Accordion("…
+   with gr.Accordion("Advanced Settings", open=False):
        web_search_checkbox = gr.Checkbox(
-           label="Deep Research (…
+           label="Deep Research (Enable Web Search)",
            value=False
        )

        max_tokens_slider = gr.Slider(
-           label="…
+           label="Max Tokens (Response Length)",
            minimum=100,
            maximum=8000,
            step=50,
            value=2048,
-           info="…
+           info="Increase this value for longer responses"
        )

        system_prompt_box = gr.Textbox(
            lines=5,
-           label="…
-           value="""…
-           …
-           1. …
-           2. …
-           3. …
-           4. …
-           5. …
+           label="System Prompt",
+           value="""You are an AI assistant that performs deep thinking. Please follow these guidelines:
+
+1. **Language**: If the user asks in Korean, you must answer in Korean. If they ask in English, answer in English.
+2. **Response Length**: Provide sufficiently detailed and rich responses. Write responses with at least 3-5 paragraphs.
+3. **Analysis Method**: Thoroughly analyze problems and provide accurate solutions through systematic reasoning processes.
+4. **Structure**: Organize responses with clear structure, using numbers or bullet points when necessary.
+5. **Examples and Explanations**: Include specific examples and detailed explanations whenever possible."""
        )

        web_search_text = gr.Textbox(
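One plausible way the accordion controls reach the handler is via additional_inputs; this wiring is an assumption for illustration and is not shown in the diff:

# Hypothetical wiring: forward the accordion controls to run() so the
# handler receives (message, history, system_prompt, max_tokens,
# use_web_search, web_search_query) on every call.
chat = gr.ChatInterface(
    fn=run,
    multimodal=True,
    additional_inputs=[
        system_prompt_box,
        max_tokens_slider,
        web_search_checkbox,
        web_search_text,
    ],
)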
@@ -618,7 +617,7 @@ with gr.Blocks(css=css, title="Gemma3-R1984-1B") as demo:

    with gr.Row(elem_id="examples_row"):
        with gr.Column(scale=12, elem_id="examples_container"):
-           gr.Markdown("### …
+           gr.Markdown("### Example Inputs (Click to Load)")

    if __name__ == "__main__":
        demo.launch()