Update app.py
app.py
CHANGED
@@ -27,7 +27,7 @@ model_name = "ibm-granite/granite-3.1-2b-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="balanced",  # Using balanced CPU mapping.
-    torch_dtype=torch.float16  # Use float16 if supported
+    torch_dtype=torch.float16  # Use float16 if supported.
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
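Note on the changed line: the neighboring comment says the model is CPU-mapped, and float16 is often slow or unsupported in CPU kernels. A minimal sketch of a hardware-aware fallback (an assumption, not part of this commit; `dtype` is an illustrative name):

import torch
from transformers import AutoModelForCausalLM

# Assumption: use float16 only when a GPU is available; most CPU backends
# handle float32 far better than float16.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="balanced",
    torch_dtype=dtype,
)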
@@ -64,7 +64,6 @@ def read_file(file_obj):
     """
     Reads content from a file. Supports both file paths (str) and Streamlit uploaded files.
     """
-    # If file_obj is a string path:
     if isinstance(file_obj, str):
         if file_obj in FILE_CACHE:
             return FILE_CACHE[file_obj]
@@ -139,11 +138,17 @@ def read_files(file_objs, max_length=3000):
     SUMMARY_CACHE[cache_key] = summarized
     return summarized
 
-def
-
-
-
-
+def build_prompt(system_msg, document_content, user_prompt):
+    """
+    Build a unified prompt that explicitly delineates the system instructions,
+    document content, and user prompt.
+    """
+    prompt_parts = []
+    prompt_parts.append("SYSTEM PROMPT:\n" + system_msg.strip())
+    if document_content:
+        prompt_parts.append("\nDOCUMENT CONTENT:\n" + document_content.strip())
+    prompt_parts.append("\nUSER PROMPT:\n" + user_prompt.strip())
+    return "\n\n".join(prompt_parts)
 
 def speculative_decode(input_text, max_tokens=DEFAULT_MAX_TOKENS, top_p=0.9, temperature=0.7):
     model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
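For reference, an illustrative call to the new build_prompt helper (the sample strings below are invented). Because the later parts begin with "\n" and the parts are joined with "\n\n", sections come out separated by two blank lines:

# Illustrative usage of build_prompt; all strings here are made up.
demo = build_prompt(
    "You are a contract analyst.",
    "Clause 7: Licensee shall indemnify Licensor for all claims.",
    "Flag risky clauses.",
)
# demo ==
# "SYSTEM PROMPT:\nYou are a contract analyst.\n\n"
# "\nDOCUMENT CONTENT:\nClause 7: Licensee shall indemnify Licensor for all claims.\n\n"
# "\nUSER PROMPT:\nFlag risky clauses."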
@@ -168,18 +173,23 @@ def post_process(text):
         unique_lines.append(clean_line)
     return "\n".join(unique_lines)
 
-def granite_analysis(
-
-
-
-
-
-        "
-        "
+def granite_analysis(user_prompt, file_objs=None, max_tokens=DEFAULT_MAX_TOKENS, top_p=0.9, temperature=0.7):
+    # Read and summarize document content.
+    document_content = read_files(file_objs) if file_objs else ""
+
+    # Define a clear system prompt.
+    system_prompt = (
+        "You are IBM Granite, an enterprise legal and technical analysis assistant. "
+        "Your task is to critically analyze the contract document provided below. "
+        "Pay special attention to identifying dangerous provisions, legal pitfalls, and potential liabilities. "
+        "Make sure to address both the overall contract structure and specific clauses where applicable."
     )
-
-
-
+
+    # Build a unified prompt with explicit sections.
+    unified_prompt = build_prompt(system_prompt, document_content, user_prompt)
+
+    # Generate the analysis.
+    response = speculative_decode(unified_prompt, max_tokens=max_tokens, top_p=top_p, temperature=temperature)
     final_response = post_process(response)
     return final_response
 
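An illustrative end-to-end call of the rewritten function (the file path and prompt are hypothetical, not taken from this Space):

# Hypothetical usage; "contract.txt" is an assumed local file.
analysis = granite_analysis(
    "List any unilateral termination or auto-renewal clauses.",
    ["contract.txt"],
    max_tokens=512,
    top_p=0.9,
    temperature=0.7,
)
print(analysis)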
@@ -207,10 +217,4 @@ if st.button("Analyze Contract"):
     result = granite_analysis(user_prompt, uploaded_files, max_tokens=max_tokens_slider, top_p=top_p_slider, temperature=temperature_slider)
     st.success("Analysis complete!")
     st.markdown("### Analysis Output")
-
-    keyword = "assistant"
-    text_after_keyword = result.rsplit(keyword, 1)[-1].strip()
-
-    st.text_area("Output", text_after_keyword, height=400)
-
-
+    st.text_area("Output", result, height=400)
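The deleted rsplit("assistant") step presumably trimmed everything before the model's reply when the prompt went through a chat template; the plain-text prompt from build_prompt contains no "assistant" marker, so the full result is now shown directly. If the decoded text still echoes the prompt (model.generate returns the prompt tokens along with the completion), one common alternative is to slice the prompt tokens off before decoding. A sketch only, assuming generated_ids comes from model.generate and model_inputs is the tokenized prompt inside speculative_decode:

# Sketch, not part of this commit: decode just the newly generated tokens.
prompt_len = model_inputs["input_ids"].shape[1]
new_tokens = generated_ids[0][prompt_len:]
text = tokenizer.decode(new_tokens, skip_special_tokens=True)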
|
| 27 |
model = AutoModelForCausalLM.from_pretrained(
|
| 28 |
model_name,
|
| 29 |
device_map="balanced", # Using balanced CPU mapping.
|
| 30 |
+
torch_dtype=torch.float16 # Use float16 if supported.
|
| 31 |
)
|
| 32 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 33 |
|
|
|
|
| 64 |
"""
|
| 65 |
Reads content from a file. Supports both file paths (str) and Streamlit uploaded files.
|
| 66 |
"""
|
|
|
|
| 67 |
if isinstance(file_obj, str):
|
| 68 |
if file_obj in FILE_CACHE:
|
| 69 |
return FILE_CACHE[file_obj]
|
|
|
|
| 138 |
SUMMARY_CACHE[cache_key] = summarized
|
| 139 |
return summarized
|
| 140 |
|
| 141 |
+
def build_prompt(system_msg, document_content, user_prompt):
|
| 142 |
+
"""
|
| 143 |
+
Build a unified prompt that explicitly delineates the system instructions,
|
| 144 |
+
document content, and user prompt.
|
| 145 |
+
"""
|
| 146 |
+
prompt_parts = []
|
| 147 |
+
prompt_parts.append("SYSTEM PROMPT:\n" + system_msg.strip())
|
| 148 |
+
if document_content:
|
| 149 |
+
prompt_parts.append("\nDOCUMENT CONTENT:\n" + document_content.strip())
|
| 150 |
+
prompt_parts.append("\nUSER PROMPT:\n" + user_prompt.strip())
|
| 151 |
+
return "\n\n".join(prompt_parts)
|
| 152 |
|
| 153 |
def speculative_decode(input_text, max_tokens=DEFAULT_MAX_TOKENS, top_p=0.9, temperature=0.7):
|
| 154 |
model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
|
|
|
|
| 173 |
unique_lines.append(clean_line)
|
| 174 |
return "\n".join(unique_lines)
|
| 175 |
|
| 176 |
+
def granite_analysis(user_prompt, file_objs=None, max_tokens=DEFAULT_MAX_TOKENS, top_p=0.9, temperature=0.7):
|
| 177 |
+
# Read and summarize document content.
|
| 178 |
+
document_content = read_files(file_objs) if file_objs else ""
|
| 179 |
+
|
| 180 |
+
# Define a clear system prompt.
|
| 181 |
+
system_prompt = (
|
| 182 |
+
"You are IBM Granite, an enterprise legal and technical analysis assistant. "
|
| 183 |
+
"Your task is to critically analyze the contract document provided below. "
|
| 184 |
+
"Pay special attention to identifying dangerous provisions, legal pitfalls, and potential liabilities. "
|
| 185 |
+
"Make sure to address both the overall contract structure and specific clauses where applicable."
|
| 186 |
)
|
| 187 |
+
|
| 188 |
+
# Build a unified prompt with explicit sections.
|
| 189 |
+
unified_prompt = build_prompt(system_prompt, document_content, user_prompt)
|
| 190 |
+
|
| 191 |
+
# Generate the analysis.
|
| 192 |
+
response = speculative_decode(unified_prompt, max_tokens=max_tokens, top_p=top_p, temperature=temperature)
|
| 193 |
final_response = post_process(response)
|
| 194 |
return final_response
|
| 195 |
|
|
|
|
| 217 |
result = granite_analysis(user_prompt, uploaded_files, max_tokens=max_tokens_slider, top_p=top_p_slider, temperature=temperature_slider)
|
| 218 |
st.success("Analysis complete!")
|
| 219 |
st.markdown("### Analysis Output")
|
| 220 |
+
st.text_area("Output", result, height=400)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|