Spaces:

Goodnight7
/

llama3.2_vision

Sleeping

App Files Files Community

Goodnight7 committed on Apr 8

Commit

8942d5c

verified ·

1 Parent(s): f528f15

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import os
+from PIL import Image
+import io
+import base64
+import requests
+import json
+from pathlib import Path
+# Ensure assets directory exists
+Path("./assets").mkdir(parents=True, exist_ok=True)
+# Function to call Groq API directly (avoiding the groq package)
+def call_groq_api(image_base64, model, prompt):
+    api_key = os.environ.get("GROQ_API_KEY", "")
+    if not api_key:
+        return None, "Error: GROQ_API_KEY environment variable is not set."
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": model,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/png;base64,{image_base64}"
+                        }
+                    }
+                ]
+            }
+        ],
+        "temperature": 0.1,
+        "max_tokens": 1000
+    }
+    try:
+        response = requests.post(
+            "https://api.groq.com/openai/v1/chat/completions",
+            headers=headers,
+            json=payload
+        )
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"], None
+    except Exception as e:
+        return None, f"Error calling Groq API: {str(e)}"
+# Page configuration
+st.set_page_config(
+    page_title="Llama-3-2-90b-vision-preview",
+    page_icon="👁️",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Add clear button to top right
+col1, col2 = st.columns([6, 1])
+with col1:
+    st.markdown("""
+    <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Llama-3-2-90b-vision-preview
+    """.format(base64.b64encode(open("img/llama.png", "rb").read()).decode()), unsafe_allow_html=True)
+with col2:
+    if st.button("Clear 🗑️"):
+        if "ocr_result" in st.session_state:
+            del st.session_state["ocr_result"]
+        st.rerun()
+st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
+st.markdown("---")
+# Move upload controls to sidebar
+with st.sidebar:
+    st.header("Upload Image")
+    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
+    # Model selection
+    st.subheader("Model Settings")
+    model = st.selectbox(
+        "Select Vision Model",
+        ["Llama-3-2-11b-vision-preview", "Llama-3-2-90b-vision-preview"],
+        index=0
+    )
+if uploaded_file is not None:
+    # Display the uploaded image
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image")
+    if st.button("Extract Text 🔍", type="primary"):
+        with st.spinner("Processing image..."):
+            try:
+                # Convert image for API
+                buffered = io.BytesIO()
+                image.save(buffered, format="PNG")
+                img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+                # Prepare the prompt
+                prompt = """Analyze the text in the provided image. Extract all readable content
+and present it in a structured Markdown format that is clear, concise,
+and well-organized. Ensure proper formatting (e.g., headings, lists, or
+code blocks) as necessary to represent the content effectively."""
+                # Call the API
+                result, error = call_groq_api(img_str, model, prompt)
+                if error:
+                    st.error(error)
+                else:
+                    st.session_state["ocr_result"] = result
+            except Exception as e:
+                st.error(f"Error processing image: {str(e)}")
+# Main content area for results
+if "ocr_result" in st.session_state:
+    st.markdown(st.session_state["ocr_result"])
+else:
+    st.info("Upload an image and click 'Extract Text' to see the results here.")
+# Footer
+st.markdown("---")
+st.markdown("Made using Vision Models via Groq API")