Create app.py
app.py
ADDED
@@ -0,0 +1,228 @@
import streamlit as st
import torch
from pinecone import Pinecone
from transformers import AutoTokenizer, AutoModel
import time
import requests

# Page configuration
st.set_page_config(
    page_title="Hukuki Döküman Arama (Detaylı Özet)",
    page_icon="⚖️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# App title and description
st.title("⚖️ Hukuki Döküman Semantik Arama Detaylı Özet")
st.markdown("Bu uygulama, 10.000 hukuki dökümanı içeren bir veritabanında semantik arama yapmanızı sağlar.")

# Initialize Pinecone connection
@st.cache_resource
def initialize_pinecone():
    # The API key is read from Streamlit secrets (.streamlit/secrets.toml or the
    # Space's secret settings) rather than being hard-coded in the source.
    pinecone_client = Pinecone(api_key=st.secrets["PINECONE_API_KEY"])
    return pinecone_client.Index("etikos2")

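# The "etikos2" index is assumed to hold the 10,000 document embeddings at
# dimension 1024, the output size of the multilingual-e5-large model loaded below.
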
# Load the model and tokenizer
@st.cache_resource
def load_model():
    model_name = "intfloat/multilingual-e5-large"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Use GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    return tokenizer, model, device

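# Both resources above are wrapped in @st.cache_resource, so the Pinecone
# connection and the large embedding model are created once per process rather
# than on every Streamlit rerun triggered by user interaction.
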
# Function to get query embedding
def get_query_embedding(query_text, tokenizer, model):
    # Prepend the prefix required by e5 models for query-side inputs
    prefix = "query: "
    query_text = prefix + query_text

    # Tokenize (multilingual-e5-large is XLM-R based and accepts at most
    # 512 tokens, so longer inputs are truncated to that limit)
    inputs = tokenizer(
        query_text,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512
    ).to(model.device)

    # Get embeddings
    with torch.no_grad():
        model_output = model(**inputs)

    # Mean pooling: average the token embeddings, ignoring padding positions
    attention_mask = inputs['attention_mask']
    token_embeddings = model_output[0]
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    # Normalize to unit length so inner product equals cosine similarity
    embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

    # Convert to a plain Python list for the Pinecone query API
    embedding = embeddings[0].cpu().numpy().tolist()
    return embedding

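# e5-family models use asymmetric prefixes: queries are embedded with
# "query: " (as above) and documents with "passage: ". The vectors stored in
# the Pinecone index are therefore assumed to have been created with the
# "passage: " prefix at indexing time, so the normalized dot product compared
# by Pinecone corresponds to query-passage cosine similarity.
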
# Function to truncate text to a reasonable preview length
def get_text_preview(text, max_chars=1000):
    if not text:
        return "İçerik mevcut değil."

    if len(text) <= max_chars:
        return text

    return text[:max_chars] + "..."

# Function to process query through Dify AI
def process_with_dify(query):
    # Dify API configuration; the key is read from Streamlit secrets and the
    # endpoint is the standard Dify chat-messages API.
    dify_api_key = st.secrets["DIFY_API_KEY"]
    dify_api_endpoint = "https://api.dify.ai/v1/chat-messages"

    headers = {
        "Authorization": f"Bearer {dify_api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "inputs": {},
        "query": query,
        "response_mode": "blocking",
        "user": "user"
    }

    try:
        # Time out rather than hang the UI if the Dify API is unresponsive
        response = requests.post(dify_api_endpoint, headers=headers, json=payload, timeout=60)
        if response.status_code == 200:
            data = response.json()
            return data.get("answer", "")
        else:
            st.warning(f"Dify AI ile iletişim kurulurken hata oluştu: {response.status_code}")
            return ""
    except Exception as e:
        st.warning(f"Dify AI işlemi sırasında hata: {str(e)}")
        return ""

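# The Dify application behind this endpoint is assumed to act as a
# query-rewriting step: it receives the raw user question and returns a
# reformulated search string, which is embedded and used for retrieval
# instead of the original query (see the search flow below).
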
# Sidebar configuration
st.sidebar.header("Arama Ayarları")
top_k = st.sidebar.slider("Gösterilecek sonuç sayısı:", 1, 30, 5)
preview_length = st.sidebar.slider("Ön izleme uzunluğu (karakter):", 500, 3000, 1000)

# Initialize resources with status indicators
with st.sidebar:
    st.subheader("Sistem Durumu")

    with st.status("Pinecone bağlantısı kuruluyor...", expanded=True) as status:
        try:
            index = initialize_pinecone()
            status.update(label="Pinecone bağlantısı kuruldu ✅", state="complete", expanded=False)
        except Exception as e:
            status.update(label=f"Pinecone bağlantı hatası ❌: {str(e)}", state="error", expanded=True)
            st.error("Veritabanına bağlanılamadı. Lütfen daha sonra tekrar deneyin.")
            st.stop()

    with st.status("Model yükleniyor...", expanded=True) as status:
        try:
            tokenizer, model, device = load_model()
            status.update(label=f"Model yüklendi ✅ ({device.upper()} kullanılıyor)", state="complete", expanded=False)
        except Exception as e:
            status.update(label=f"Model yükleme hatası ❌: {str(e)}", state="error", expanded=True)
            st.error("Model yüklenemedi. Lütfen daha sonra tekrar deneyin.")
            st.stop()

# Main search interface
query = st.text_area("Aramak istediğiniz konuyu yazın:", height=100,
                     placeholder="Örnek: Mülkiyet hakkı ile ilgili davalar")

# Search button
search_button = st.button("🔍 Ara", type="primary", use_container_width=True)

# Execute search when button is clicked
if search_button and query:
    # Process with Dify AI
    with st.spinner("Sorgunuz Dify AI ile analiz ediliyor..."):
        dify_output = process_with_dify(query)

    if not dify_output:
        st.error("Dify AI'dan yanıt alınamadı. Lütfen tekrar deneyin.")
        st.stop()

    # Show the query transformation
    with st.expander("Sorgu Dönüşümü", expanded=True):
        st.write("Orijinal sorgu:")
        st.info(query)
        st.write("Dify AI çıktısı (arama için kullanılacak):")
        st.success(dify_output)

    # Perform the search with ONLY the Dify AI output
    with st.spinner("Arama yapılıyor..."):
        try:
            # Get query embedding from Dify AI output
            start_time = time.time()
            query_embedding = get_query_embedding(dify_output, tokenizer, model)

            # Search Pinecone
            search_results = index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True
            )

            elapsed_time = time.time() - start_time

            # Display results
            st.success(f"Arama tamamlandı! ({elapsed_time:.2f} saniye)")

            if not search_results.matches:
                st.info("Aramanıza uygun sonuç bulunamadı.")
            else:
                st.subheader(f"Arama Sonuçları ({len(search_results.matches)} döküman)")

                # Display each result in a card
                for i, match in enumerate(search_results.matches):
                    with st.container():
                        col1, col2 = st.columns([4, 1])

                        with col1:
                            st.markdown(f"### {i+1}. {match.metadata.get('daire', 'Bilinmeyen Daire')}")

                        with col2:
                            st.metric(label="Benzerlik", value=f"{match.score*100:.1f}%")

                        st.markdown("**Döküman Bilgileri:**")
                        st.markdown(f"""
- **Karar No:** {match.metadata.get('karar_no', 'Belirtilmemiş')}
- **Esas No:** {match.metadata.get('esas_no', 'Belirtilmemiş')}
- **Tarih:** {match.metadata.get('tarih', 'Belirtilmemiş')}
""")

                        # Get full text content from metadata
                        text_content = match.metadata.get('text', match.metadata.get('text_snippet', ''))

                        # Display text content in an expandable section
                        with st.expander("Döküman İçeriği", expanded=True):
                            st.markdown(get_text_preview(text_content, preview_length))

                        # Add download button if text content exists; an explicit
                        # key keeps widget IDs unique across result cards
                        if text_content:
                            st.download_button(
                                label="Tam Metni İndir",
                                data=text_content,
                                file_name=f"karar_{match.metadata.get('karar_no', 'bilinmeyen')}.txt",
                                mime="text/plain",
                                key=f"download_{i}"
                            )

                        st.divider()

        except Exception as e:
            st.error(f"Arama sırasında bir hata oluştu: {str(e)}")

# Footer
st.sidebar.markdown("---")
st.sidebar.caption("©2025 Etikos AI")
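
To run the app locally (a sketch; the commit pins no versions): install streamlit, torch, transformers, requests, and the pinecone client implied by the imports above, put the two secret names used in the code (PINECONE_API_KEY and DIFY_API_KEY) into .streamlit/secrets.toml, then start it with "streamlit run app.py".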