omersaidd committed on
Commit
b84eed0
·
verified ·
1 Parent(s): 7c346fd

Upload metin_detay.py

Browse files
Files changed (1) hide show
  1. metin_detay.py +234 -0
metin_detay.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from pinecone import Pinecone
4
+ from transformers import AutoTokenizer, AutoModel
5
+ import time
6
+ import requests
7
+ import json
8
+
9
# Page configuration
# Settings are collected in one mapping and handed to Streamlit in a single call.
_page_settings = {
    "page_title": "Hukuki Döküman Arama (Detaylı Özet)",
    "page_icon": "⚖️",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)

# App title and description
st.title("⚖️ Hukuki Döküman Semantik Arama Detaylı Özet")
st.markdown(
    "Bu uygulama, 10.000 hukuki dökümanı içeren bir veritabanında semantik arama yapmanızı sağlar."
)
20
+
21
# Initialize Pinecone connection
@st.cache_resource
def initialize_pinecone():
    """Return a handle to the "etikos2" Pinecone index.

    The API key is read from the ``PINECONE_API_KEY`` environment variable
    rather than being embedded in the source, so the credential is never
    committed alongside the code.

    Returns:
        The object produced by ``Pinecone(api_key=...).Index("etikos2")``.

    Raises:
        RuntimeError: If ``PINECONE_API_KEY`` is unset or blank.
    """
    import os  # local import keeps this block self-contained

    # NOTE: any key that was previously committed in this file must be treated
    # as leaked and rotated in the Pinecone console.
    api_key = os.environ.get("PINECONE_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "PINECONE_API_KEY environment variable is not set; "
            "cannot connect to Pinecone."
        )

    return Pinecone(api_key=api_key).Index("etikos2")
26
+
27
# Load the model and tokenizer
@st.cache_resource
def load_model():
    """Load the multilingual E5 checkpoint together with its tokenizer.

    Returns:
        A ``(tokenizer, model, device)`` tuple. ``device`` is the string
        ``"cuda"`` when ``torch.cuda.is_available()`` reports a GPU and
        ``"cpu"`` otherwise; the model has already been moved to it.
    """
    checkpoint = "intfloat/multilingual-e5-large"

    # Use GPU if available
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    e5_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    e5_model = AutoModel.from_pretrained(checkpoint).to(target_device)

    return e5_tokenizer, e5_model, target_device
39
+
40
# Function to get query embedding
def get_query_embedding(query_text, tokenizer, model, max_length=512):
    """Embed a search query as an L2-normalised, mean-pooled vector.

    Args:
        query_text: Raw query string; the ``"query: "`` prefix is added here.
        tokenizer: Callable that tokenizes the text and returns a mapping
            containing ``"attention_mask"`` and supporting ``.to(device)``.
        model: Callable encoder exposing ``.device``; element ``[0]`` of its
            output is used as the per-token embeddings.
        max_length: Token limit passed to the tokenizer. Defaults to 512,
            the limit documented for the multilingual E5 checkpoint loaded
            in this file; the previous hard-coded 1024 let over-long
            (e.g. Dify-enriched) queries through untruncated.

    Returns:
        The embedding of the single query as a plain list of floats.
    """
    # Prepare text with prefix required by e5 model
    prefixed_query = "query: " + query_text

    # Tokenize and move the tensors to wherever the model lives
    inputs = tokenizer(
        prefixed_query,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=max_length,
    ).to(model.device)

    # Inference only: no gradients needed
    with torch.no_grad():
        model_output = model(**inputs)

    # Mean pooling: average token embeddings, ignoring padded positions
    token_embeddings = model_output[0]
    mask = inputs["attention_mask"].unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp so an all-zero mask cannot cause a division by zero
    counts = torch.clamp(mask.sum(1), min=1e-9)

    # Normalize to unit length
    embeddings = torch.nn.functional.normalize(summed / counts, p=2, dim=1)

    # Only one query was encoded, so return row 0 as a Python list
    return embeddings[0].cpu().tolist()
71
+
72
# Function to truncate text to a reasonable preview length
def get_text_preview(text, max_chars=1000):
    """Return *text* shortened for on-screen display.

    Falsy input (``None`` or an empty string) yields a fixed placeholder
    message. Text of at most *max_chars* characters is returned unchanged;
    longer text is cut at *max_chars* and suffixed with ``"..."``.
    """
    if text:
        fits = len(text) <= max_chars
        return text if fits else text[:max_chars] + "..."
    return "İçerik mevcut değil."
81
+
82
# Function to process query through Dify AI
def process_with_dify(query):
    """Send *query* to the Dify chat endpoint and return its answer text.

    This is a best-effort step: every failure is reported with
    ``st.warning`` and an empty string is returned, so the caller can fall
    back to the original query.

    The API key is read from the ``DIFY_API_KEY`` environment variable
    instead of being embedded in the source.

    Args:
        query: The user's search text.

    Returns:
        The ``"answer"`` field of the JSON response, or ``""`` when the key
        is missing, the request fails, the status is not 200, or the body
        is not a JSON object.
    """
    import os  # local import keeps this block self-contained

    dify_api_endpoint = "https://api.dify.ai/v1/chat-messages"

    # NOTE: any key that was previously committed in this file must be treated
    # as leaked and rotated.
    dify_api_key = os.environ.get("DIFY_API_KEY", "").strip()
    if not dify_api_key:
        st.warning("Dify AI işlemi sırasında hata: DIFY_API_KEY ortam değişkeni tanımlı değil")
        return ""

    headers = {
        "Authorization": f"Bearer {dify_api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "inputs": {},
        "query": f"{query}",
        "response_mode": "blocking",
        "user": "user",
    }

    try:
        # Without a timeout, requests waits indefinitely and a stalled
        # connection would freeze the app. (connect, read) in seconds.
        response = requests.post(
            dify_api_endpoint,
            headers=headers,
            json=payload,
            timeout=(10, 120),
        )
        if response.status_code != 200:
            st.warning(f"Dify AI ile iletişim kurulurken hata oluştu: {response.status_code}")
            return ""

        data = response.json()
        # A JSON body that is not an object has no "answer" field to read
        if not isinstance(data, dict):
            return ""
        return data.get("answer", "")
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/timeout errors; ValueError covers
        # a response body that is not valid JSON.
        st.warning(f"Dify AI işlemi sırasında hata: {str(e)}")
        return ""
111
+
112
# Sidebar configuration
with st.sidebar:
    st.header("Arama Ayarları")
    top_k = st.slider("Gösterilecek sonuç sayısı:", min_value=1, max_value=30, value=5)
    preview_length = st.slider(
        "Ön izleme uzunluğu (karakter):", min_value=500, max_value=3000, value=1000
    )
    use_dify = st.checkbox("Dify AI ile sorgu zenginleştirme", value=True)

    # Initialize resources with status indicators
    st.subheader("Sistem Durumu")

    # Vector index: on failure, report the error and halt the script
    with st.status("Pinecone bağlantısı kuruluyor...", expanded=True) as pinecone_status:
        try:
            index = initialize_pinecone()
            pinecone_status.update(
                label="Pinecone bağlantısı kuruldu ✅",
                state="complete",
                expanded=False,
            )
        except Exception as exc:
            pinecone_status.update(
                label=f"Pinecone bağlantı hatası ❌: {exc}",
                state="error",
                expanded=True,
            )
            st.error("Veritabanına bağlanılamadı. Lütfen daha sonra tekrar deneyin.")
            st.stop()

    # Embedding model: same pattern, and the label shows which device is in use
    with st.status("Model yükleniyor...", expanded=True) as model_status:
        try:
            tokenizer, model, device = load_model()
            model_status.update(
                label=f"Model yüklendi ✅ ({device.upper()} kullanılıyor)",
                state="complete",
                expanded=False,
            )
        except Exception as exc:
            model_status.update(
                label=f"Model yükleme hatası ❌: {exc}",
                state="error",
                expanded=True,
            )
            st.error("Model yüklenemedi. Lütfen daha sonra tekrar deneyin.")
            st.stop()
139
+
140
# Main search interface
query = st.text_area(
    "Aramak istediğiniz konuyu yazın:",
    height=100,
    placeholder="Örnek: Mülkiyet hakkı ile ilgili davalar",
)

# Search button
search_button = st.button("🔍 Ara", type="primary", use_container_width=True)

# Execute search when button is clicked. A whitespace-only query is ignored:
# embedding it would produce a meaningless search.
if search_button and query and query.strip():
    enhanced_query = query

    # Process with Dify AI if enabled
    if use_dify:
        with st.spinner("Sorgunuz Dify AI ile analiz ediliyor..."):
            dify_output = process_with_dify(query)

        # An empty answer means Dify failed or returned nothing; keep the
        # original query in that case.
        if dify_output:
            # Combine original query with Dify output
            enhanced_query = f"{query} {dify_output}"

            # Show the enhanced query
            with st.expander("Zenginleştirilmiş Sorgu", expanded=False):
                st.write("Orijinal sorgu:")
                st.info(query)
                st.write("Dify AI ile zenginleştirilmiş sorgu:")
                st.info(dify_output)
                st.write("Birleştirilmiş sorgu:")
                st.success(enhanced_query)

    # Perform the search with the enhanced query
    with st.spinner("Arama yapılıyor..."):
        try:
            # The timer covers both embedding and the vector search
            start_time = time.time()
            query_embedding = get_query_embedding(enhanced_query, tokenizer, model)

            # Search Pinecone
            search_results = index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True,
            )

            elapsed_time = time.time() - start_time

            # Display results
            st.success(f"Arama tamamlandı! ({elapsed_time:.2f} saniye)")

            if not search_results.matches:
                st.info("Aramanıza uygun sonuç bulunamadı.")
            else:
                st.subheader(f"Arama Sonuçları ({len(search_results.matches)} döküman)")

                # Display each result in a card
                for i, match in enumerate(search_results.matches):
                    # Guard against a match that carries no metadata, so the
                    # .get() lookups below cannot fail on None.
                    metadata = match.metadata or {}

                    with st.container():
                        col1, col2 = st.columns([4, 1])

                        with col1:
                            st.markdown(f"### {i+1}. {metadata.get('daire', 'Bilinmeyen Daire')}")

                        with col2:
                            st.metric(label="Benzerlik", value=f"{match.score*100:.1f}%")

                        st.markdown("**Döküman Bilgileri:**")
                        st.markdown(f"""
                        - **Karar No:** {metadata.get('karar_no', 'Belirtilmemiş')}
                        - **Esas No:** {metadata.get('esas_no', 'Belirtilmemiş')}
                        - **Tarih:** {metadata.get('tarih', 'Belirtilmemiş')}
                        """)

                        # Prefer the full text; fall back to the stored snippet
                        text_content = metadata.get('text', metadata.get('text_snippet', ''))

                        # Display text content in an expandable section
                        with st.expander("Döküman İçeriği", expanded=True):
                            st.markdown(get_text_preview(text_content, preview_length))

                        # Add download button if text content exists
                        if text_content:
                            st.download_button(
                                label="Tam Metni İndir",
                                data=text_content,
                                file_name=f"karar_{metadata.get('karar_no', 'bilinmeyen')}.txt",
                                mime="text/plain",
                                # Explicit per-row key: two results with the same
                                # text and decision number would otherwise be
                                # indistinguishable widgets and raise a
                                # duplicate-widget error.
                                key=f"download_{i}",
                            )

                        st.divider()

        except Exception as e:
            # Top-level boundary of the search flow: show the failure to the
            # user instead of crashing the page.
            st.error(f"Arama sırasında bir hata oluştu: {str(e)}")
231
+
232
# Footer
# Rendered in the sidebar: a horizontal rule followed by the copyright caption.
with st.sidebar:
    st.markdown("---")
    st.caption("© 2023 Hukuki Döküman Arama")