Vishwas1 committed on
Commit
e068354
·
verified ·
1 Parent(s): 411c845

Upload 6 files

Browse files
Files changed (1) hide show
  1. app.py +55 -5
app.py CHANGED
@@ -244,8 +244,23 @@ def process_document(text: str, strategy: str, category: str = None, custom_keys
244
  # Apply selected strategy
245
  if strategy == "Fact Extraction":
246
  facts = active_reader.extract_facts(text)
247
- result = f"**Extracted {len(facts)} facts:**\n\n" + "\n".join([f"• {fact}" for fact in facts])
248
- facts_json = json.dumps(facts, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  questions_json = ""
250
  summary_text = ""
251
 
@@ -268,18 +283,53 @@ def process_document(text: str, strategy: str, category: str = None, custom_keys
268
  questions = active_reader.generate_questions(text)
269
  summary = active_reader.generate_summary(text)
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  result = f"""**Domain:** {domain} | **Category:** {selected_category}
272
 
273
  **Summary:**
274
  {summary}
275
 
276
- **Key Facts ({len(facts)}):**
277
- """ + "\n".join([f"• {fact}" for fact in facts]) + f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
  **Generated Questions ({len(questions)}):**
280
  """ + "\n".join([f"Q: {q}" for q in questions])
281
 
282
- facts_json = json.dumps(facts, indent=2)
 
 
 
 
 
 
283
  questions_json = json.dumps(questions, indent=2)
284
  summary_text = summary
285
 
 
244
  # Apply selected strategy
245
  if strategy == "Fact Extraction":
246
  facts = active_reader.extract_facts(text)
247
+
248
+ # Also include category-specific extractions if custom keys provided
249
+ category_extractions = []
250
+ if custom_keys_list:
251
+ for key, values in category_data["custom_extractions"].items():
252
+ category_extractions.extend(values)
253
+
254
+ all_facts = facts + category_extractions
255
+ result = f"**Extracted {len(all_facts)} facts:**\n\n" + "\n".join([f"• {fact}" for fact in all_facts])
256
+
257
+ # Include category data in facts JSON
258
+ facts_data = {
259
+ "traditional_facts": facts,
260
+ "category_extractions": category_data["extracted_data"] if category_data["extracted_data"] else {},
261
+ "custom_extractions": category_data["custom_extractions"] if category_data["custom_extractions"] else {}
262
+ }
263
+ facts_json = json.dumps(facts_data, indent=2)
264
  questions_json = ""
265
  summary_text = ""
266
 
 
283
  questions = active_reader.generate_questions(text)
284
  summary = active_reader.generate_summary(text)
285
 
286
+ # Include category extractions in complete analysis
287
+ category_facts = []
288
+ if category_data["extracted_data"]:
289
+ for key, values in category_data["extracted_data"].items():
290
+ if values:
291
+ category_facts.extend([f"{key}: {v}" for v in values[:2]]) # Top 2 per category
292
+
293
+ custom_facts = []
294
+ if category_data["custom_extractions"]:
295
+ for key, values in category_data["custom_extractions"].items():
296
+ if values:
297
+ custom_facts.extend([f"{key}: {v}" for v in values[:1]]) # Top 1 per custom key
298
+
299
+ all_facts = facts + category_facts + custom_facts
300
+
301
  result = f"""**Domain:** {domain} | **Category:** {selected_category}
302
 
303
  **Summary:**
304
  {summary}
305
 
306
+ **Traditional Facts ({len(facts)}):**
307
+ """ + "\n".join([f"• {fact}" for fact in facts])
308
+
309
+ if category_facts:
310
+ result += f"""
311
+
312
+ **Category-Specific Extractions ({len(category_facts)}):**
313
+ """ + "\n".join([f"• {fact}" for fact in category_facts])
314
+
315
+ if custom_facts:
316
+ result += f"""
317
+
318
+ **Custom Key Extractions ({len(custom_facts)}):**
319
+ """ + "\n".join([f"• {fact}" for fact in custom_facts])
320
+
321
+ result += f"""
322
 
323
  **Generated Questions ({len(questions)}):**
324
  """ + "\n".join([f"Q: {q}" for q in questions])
325
 
326
+ # Enhanced facts JSON with all extraction types
327
+ facts_data = {
328
+ "traditional_facts": facts,
329
+ "category_extractions": category_data["extracted_data"],
330
+ "custom_extractions": category_data["custom_extractions"]
331
+ }
332
+ facts_json = json.dumps(facts_data, indent=2)
333
  questions_json = json.dumps(questions, indent=2)
334
  summary_text = summary
335