Spaces:
Running
Running
| """ | |
| Main Streamlit application for the Fake News Detector. | |
| This module implements the user interface for claim verification, | |
| rendering the results and handling user interactions. It also | |
| manages the application lifecycle including initialization and cleanup. | |
| """ | |
| import streamlit as st | |
| import time | |
| import json | |
| import os | |
| import logging | |
| import atexit | |
| import sys | |
| from pathlib import Path | |
| # Configure logging first, before other imports | |
| logging.basicConfig( | |
| level=logging.INFO, | |
| format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | |
| handlers=[logging.StreamHandler()] | |
| ) | |
| logger = logging.getLogger("misinformation_detector") | |
| # Check for critical environment variables | |
| if not os.environ.get("OPENAI_API_KEY"): | |
| logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.") | |
| # Import our modules | |
| from utils.models import initialize_models | |
| from utils.performance import PerformanceTracker | |
| # Import agent functionality | |
| import agent | |
# Initialize performance tracker (module-level singleton used across the app)
performance_tracker = PerformanceTracker()

# Ensure data directory exists. mkdir(exist_ok=True) is race-free, so it is
# called unconditionally; the exists() probe only decides whether to log.
# (The original gated mkdir on exists(), leaving a TOCTOU window.)
data_dir = Path("data")
if not data_dir.exists():
    logger.info("Creating data directory")
data_dir.mkdir(exist_ok=True)
# Set page configuration (must be the first Streamlit call in the script)
st.set_page_config(
    page_title="AskVeracity",
    page_icon="π",
    layout="wide",
)

# Hide the "Press β+Enter to apply" text with CSS
# NOTE(review): this targets Streamlit's internal DOM via a :has() selector;
# it may silently stop working after a Streamlit upgrade — re-verify then.
st.markdown("""
<style>
/* Hide the shortcut text that appears at the bottom of text areas */
.stTextArea div:has(textarea) + div {
    visibility: hidden !important;
    height: 0px !important;
    position: absolute !important;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def get_agent():
    """
    Initialize and cache the agent for reuse across requests.

    This function creates and caches the fact-checking agent to avoid
    recreating it for every request. It's decorated with st.cache_resource
    to ensure the agent is only initialized once per session.

    Note: the docstring always claimed st.cache_resource, but the decorator
    was missing — without it every call re-ran model initialization. The
    decorator is now applied so the contract and behavior match.

    Returns:
        object: Initialized LangGraph agent for fact checking
    """
    logger.info("Initializing models and agent (cached)")
    initialize_models()
    return agent.setup_agent()
def cleanup_resources():
    """
    Release application resources at interpreter shutdown.

    Registered with atexit so cached Streamlit data and the performance
    tracker are reset when the application process terminates. Cleanup is
    best-effort: failures are logged, never raised.
    """
    try:
        # Run each teardown step in order; any failure aborts the rest
        # and is reported below.
        for teardown in (st.cache_data.clear, performance_tracker.reset):
            teardown()
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        # Never let cleanup errors propagate during interpreter shutdown
        logger.error(f"Error during cleanup: {e}")


# Ensure cleanup runs when the process exits
atexit.register(cleanup_resources)
# App title and description (static landing content)
st.title("π AskVeracity")
st.markdown("""
This is a simple AI-powered tool - a fact-checking system that analyzes claims to determine
their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
news outlets, and academic repositories.
""")

# Sidebar with app information — purely informational, no interactive state.
with st.sidebar:
    st.header("About")
    st.info(
        "This system uses a combination of NLP techniques and LLMs to "
        "extract claims, gather evidence, and classify the truthfulness of statements."
    )

    # Application information
    st.markdown("### How It Works")
    st.info(
        "1. Enter any recent news or a factual claim\n"
        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
        "3. The system analyzes the evidence to determine truthfulness\n"
        "4. Results show the verdict with supporting evidence"
    )

    # Our Mission
    st.markdown("### Our Mission")
    st.info(
        "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
        "We believe in empowering people with factual information to make informed decisions."
    )

    # Limitations and Usage
    st.markdown("### Limitations")
    st.warning(
        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
        "Performance is typically best with widely-reported news and information published within the last 48 hours. "
        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
        "may be judged false if circumstances have changed, and vice versa."
    )

    # Best Practices
    st.markdown("### Best Practices")
    st.success(
        "For optimal results:\n\n"
        "β’ Keep claims short and precise\n\n"
        "β’ Include key details in your claim\n\n"
        "β’ Phrase claims as direct statements rather than questions\n\n"
        "β’ Be specific about who said what"
    )

    # Example comparison of a vague vs. a precise claim
    with st.expander("π Examples of Effective Claims"):
        st.markdown("""
        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."

        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
        """)

    # Important Notes
    st.markdown("### Important Notes")
    st.info(
        "β’ AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
        "β’ Results can vary based on available evidence and LLM behavior\n\n"
        "β’ The system is designed to indicate uncertainty when evidence is insufficient\n\n"
        "β’ AskVeracity is not a chatbot and does not maintain conversation history\n\n"
        "β’ We recommend cross-verifying critical information with additional sources"
    )

    # Privacy Information
    st.markdown("### Data Privacy")
    st.info(
        "We do not collect or store any data about the claims you submit. "
        "Your interactions are processed by OpenAI's API. Please refer to "
        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
    )

    # Feedback Section
    st.markdown("### Feedback")
    st.success(
        "AskVeracity is evolving and we welcome your feedback to help us improve. "
        "Please reach out to us with questions, suggestions, or concerns."
    )
# Initialize session state variables with their defaults on first run.
# processing        — a claim is currently being verified
# claim_to_process  — the claim text captured when Verify was pressed
# has_result / result — a finished verification is available for display
# total_time        — wall-clock seconds of the last verification
# fresh_state       — show the clean input-only interface
_session_defaults = {
    'processing': False,
    'claim_to_process': "",
    'has_result': False,
    'result': None,
    'total_time': 0,
    'fresh_state': True,
}
for _name, _default in _session_defaults.items():
    if _name not in st.session_state:
        st.session_state[_name] = _default
# Main interface
st.markdown("### Enter a claim to verify")

# Input area
# NOTE(review): the label is an empty string hidden via
# label_visibility="collapsed"; newer Streamlit versions warn about empty
# labels — confirm against the pinned Streamlit version.
claim_input = st.text_area("",
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "China retaliated with 125% tariffs against U.S. imports. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    label_visibility="collapsed",
    max_chars=None)

# Information about result variability
st.caption("""
π‘ **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
- Incorporate the most recent evidence available
- Benefit from the AI's ability to consider multiple perspectives
- Adapt to evolving information landscapes
""")

st.warning("β±οΈ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")

# Button for verifying claim — disabled while a verification is in flight
# so the same claim cannot be submitted twice.
verify_button = st.button(
    "Verify Claim",
    type="primary",
    disabled=st.session_state.processing,
    key="verify_btn"
)
# Clean input-only interface: shown on first load and after a reset.
if st.session_state.fresh_state:
    # Placeholder reserved for the upcoming analysis output
    analysis_placeholder = st.empty()

    # Handle a button press only when no verification is already running
    if verify_button and not st.session_state.processing:
        if claim_input:
            # Capture the claim, switch into processing mode, and rerun
            # so the UI re-renders with the processing view.
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            st.rerun()
        else:
            st.error("Please enter a claim to verify.")
else:
    # This is either during processing or showing results.
    # Create a container for processing and results
    analysis_container = st.container()
    with analysis_container:
        # If we're processing, show the processing UI
        if st.session_state.processing:
            st.subheader("π Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)
            # Initialize models and agent if needed (once per session;
            # the flag guards the expensive get_agent() call)
            if not hasattr(st.session_state, 'agent_initialized'):
                with st.spinner("Initializing system..."):
                    st.session_state.agent = get_agent()
                    st.session_state.agent_initialized = True
            try:
                # Use the stored claim for processing (captured at submit
                # time, so later edits to the text area don't interfere)
                claim_to_process = st.session_state.claim_to_process
                # Process the claim with the agent
                start_time = time.time()
                result = agent.process_claim(claim_to_process, st.session_state.agent)
                total_time = time.time() - start_time
                # Update progress as claim processing completes
                progress_bar.progress(100)
                # Check for None result
                if result is None:
                    st.error("Failed to process the claim. Please try again.")
                    st.session_state.processing = False
                    st.session_state.fresh_state = True
                else:
                    # If result exists but key values are missing, provide default values.
                    # NOTE(review): assumes result is a dict with keys
                    # classification/confidence/explanation/evidence — confirm
                    # against agent.process_claim's return contract.
                    if "classification" not in result or result["classification"] is None:
                        result["classification"] = "Uncertain"
                    if "confidence" not in result or result["confidence"] is None:
                        result["confidence"] = 0.6  # Default to 0.6 instead of 0.0
                    if "explanation" not in result or result["explanation"] is None:
                        result["explanation"] = "Insufficient evidence was found to determine the truthfulness of this claim."
                    # Update result with timing information
                    if "processing_times" not in result:
                        result["processing_times"] = {"total": total_time}
                    # Store the result and timing information
                    st.session_state.result = result
                    st.session_state.total_time = total_time
                    st.session_state.has_result = True
                    st.session_state.processing = False
                    # Clear processing indicators before showing results
                    status.empty()
                    progress_bar.empty()
                    # Force rerun to display results
                    st.rerun()
            except Exception as e:
                # Handle any exceptions and reset processing state
                logger.error(f"Error during claim processing: {str(e)}")
                st.error(f"An error occurred: {str(e)}")
                st.session_state.processing = False
                st.session_state.fresh_state = True
                # Force rerun to re-enable button
                st.rerun()
        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process
            st.subheader("π Verification Results")
            result_col1, result_col2 = st.columns([2, 1])
            with result_col1:
                # Display both original and processed claim if they differ
                if "claim" in result and result["claim"] and result["claim"] != claim_to_process:
                    st.markdown(f"**Original Claim:** {claim_to_process}")
                    st.markdown(f"**Processed Claim:** {result['claim']}")
                else:
                    st.markdown(f"**Claim:** {claim_to_process}")
                # Make verdict colorful based on classification
                # (substring match, so e.g. "Likely True" also maps to green)
                truth_label = result.get('classification', 'Uncertain')
                if truth_label and "True" in truth_label:
                    verdict_color = "green"
                elif truth_label and "False" in truth_label:
                    verdict_color = "red"
                else:
                    verdict_color = "gray"
                st.markdown(f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{truth_label}</span>", unsafe_allow_html=True)
                # Ensure confidence value is used
                if "confidence" in result and result["confidence"] is not None:
                    confidence_value = result["confidence"]
                    # Make sure confidence is a numeric value between 0 and 1
                    try:
                        confidence_value = float(confidence_value)
                        if confidence_value < 0:
                            confidence_value = 0.0
                        elif confidence_value > 1:
                            confidence_value = 1.0
                    except (ValueError, TypeError):
                        confidence_value = 0.6  # Fallback to reasonable default
                else:
                    confidence_value = 0.6  # Default confidence
                # Display the confidence
                st.markdown(f"**Confidence:** {confidence_value:.2%}")
                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")
                # Add disclaimer about cross-verification
                st.info("β οΈ **Note:** Please cross-verify important information with additional reliable sources.")
            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times", {"total": total_time})
                st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")
                # Show agent thoughts (the agent's intermediate reasoning steps)
                if "thoughts" in result and result["thoughts"]:
                    st.markdown("**AI Reasoning Process**")
                    thoughts = result.get("thoughts", [])
                    for i, thought in enumerate(thoughts[:5]):  # Show top 5 thoughts
                        st.markdown(f"{i+1}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for i, thought in enumerate(thoughts):
                                st.markdown(f"{i+1}. {thought}")
            # Display evidence
            st.subheader("π Evidence")
            evidence_count = result.get("evidence_count", 0)
            evidence = result.get("evidence", [])
            # Ensure evidence is a list
            if not isinstance(evidence, list):
                if isinstance(evidence, str):
                    # Try to parse string as a list (the agent sometimes
                    # returns a stringified Python list)
                    try:
                        import ast
                        parsed_evidence = ast.literal_eval(evidence)
                        if isinstance(parsed_evidence, list):
                            evidence = parsed_evidence
                        else:
                            evidence = [evidence]
                    # NOTE(review): bare except — deliberately falls back to
                    # wrapping the raw string, but it also swallows
                    # KeyboardInterrupt; prefer `except (ValueError, SyntaxError)`.
                    except:
                        evidence = [evidence]
                else:
                    evidence = [str(evidence)] if evidence else []
            # Update evidence count based on actual evidence list
            evidence_count = len(evidence)
            # Check for empty evidence
            if evidence_count == 0 or not any(ev for ev in evidence if ev):
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                st.markdown(f"Retrieved {evidence_count} pieces of evidence")
            # Get classification results (per-evidence labels/confidences)
            classification_results = result.get("classification_results", [])
            # Only show evidence tabs if we have evidence
            if evidence and any(ev for ev in evidence if ev):
                # Create tabs for different evidence categories
                evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])
                with evidence_tabs[0]:
                    for i, ev in enumerate(evidence):
                        if ev and isinstance(ev, str) and ev.strip():  # Only show non-empty evidence
                            with st.expander(f"Evidence {i+1}", expanded=i==0):
                                st.text(ev)
                with evidence_tabs[1]:
                    if classification_results:
                        # Check if classification_results items have the expected format
                        valid_results = []
                        for res in classification_results:
                            if isinstance(res, dict) and "confidence" in res and "evidence" in res and "label" in res:
                                if res.get("evidence"):  # Only include results with actual evidence
                                    valid_results.append(res)
                        if valid_results:
                            # Rank by confidence and show at most the top 3
                            sorted_results = sorted(valid_results, key=lambda x: x.get("confidence", 0), reverse=True)
                            top_results = sorted_results[:min(3, len(sorted_results))]
                            for i, res in enumerate(top_results):
                                with st.expander(f"Top Evidence {i+1} (Confidence: {res.get('confidence', 0):.2%})", expanded=i == 0):
                                    st.text(res.get("evidence", "No evidence text available"))
                                    st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
                        else:
                            # If no valid results, just show the evidence
                            shown = False
                            for i, ev in enumerate(evidence[:3]):
                                if ev and isinstance(ev, str) and ev.strip():
                                    with st.expander(f"Evidence {i+1}", expanded=i==0):
                                        st.text(ev)
                                        shown = True
                            if not shown:
                                st.info("No detailed classification results available.")
                    else:
                        # Just show regular evidence if no classification details
                        shown = False
                        for i, ev in enumerate(evidence[:3]):
                            if ev and isinstance(ev, str) and ev.strip():
                                with st.expander(f"Evidence {i+1}", expanded=i==0):
                                    st.text(ev)
                                    shown = True
                        if not shown:
                            st.info("No detailed classification results available.")
                with evidence_tabs[2]:
                    # Tally evidence items per source domain
                    evidence_sources = {}
                    for ev in evidence:
                        if not ev or not isinstance(ev, str):
                            continue
                        source = "Unknown"
                        # Extract source info from evidence text
                        if "URL:" in ev:
                            # NOTE(review): import inside the loop — harmless
                            # (cached after first import) but could be hoisted.
                            import re
                            url_match = re.search(r'URL: https?://(?:www\.)?([^/]+)', ev)
                            if url_match:
                                source = url_match.group(1)
                        if source in evidence_sources:
                            evidence_sources[source] += 1
                        else:
                            evidence_sources[source] = 1
                    # Display evidence source distribution
                    if evidence_sources:
                        st.markdown("**Evidence Source Distribution**")
                        for source, count in evidence_sources.items():
                            st.markdown(f"- {source}: {count} item(s)")
                    else:
                        st.info("No source information available in the evidence.")
            else:
                st.warning("No evidence was retrieved for this claim.")
            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                # Reset to fresh state for a new verification
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.rerun()
# Footer with additional information — rendered on every run, regardless of
# which interface state (fresh / processing / results) is active.
st.markdown("---")
st.caption("""
**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
""")