import os

import gradio as gr
from datasets import load_dataset
import pandas as pd

# Load dataset once at import time so every request reuses the same object.
dataset = load_dataset('dell-research-harvard/newswire', split='train')


def _load_laureate_names(csv_path):
    """Return the unique (first_name, last_name) pairs found in the CSV.

    Args:
        csv_path: Path to a CSV file with ``first_name`` and ``last_name``
            columns.

    Returns:
        A sorted list of ``(first_name, last_name)`` string tuples. Rows where
        either name is missing or blank are skipped.

    Raises:
        KeyError: If a required column is absent from the CSV.
    """
    laureates_df = pd.read_csv(csv_path)

    required = ('first_name', 'last_name')
    missing = [column for column in required if column not in laureates_df.columns]
    if missing:
        raise KeyError(
            f"Laureates CSV is missing required column(s): {', '.join(missing)}"
        )

    # Blank cells are read as NaN (a float); `NaN in text` would raise
    # TypeError, so rows without both names are dropped before matching.
    complete_rows = laureates_df[list(required)].dropna()

    pairs = set()
    for first, last in complete_rows.itertuples(index=False, name=None):
        first, last = str(first).strip(), str(last).strip()
        # An empty string is a substring of every text, so it must never be
        # used as a search term.
        if first and last:
            pairs.add((first, last))
    return sorted(pairs)


# Function to filter and match articles
def filter_nobel_articles(laureates_file):
    """Count the dataset articles that mention 'nobel' and a listed laureate.

    Args:
        laureates_file: The upload handed over by the ``gr.File`` input:
            either an object exposing the file path as ``.name`` or a plain
            path string. ``None`` means nothing was uploaded.

    Returns:
        A human-readable summary string with the number of matching articles,
        or a prompt to upload a file when ``laureates_file`` is ``None``.

    Raises:
        KeyError: If the CSV lacks a ``first_name`` or ``last_name`` column.
    """
    if laureates_file is None:
        return "Please upload a Nobel laureates CSV file."

    # Accept both a plain path and a file-like wrapper carrying `.name`.
    if isinstance(laureates_file, (str, os.PathLike)):
        csv_path = laureates_file
    else:
        csv_path = laureates_file.name

    # Extract the names once instead of re-walking the DataFrame per article.
    name_pairs = _load_laureate_names(csv_path)

    # Stage 1: cheap keyword filter so the name matching below runs on fewer
    # articles.
    nobel_articles = dataset.filter(
        lambda example: 'nobel' in example['text'].lower()
    )

    def contains_laureate(article_text):
        """Return True if any laureate's first and last name both occur."""
        # NOTE: this is a case-sensitive substring test, exactly as before;
        # first and last name need not be adjacent in the text.
        return any(
            first in article_text and last in article_text
            for first, last in name_pairs
        )

    # Stage 2: keep only the articles that mention at least one laureate.
    filtered_articles = nobel_articles.filter(
        lambda example: contains_laureate(example['text'])
    )
    return f"Found {len(filtered_articles)} articles mentioning Nobel laureates."


# Create Gradio interface
interface = gr.Interface(
    fn=filter_nobel_articles,
    inputs=gr.File(label="Upload Nobel Laureates CSV"),
    outputs="text",
)

interface.launch()