Tarun-1999M committed on
Commit
c2dc399
·
verified ·
1 Parent(s): b747958

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -57,13 +57,24 @@ def search_arxiv(query):
57
 
58
  return "\n\n".join(results)
59
 
 
 
 
 
 
 
 
 
 
 
60
  # Create the Gradio interface
61
  iface = gr.Interface(
62
  fn=search_arxiv,
63
  inputs=gr.components.Textbox(lines=1, placeholder="Enter your query..."),
64
  outputs="markdown",
65
  title="Semantic Search in ArXiv ML Papers",
66
- description="Enter a query to find relevant ML papers from the ArXiv dataset."
 
67
  )
68
 
69
  # Launch the interface
 
57
 
58
  return "\n\n".join(results)
59
 
60
+ # Dataset information
61
+ dataset_info = """
62
+ ### About the Dataset
63
+
64
+ This dataset contains a subset of ArXiv papers with the "cs.LG" tag, indicating that the paper is about Machine Learning. The core dataset is filtered from the full ArXiv dataset hosted on Kaggle: [ArXiv Dataset on Kaggle](https://www.kaggle.com/datasets/Cornell-University/arxiv). The original dataset contains roughly 2 million papers, and this dataset contains approximately 100,000 papers after category filtering.
65
+
66
+ The dataset is maintained by making requests to the ArXiv API. The current iteration only includes the title and abstract of each paper.
67
+ """
68
+
69
+
70
  # Create the Gradio interface
71
  iface = gr.Interface(
72
  fn=search_arxiv,
73
  inputs=gr.components.Textbox(lines=1, placeholder="Enter your query..."),
74
  outputs="markdown",
75
  title="Semantic Search in ArXiv ML Papers",
76
+ description="Enter a query to find relevant ML papers from the ArXiv dataset.",
77
+ article=dataset_info
78
  )
79
 
80
  # Launch the interface