Update app.py
Browse files
app.py
CHANGED
|
@@ -57,13 +57,24 @@ def search_arxiv(query):
|
|
| 57 |
|
| 58 |
return "\n\n".join(results)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# Create the Gradio interface
|
| 61 |
iface = gr.Interface(
|
| 62 |
fn=search_arxiv,
|
| 63 |
inputs=gr.components.Textbox(lines=1, placeholder="Enter your query..."),
|
| 64 |
outputs="markdown",
|
| 65 |
title="Semantic Search in ArXiv ML Papers",
|
| 66 |
-
description="Enter a query to find relevant ML papers from the ArXiv dataset."
|
|
|
|
| 67 |
)
|
| 68 |
|
| 69 |
# Launch the interface
|
|
|
|
| 57 |
|
| 58 |
return "\n\n".join(results)
|
| 59 |
|
| 60 |
+
# Dataset information
|
| 61 |
+
dataset_info = """
|
| 62 |
+
### About the Dataset
|
| 63 |
+
|
| 64 |
+
This dataset contains a subset of ArXiv papers with the "cs.LG" tag, indicating that the paper is about Machine Learning. The core dataset is filtered from the full ArXiv dataset hosted on Kaggle: [ArXiv Dataset on Kaggle](https://www.kaggle.com/datasets/Cornell-University/arxiv). The original dataset contains roughly 2 million papers, and this dataset contains approximately 100,000 papers after category filtering.
|
| 65 |
+
|
| 66 |
+
The dataset is maintained by making requests to the ArXiv API. The current iteration only includes the title and abstract of each paper.
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
|
| 70 |
# Create the Gradio interface
|
| 71 |
iface = gr.Interface(
|
| 72 |
fn=search_arxiv,
|
| 73 |
inputs=gr.components.Textbox(lines=1, placeholder="Enter your query..."),
|
| 74 |
outputs="markdown",
|
| 75 |
title="Semantic Search in ArXiv ML Papers",
|
| 76 |
+
description="Enter a query to find relevant ML papers from the ArXiv dataset.",
|
| 77 |
+
article=dataset_info
|
| 78 |
)
|
| 79 |
|
| 80 |
# Launch the interface
|