File size: 6,380 Bytes
8c56624 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import io
import os
import re
from datetime import datetime

import requests
import streamlit as st
from huggingface_hub import HfApi
from PyPDF2 import PdfReader
from transformers import pipeline
# --- Constants ---
# Banner text displayed with st.info() at the top of the page.
COMMUNITY_BETA_MESSAGE = (
    "This Streamlit app is part of a community in Beta. "
    "Please open discussions in the Community tab of the Community card."
)
# Pre-filled value of the "Badge Image URL" sidebar input.
DEFAULT_BADGE_IMAGE_URL = "https://img.shields.io/badge/Hugging%20Face-Space-blue"
# Pre-filled value of the "Copyright Text" sidebar input; the year is taken
# from the clock once, when the script runs.
COPYRIGHT_TEXT = "© {year} Your Name/Organization. All rights reserved.".format(
    year=datetime.now().year
)
# --- CSS ---
# Style overrides injected into the page as raw HTML: they hide the
# #MainMenu, footer and header elements, pull .reportview-container up by
# 2em, and set a 10px top padding on one generated class.
# NOTE(review): ".st-emotion-cache-z53if6" looks like an auto-generated class
# name and is presumably tied to one Streamlit build - confirm after upgrades.
_PAGE_STYLE_OVERRIDES = """
<style>
.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
.st-emotion-cache-z53if6 {
padding-top: 10px;
}
</style>
"""
st.markdown(_PAGE_STYLE_OVERRIDES, unsafe_allow_html=True)
# --- Sidebar Settings ---
# All user-supplied inputs are collected in the sidebar. Each widget's current
# value is bound to a module-level name that the rest of the script reads.
with st.sidebar:
    st.header("Settings")
    arxiv_link = st.text_input(
        label="arXiv Paper Link or ID",
        placeholder="e.g., https://arxiv.org/abs/2301.00001 or 2301.00001",
    )
    custom_space_name = st.text_input(
        label="Custom Hugging Face Space Name (optional)",
    )
    badge_image_url = st.text_input(
        label="Badge Image URL",
        value=DEFAULT_BADGE_IMAGE_URL,
    )
    copyright_text = st.text_input(
        label="Copyright Text",
        value=COPYRIGHT_TEXT,
    )
    # type="password" masks the token while it is being typed.
    hf_token = st.text_input(label="Hugging Face Token", type="password")
# --- Main App ---
st.title("arXiv Paper to Hugging Face Space")
st.info(COMMUNITY_BETA_MESSAGE)

# Validate the token as soon as one is entered so the user gets immediate
# feedback in the sidebar, before attempting any conversion.
if hf_token:
    try:
        api = HfApi(token=hf_token)
        user_info = api.whoami()
        # `name` is the account handle used as the namespace in
        # "<namespace>/<repo>" ids; `fullname` is a free-form display name
        # (it may contain spaces) and must not be used to build repo ids.
        hf_username = user_info["name"]
        display_name = user_info.get("fullname") or hf_username
        st.sidebar.success(f"Logged in as: {display_name}")
    except Exception as e:
        # UI boundary: any failure (bad token, network, unexpected payload)
        # is reported to the user instead of crashing the script run.
        st.sidebar.error(f"Error with Hugging Face Token: {e}")
# Seconds to wait on arXiv before giving up, so a stalled download cannot
# hang the script run indefinitely.
_ARXIV_TIMEOUT_SECONDS = 30

# Optional URL / scheme prefix that may precede an identifier.
_ARXIV_PREFIX_RE = re.compile(
    r"^(?:https?://(?:www\.)?arxiv\.org/(?:abs|pdf)/|arxiv:)", re.IGNORECASE
)
# New-style ids ("2301.00001") and old-style ids ("hep-th/9901001",
# "math.GT/0309136"), each with an optional version suffix ("v2").
_ARXIV_ID_RE = re.compile(
    r"(?:\d{4}\.\d{4,5}|[a-z]+(?:-[a-z]+)*(?:\.[A-Z]{2})?/\d{7})(?:v\d+)?"
)


def _parse_arxiv_id(link):
    """Return the arXiv identifier contained in *link*, or None.

    Accepts a bare identifier, an ``arxiv:`` prefixed identifier, or an
    ``arxiv.org`` ``/abs/`` or ``/pdf/`` URL. Anything that does not reduce to
    a well-formed identifier is rejected, which also keeps arbitrary user
    text out of the URL and HTML built from the result.
    """
    candidate = _ARXIV_PREFIX_RE.sub("", link.strip(), count=1)
    # Drop any query string / fragment, a trailing slash and a ".pdf" suffix.
    candidate = re.split(r"[?#]", candidate, maxsplit=1)[0].rstrip("/")
    if candidate.lower().endswith(".pdf"):
        candidate = candidate[:-4]
    return candidate if _ARXIV_ID_RE.fullmatch(candidate) else None


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of the PDF in *pdf_bytes*."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # `or ""` guards against a page yielding no text object, so the join
    # cannot fail on a non-string value.
    return "".join(page.extract_text() or "" for page in reader.pages)


# Fetch the paper, preview it, and (on request) publish it to a new Space
# together with a generated analysis file.
if arxiv_link:
    arxiv_id = _parse_arxiv_id(arxiv_link)
    if arxiv_id:
        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
        try:
            response = requests.get(pdf_url, timeout=_ARXIV_TIMEOUT_SECONDS)
            response.raise_for_status()
            pdf_content = response.content
            text = _extract_pdf_text(pdf_content)
            st.subheader("Paper Content Preview:")
            # Safe to embed as raw HTML: arxiv_id has been validated against
            # _ARXIV_ID_RE, so pdf_url cannot carry markup.
            st.markdown(
                f'<iframe src="{pdf_url}" width="700" height="600" type="application/pdf"></iframe>',
                unsafe_allow_html=True,
            )
            if st.button("Convert to Hugging Face Space"):
                if not hf_token:
                    st.warning("Please enter your Hugging Face Token in the sidebar to create a Space.")
                else:
                    # Old-style ids contain "/", which is not valid inside a
                    # repo name, so it is replaced for the default suffix.
                    space_name_suffix = custom_space_name.strip() or arxiv_id.replace("/", "-")
                    space_name = f"arxiv-{space_name_suffix}"
                    try:
                        api = HfApi(token=hf_token)
                        # Resolve the namespace here from the account handle
                        # (`name`); the display name is not a valid namespace.
                        namespace = api.whoami()["name"]
                        repo_id = f"{namespace}/{space_name}"
                        # repo_type="space" is required: without it a model
                        # repo is created and the space uploads below fail.
                        # exist_ok=True lets a repeated click update the Space
                        # instead of failing because it already exists.
                        api.create_repo(
                            repo_id=repo_id,
                            repo_type="space",
                            space_sdk="static",
                            exist_ok=True,
                        )
                        # Upload straight from memory; no shared temp file in
                        # the working directory to race on or leave behind.
                        api.upload_file(
                            path_or_fileobj=pdf_content,
                            path_in_repo="paper.pdf",
                            repo_id=repo_id,
                            repo_type="space",
                        )
                        # PDF Analysis (best effort: a failure here is
                        # reported but does not undo the Space creation).
                        try:
                            st.info("Analyzing PDF content...")
                            # NOTE(review): the model id looks like a
                            # decoder-only checkpoint while the task is
                            # "text2text-generation" - confirm this pairing
                            # works, and that a 32B model is feasible here.
                            pipe = pipeline("text2text-generation", model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
                            # Limit input for faster processing
                            analysis_result = pipe(text[:4096], max_length=512)[0]['generated_text']
                            analysis_pdf_content = (
                                f"\n# Analysis of arXiv Paper: {arxiv_id}\n"
                                f"**Generated on:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                                "**Analysis:**\n"
                                f"{analysis_result}\n"
                                "---\n"
                                f"{copyright_text}\n"
                            )
                            # Encode explicitly: the text contains non-ASCII
                            # characters (e.g. the copyright sign).
                            api.upload_file(
                                path_or_fileobj=analysis_pdf_content.encode("utf-8"),
                                path_in_repo="analysis.txt",
                                repo_id=repo_id,
                                repo_type="space",
                            )
                            st.success("Analysis saved to the Space as `analysis.txt`.")
                        except Exception as e_analysis:
                            st.error(f"Error during PDF analysis: {e_analysis}")
                        # The badge snippet is only displayed as code for the
                        # user to copy; it is never rendered by this app.
                        badge_html = (
                            f'\n<a href="https://huggingface.co/spaces/{repo_id}" target="_blank">\n'
                            f'<img src="{badge_image_url}" alt="Hugging Face Space">\n'
                            "</a>\n"
                        )
                        st.subheader("Hugging Face Space Created!")
                        st.markdown(f"Space URL: https://huggingface.co/spaces/{repo_id}")
                        st.markdown("Embed this badge in your README or website:")
                        st.code(badge_html, language="html")
                    except Exception as e_hf:
                        st.error(f"Error creating or updating Hugging Face Space: {e_hf}")
        except requests.exceptions.RequestException as e_http:
            st.error(f"Error fetching PDF from arXiv: {e_http}")
        except Exception as e_pdf:
            st.error(f"Error processing PDF: {e_pdf}")
    else:
        st.warning("Invalid arXiv link or ID format.")
# Footer: a horizontal rule followed by the copyright line taken from the
# "Copyright Text" sidebar input.
st.markdown("---")
st.markdown(copyright_text)