Spaces:

unpaper
/

AddPaper

Sleeping

App Files Files Community

AddPaper / app.py

katsukiai

Create app.py

8c56624 verified 10 months ago

raw

history blame contribute delete

6.38 kB

	import io
	import os
	import re
	from datetime import datetime

	import requests
	import streamlit as st
	from huggingface_hub import HfApi
	from PyPDF2 import PdfReader
	from transformers import pipeline

	# --- Constants ---
	# Notice displayed on the main page.
	COMMUNITY_BETA_MESSAGE = (
	    "This Streamlit app is part of a community in Beta. "
	    "Please open discussions in the Community tab of the Community card."
	)
	# Pre-filled value of the "Badge Image URL" sidebar field.
	DEFAULT_BADGE_IMAGE_URL = "https://img.shields.io/badge/Hugging%20Face-Space-blue"
	# Pre-filled value of the "Copyright Text" sidebar field; the year is read
	# once, when the script is executed.
	COPYRIGHT_TEXT = "© {} Your Name/Organization. All rights reserved.".format(
	    datetime.now().year
	)

	# --- CSS ---
	# Stylesheet injected into the page: hides the main menu, footer and header
	# elements and adjusts top spacing on two selectors.
	_PAGE_STYLE_HTML = """
	<style>
	.reportview-container {
	margin-top: -2em;
	}
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	header {visibility: hidden;}
	.st-emotion-cache-z53if6 {
	padding-top: 10px;
	}
	</style>
	"""
	# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
	st.markdown(_PAGE_STYLE_HTML, unsafe_allow_html=True)

	# --- Sidebar Settings ---
	# Every user-supplied setting is collected through sidebar text inputs.
	sidebar = st.sidebar
	sidebar.header("Settings")
	arxiv_link = sidebar.text_input(
	    label="arXiv Paper Link or ID",
	    placeholder="e.g., https://arxiv.org/abs/2301.00001 or 2301.00001",
	)
	custom_space_name = sidebar.text_input(label="Custom Hugging Face Space Name (optional)")
	badge_image_url = sidebar.text_input(label="Badge Image URL", value=DEFAULT_BADGE_IMAGE_URL)
	copyright_text = sidebar.text_input(label="Copyright Text", value=COPYRIGHT_TEXT)

	# The token field uses the password input type.
	hf_token = sidebar.text_input(label="Hugging Face Token", type="password")

	# --- Main App ---
	# Page heading, followed by the community beta notice.
	st.title(body="arXiv Paper to Hugging Face Space")
	st.info(body=COMMUNITY_BETA_MESSAGE)

	# Validate the token (when one was entered) and report who it belongs to.
	if hf_token:
	    try:
	        api = HfApi(token=hf_token)
	        user_info = api.whoami()
	        # `hf_username` is later used as the namespace part of a repo id, so it
	        # must be the account handle ("name"), never the free-form display
	        # name ("fullname"), which is not a valid namespace in general.
	        hf_username = user_info["name"]
	        # The display name is only used for the greeting.
	        display_name = user_info.get("fullname") or hf_username
	        st.sidebar.success(f"Logged in as: {display_name}")
	    except Exception as e:
	        # UI boundary: surface any failure (bad token, network error, ...) to
	        # the user instead of crashing the script run.
	        st.sidebar.error(f"Error with Hugging Face Token: {e}")

	# --- arXiv paper -> Hugging Face Space workflow ---
	# Optional URL or "arXiv:" prefix that may precede an identifier.
	_ARXIV_PREFIX_RE = re.compile(
	    r"^(?:(?:https?://)?(?:www\.)?arxiv\.org/(?:abs|pdf)/|arxiv:)", re.IGNORECASE
	)
	# New-style ("2301.00001") or old-style ("hep-th/9901001") identifier with an
	# optional version suffix ("v2").
	_ARXIV_ID_RE = re.compile(
	    r"(?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[a-z]{2})?/\d{7})(?:v\d+)?", re.IGNORECASE
	)
	# Upper bound on the PDF download so a stalled request cannot hang the app.
	_HTTP_TIMEOUT_SECONDS = 30


	def _extract_arxiv_id(link):
	    """Return the arXiv identifier contained in *link*, or None if it has none.

	    Accepts a bare identifier, an ``arxiv:`` prefixed identifier, or an
	    ``arxiv.org/abs/...`` / ``arxiv.org/pdf/...`` URL.
	    """
	    # Drop surrounding whitespace, query string, fragment and trailing slash.
	    candidate = link.strip().split("?", 1)[0].split("#", 1)[0].rstrip("/")
	    candidate = _ARXIV_PREFIX_RE.sub("", candidate, count=1)
	    if candidate.lower().endswith(".pdf"):
	        candidate = candidate[:-4]
	    # Validating the whole identifier also keeps arbitrary user text out of the
	    # URL and the HTML built from it below.
	    return candidate if _ARXIV_ID_RE.fullmatch(candidate) else None


	def _extract_pdf_text(pdf_bytes):
	    """Return the concatenated text of every page of the PDF in *pdf_bytes*."""
	    reader = PdfReader(io.BytesIO(pdf_bytes))
	    # Guard against a page yielding no text object at all.
	    return "".join(page.extract_text() or "" for page in reader.pages)


	if arxiv_link:
	    arxiv_id = _extract_arxiv_id(arxiv_link)

	    if arxiv_id:
	        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
	        try:
	            response = requests.get(pdf_url, timeout=_HTTP_TIMEOUT_SECONDS)
	            response.raise_for_status()
	            pdf_content = response.content
	            text = _extract_pdf_text(pdf_content)

	            st.subheader("Paper Content Preview:")
	            st.markdown(f'<iframe src="{pdf_url}" width="700" height="600" type="application/pdf"></iframe>', unsafe_allow_html=True)

	            if st.button("Convert to Hugging Face Space"):
	                if not hf_token:
	                    st.warning("Please enter your Hugging Face Token in the sidebar to create a Space.")
	                else:
	                    # Old-style identifiers contain "/", which cannot be part
	                    # of a repository name.
	                    space_name_suffix = custom_space_name.strip() or arxiv_id.replace("/", "-")
	                    space_name = f"arxiv-{space_name_suffix}"

	                    try:
	                        api = HfApi(token=hf_token)
	                        # Resolve the namespace here so this step does not
	                        # depend on an earlier token check having succeeded.
	                        namespace = api.whoami()["name"]
	                        repo_id = f"{namespace}/{space_name}"
	                        # repo_type must be "space" for space_sdk to apply and
	                        # for the uploads below to find the repository;
	                        # exist_ok lets a repeated click update the Space.
	                        api.create_repo(
	                            repo_id=repo_id,
	                            repo_type="space",
	                            space_sdk="static",
	                            exist_ok=True,
	                        )

	                        # Upload straight from memory: no temporary file that
	                        # could leak on failure or clash between sessions.
	                        api.upload_file(
	                            path_or_fileobj=pdf_content,
	                            path_in_repo="paper.pdf",
	                            repo_id=repo_id,
	                            repo_type="space",
	                        )

	                        # PDF Analysis
	                        try:
	                            st.info("Analyzing PDF content...")
	                            # NOTE(review): this checkpoint appears to be a
	                            # decoder-only model, so "text2text-generation" may
	                            # fail to load it - confirm the intended task/model.
	                            pipe = pipeline("text2text-generation", model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
	                            # Limit input for faster processing
	                            analysis_result = pipe(text[:4096], max_length=512)[0]['generated_text']

	                            analysis_pdf_content = f"""
	# Analysis of arXiv Paper: {arxiv_id}

	Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

	Analysis:
	{analysis_result}

	---
	{copyright_text}
	"""

	                            # Encode explicitly so the uploaded file does not
	                            # depend on the platform's default encoding.
	                            api.upload_file(
	                                path_or_fileobj=analysis_pdf_content.encode("utf-8"),
	                                path_in_repo="analysis.txt",
	                                repo_id=repo_id,
	                                repo_type="space",
	                            )

	                            st.success("Analysis saved to the Space as `analysis.txt`.")

	                        except Exception as e_analysis:
	                            # Analysis is best-effort: the Space and the PDF
	                            # upload already succeeded, so only report it.
	                            st.error(f"Error during PDF analysis: {e_analysis}")

	                        badge_html = f"""
	<a href="https://huggingface.co/spaces/{repo_id}" target="_blank">
	<img src="{badge_image_url}" alt="Hugging Face Space">
	</a>
	"""
	                        st.subheader("Hugging Face Space Created!")
	                        st.markdown(f"Space URL: https://huggingface.co/spaces/{repo_id}")
	                        st.markdown("Embed this badge in your README or website:")
	                        st.code(badge_html, language="html")

	                    except Exception as e_hf:
	                        st.error(f"Error creating or updating Hugging Face Space: {e_hf}")

	        except requests.exceptions.RequestException as e_http:
	            st.error(f"Error fetching PDF from arXiv: {e_http}")
	        except Exception as e_pdf:
	            st.error(f"Error processing PDF: {e_pdf}")
	    else:
	        st.warning("Invalid arXiv link or ID format.")

	# Footer: a horizontal rule, then the copyright text from the sidebar.
	for footer_part in ("---", copyright_text):
	    st.markdown(footer_part)