katsukiai committed on
Commit
8c56624
·
verified ·
1 Parent(s): f4fdb5d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io
import os
import re
from datetime import datetime

import requests
import streamlit as st
from huggingface_hub import HfApi
from PyPDF2 import PdfReader
from transformers import pipeline

# --- Constants ---
# Banner shown at the top of the app via st.info().
COMMUNITY_BETA_MESSAGE = (
    "This Streamlit app is part of a community in Beta. "
    "Please open discussions in the Community tab of the Community card."
)
# Default value offered in the "Badge Image URL" sidebar field.
DEFAULT_BADGE_IMAGE_URL = "https://img.shields.io/badge/Hugging%20Face-Space-blue"
# Default value offered in the "Copyright Text" sidebar field; the year is
# evaluated each time the script runs.
COPYRIGHT_TEXT = f"© {datetime.now().year} Your Name/Organization. All rights reserved."

# --- CSS ---
# Hides the elements matched by #MainMenu, footer and header, and adjusts the
# spacing at the top of the page.
# NOTE(review): `.st-emotion-cache-z53if6` looks like a build-generated class
# name and `.reportview-container` like a legacy one; both may stop matching
# after a Streamlit upgrade -- confirm against the deployed version.
# The <style> tag is kept at column 0 so Markdown cannot mistake the snippet
# for an indented code block.
CUSTOM_CSS = """
<style>
.reportview-container {
    margin-top: -2em;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
.st-emotion-cache-z53if6 {
    padding-top: 10px;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# --- Sidebar Settings ---
# Every input is read on each script run; values pasted with stray leading or
# trailing whitespace are normalised so later prefix checks and API calls see
# the bare value.
st.sidebar.header("Settings")
arxiv_link = st.sidebar.text_input(
    "arXiv Paper Link or ID",
    placeholder="e.g., https://arxiv.org/abs/2301.00001 or 2301.00001",
).strip()
custom_space_name = st.sidebar.text_input(
    "Custom Hugging Face Space Name (optional)"
).strip()
badge_image_url = st.sidebar.text_input("Badge Image URL", value=DEFAULT_BADGE_IMAGE_URL)
copyright_text = st.sidebar.text_input("Copyright Text", value=COPYRIGHT_TEXT)

# type="password" masks the token in the UI.
hf_token = st.sidebar.text_input("Hugging Face Token", type="password").strip()

# --- Main App ---
st.title("arXiv Paper to Hugging Face Space")
st.info(COMMUNITY_BETA_MESSAGE)

# Bound up front so that later code can reference `hf_username` even when no
# token was entered or the token could not be validated.
hf_username = None
if hf_token:
    try:
        api = HfApi(token=hf_token)
        user_info = api.whoami()
        # "name" is the account handle and the only value usable as a repo
        # namespace; "fullname" is a free-form display name, so it is used
        # for the greeting only.
        hf_username = user_info["name"]
        display_name = user_info.get("fullname") or hf_username
        st.sidebar.success(f"Logged in as: {display_name}")
    except Exception as e:
        # Boundary handler: any failure here (bad token, network error) is
        # reported in the sidebar instead of crashing the page.
        hf_username = None
        st.sidebar.error(f"Error with Hugging Face Token: {e}")

# --- arXiv Paper Processing ---
# Prefixes users commonly paste in front of an identifier.
_ARXIV_PREFIX_RE = re.compile(
    r"^(?:https?://(?:www\.)?arxiv\.org/(?:abs|pdf)/|arxiv:)", re.IGNORECASE
)
# New-style IDs (2301.00001) and old-style IDs (hep-th/9901001), each with an
# optional version suffix (v2). Restricting the ID to this alphabet also keeps
# it safe to interpolate into the preview <iframe> markup below.
_ARXIV_ID_RE = re.compile(
    r"(?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[A-Z]{2})?/\d{7})(?:v\d+)?"
)
# NOTE(review): this is a 32B-parameter checkpoint; loading it needs far more
# memory than a typical Space provides -- confirm the deployment target.
ANALYSIS_MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
ANALYSIS_INPUT_CHARS = 4096  # Limit input for faster processing
ANALYSIS_MAX_NEW_TOKENS = 512
REQUEST_TIMEOUT_SECONDS = 30


def _parse_arxiv_id(link):
    """Return the arXiv identifier contained in *link*, or None.

    Accepts bare IDs, ``arxiv:<id>`` and ``http(s)://arxiv.org/abs|pdf/<id>``
    forms; a query string, fragment or trailing ``.pdf`` is ignored.
    """
    candidate = _ARXIV_PREFIX_RE.sub("", link.strip())
    candidate = re.split(r"[?#]", candidate, maxsplit=1)[0]
    if candidate.lower().endswith(".pdf"):
        candidate = candidate[: -len(".pdf")]
    return candidate if _ARXIV_ID_RE.fullmatch(candidate) else None


def _fetch_pdf(pdf_url):
    """Download *pdf_url* and return the response body as bytes.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout or
            a non-2xx status.
    """
    response = requests.get(pdf_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.content


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page in the PDF *pdf_bytes*."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # extract_text() can yield None for pages without a text layer.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _upload_to_space(api, repo_id, data, path_in_repo):
    """Upload the bytes *data* to *path_in_repo* of the Space *repo_id*."""
    # Passing bytes directly avoids writing temp files into the working
    # directory (which leaked whenever the upload raised).
    api.upload_file(
        path_or_fileobj=data,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
        repo_type="space",
    )


def _analyze_paper(paper_text):
    """Run the analysis model on the start of *paper_text*; return its output."""
    # The checkpoint is a causal LM, so it must be loaded with the
    # "text-generation" task; "text2text-generation" expects a seq2seq model.
    generator = pipeline("text-generation", model=ANALYSIS_MODEL_ID)
    outputs = generator(
        paper_text[:ANALYSIS_INPUT_CHARS],
        # max_new_tokens bounds only the generated part; max_length would
        # also count the (already long) prompt.
        max_new_tokens=ANALYSIS_MAX_NEW_TOKENS,
        return_full_text=False,
    )
    return outputs[0]["generated_text"]


def _build_analysis_report(arxiv_id, analysis_result, footer_text):
    """Return the Markdown text stored in the Space as ``analysis.txt``."""
    generated_on = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return "\n".join(
        [
            f"# Analysis of arXiv Paper: {arxiv_id}",
            "",
            f"**Generated on:** {generated_on}",
            "",
            "**Analysis:**",
            analysis_result,
            "",
            "---",
            footer_text,
            "",
        ]
    )


def _create_space(token, space_name, arxiv_id, pdf_bytes, paper_text, footer_text, badge_url):
    """Create (or reuse) the Space, upload the paper and its analysis, show the badge.

    Failures are reported through Streamlit rather than raised: a failure to
    create the Space or upload the PDF aborts the step, while a failure of the
    analysis is reported and the badge is still shown.
    """
    try:
        api = HfApi(token=token)
        # The repo namespace must be the account handle ("name"), never the
        # free-form display name.
        repo_id = f"{api.whoami()['name']}/{space_name}"
        api.create_repo(
            repo_id=repo_id,
            repo_type="space",  # without this a model repo is created
            space_sdk="static",
            exist_ok=True,
        )
        _upload_to_space(api, repo_id, pdf_bytes, "paper.pdf")
    except Exception as e_hf:
        st.error(f"Error creating or updating Hugging Face Space: {e_hf}")
        return

    # PDF Analysis (best effort).
    try:
        st.info("Analyzing PDF content...")
        report = _build_analysis_report(arxiv_id, _analyze_paper(paper_text), footer_text)
        # Encode explicitly: the report contains non-ASCII text such as "©".
        _upload_to_space(api, repo_id, report.encode("utf-8"), "analysis.txt")
        st.success("Analysis saved to the Space as `analysis.txt`.")
    except Exception as e_analysis:
        st.error(f"Error during PDF analysis: {e_analysis}")

    badge_html = (
        f'<a href="https://huggingface.co/spaces/{repo_id}" target="_blank">\n'
        f'  <img src="{badge_url}" alt="Hugging Face Space">\n'
        "</a>"
    )
    st.subheader("Hugging Face Space Created!")
    st.markdown(f"Space URL: https://huggingface.co/spaces/{repo_id}")
    st.markdown("Embed this badge in your README or website:")
    st.code(badge_html, language="html")


def _process_paper(link, *, token, custom_name, badge_url, footer_text):
    """Fetch and preview the paper named by *link*; on request publish it as a Space."""
    arxiv_id = _parse_arxiv_id(link)
    if arxiv_id is None:
        st.warning("Invalid arXiv link or ID format.")
        return

    pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
    try:
        pdf_bytes = _fetch_pdf(pdf_url)
    except requests.exceptions.RequestException as e_http:
        st.error(f"Error fetching PDF from arXiv: {e_http}")
        return
    try:
        paper_text = _extract_pdf_text(pdf_bytes)
    except Exception as e_pdf:
        st.error(f"Error processing PDF: {e_pdf}")
        return

    st.subheader("Paper Content Preview:")
    st.markdown(
        f'<iframe src="{pdf_url}" width="700" height="600" type="application/pdf"></iframe>',
        unsafe_allow_html=True,
    )

    if not st.button("Convert to Hugging Face Space"):
        return
    if not token:
        st.warning("Please enter your Hugging Face Token in the sidebar to create a Space.")
        return

    # Old-style IDs contain "/", which is not allowed inside a repo name.
    space_name_suffix = custom_name or arxiv_id.replace("/", "-")
    _create_space(
        token,
        f"arxiv-{space_name_suffix}",
        arxiv_id,
        pdf_bytes,
        paper_text,
        footer_text,
        badge_url,
    )


if arxiv_link:
    _process_paper(
        arxiv_link,
        token=hf_token,
        custom_name=custom_space_name,
        badge_url=badge_image_url,
        footer_text=copyright_text,
    )

# --- Footer ---
# A horizontal rule followed by the (user-editable) copyright line.
st.markdown("---")
st.markdown(copyright_text)