File size: 6,380 Bytes
8c56624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import io
import os
import re
from datetime import datetime

import requests
import streamlit as st
from huggingface_hub import HfApi
from PyPDF2 import PdfReader
from transformers import pipeline

# --- Constants ---
# Notice shown at the top of the page through st.info.
COMMUNITY_BETA_MESSAGE = (
    "This Streamlit app is part of a community in Beta. "
    "Please open discussions in the Community tab of the Community card."
)
# Pre-filled value of the sidebar "Badge Image URL" field.
DEFAULT_BADGE_IMAGE_URL = "https://img.shields.io/badge/Hugging%20Face-Space-blue"
# Pre-filled value of the sidebar "Copyright Text" field; the year is read
# from the clock each time the script runs.
COPYRIGHT_TEXT = "© {year} Your Name/Organization. All rights reserved.".format(
    year=datetime.now().year
)

# --- CSS ---
# Stylesheet injected into the page: hides the #MainMenu, footer and header
# elements and adjusts top spacing/padding of the listed containers.
_PAGE_CSS = """
    <style>
    .reportview-container {
        margin-top: -2em;
    }
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    header {visibility: hidden;}
    .st-emotion-cache-z53if6 {
        padding-top: 10px;
    }
    </style>
    """
# The markup is passed with unsafe_allow_html=True so it is emitted as raw HTML.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# --- Sidebar Settings ---
# All user inputs are collected in the sidebar; the rest of the script reads
# the module-level names assigned here.
_sidebar = st.sidebar
_sidebar.header("Settings")
arxiv_link = _sidebar.text_input(
    "arXiv Paper Link or ID",
    placeholder="e.g., https://arxiv.org/abs/2301.00001 or 2301.00001",
)
custom_space_name = _sidebar.text_input("Custom Hugging Face Space Name (optional)")
badge_image_url = _sidebar.text_input("Badge Image URL", value=DEFAULT_BADGE_IMAGE_URL)
copyright_text = _sidebar.text_input("Copyright Text", value=COPYRIGHT_TEXT)

# Requested as a password-type input.
hf_token = _sidebar.text_input("Hugging Face Token", type="password")

# --- Main App ---
st.title("arXiv Paper to Hugging Face Space")
st.info(COMMUNITY_BETA_MESSAGE)

# Validate the token up front so the user gets immediate feedback in the
# sidebar. hf_username is always bound (None while not logged in) because the
# Space-creation step further down builds the repo id from it.
hf_username = None
if hf_token:
    try:
        api = HfApi(token=hf_token)
        user_info = api.whoami()
        # "name" is the account handle that forms the "<user>/<repo>"
        # namespace; "fullname" is only a display name and must not be used
        # to build repo ids.
        hf_username = user_info["name"]
        display_name = user_info.get("fullname") or hf_username
        st.sidebar.success(f"Logged in as: {display_name}")
    except Exception as e:  # UI boundary: report any failure instead of crashing the page
        st.sidebar.error(f"Error with Hugging Face Token: {e}")
        # Treat a token that could not be verified as absent, so the
        # Space-creation step asks for a token instead of running with an
        # unknown account.
        hf_token = None

# --- arXiv Paper Processing ---
# Matches an optional abs/pdf URL or "arXiv:" prefix followed by a new-style
# identifier (2301.00001, optionally with vN) or an old-style one
# (hep-th/9901001). Only the identifier itself is captured, which also keeps
# arbitrary user text out of the HTML iframe rendered below.
_ARXIV_ID_RE = re.compile(
    r"""
    (?:https?://(?:www\.)?arxiv\.org/(?:abs|pdf)/|arxiv:)?
    (?P<id>
        \d{4}\.\d{4,5}(?:v\d+)?
        |[a-z-]+(?:\.[a-z]{2})?/\d{7}(?:v\d+)?
    )
    (?:\.pdf)?/?
    """,
    re.IGNORECASE | re.VERBOSE,
)


def _parse_arxiv_id(link):
    """Return the arXiv identifier found in *link*, or None if it is not recognised."""
    match = _ARXIV_ID_RE.fullmatch(link.strip())
    return match.group("id") if match else None


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of the PDF in *pdf_bytes*."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # "or ''" guards against a page yielding no text so the join cannot fail.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _build_analysis_report(arxiv_id, analysis, footer):
    """Return the Markdown report that is uploaded to the Space as analysis.txt."""
    generated_on = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Built line by line (not from an indented triple-quoted literal) so the
    # uploaded file carries no source-code indentation.
    return "\n".join(
        [
            f"# Analysis of arXiv Paper: {arxiv_id}",
            "",
            f"**Generated on:** {generated_on}",
            "",
            "**Analysis:**",
            analysis,
            "",
            "---",
            footer,
            "",
        ]
    )


if arxiv_link:
    arxiv_id = _parse_arxiv_id(arxiv_link)

    if arxiv_id:
        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
        try:
            # A timeout keeps the page from hanging forever on a stalled download.
            response = requests.get(pdf_url, timeout=30)
            response.raise_for_status()
            pdf_content = response.content
            text = _extract_pdf_text(pdf_content)

            st.subheader("Paper Content Preview:")
            st.markdown(f'<iframe src="{pdf_url}" width="700" height="600" type="application/pdf"></iframe>', unsafe_allow_html=True)

            if st.button("Convert to Hugging Face Space"):
                if not hf_token:
                    st.warning("Please enter your Hugging Face Token in the sidebar to create a Space.")
                else:
                    # Old-style identifiers contain "/", which cannot appear
                    # inside a repo name.
                    space_name_suffix = custom_space_name or arxiv_id.replace("/", "-")
                    space_name = f"arxiv-{space_name_suffix}"

                    try:
                        api = HfApi(token=hf_token)
                        # Resolve the namespace here from the account handle
                        # ("name"), so the repo id never depends on a display
                        # name or on state set elsewhere in the script.
                        namespace = api.whoami()["name"]
                        repo_id = f"{namespace}/{space_name}"
                        # repo_type must be "space": otherwise space_sdk is not
                        # applied and the uploads below (repo_type="space")
                        # target a repo that was never created.
                        api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="static")

                        # Upload straight from memory: no temporary file is
                        # left behind in the working directory if the upload fails.
                        api.upload_file(
                            path_or_fileobj=pdf_content,
                            path_in_repo="paper.pdf",
                            repo_id=repo_id,
                            repo_type="space",
                        )

                        # PDF Analysis (best effort: a failure here is reported
                        # but does not undo the Space creation above).
                        try:
                            st.info("Analyzing PDF content...")
                            # NOTE(review): this checkpoint looks like a decoder-only
                            # model while "text2text-generation" is the seq2seq task;
                            # confirm the task/model pairing actually loads.
                            pipe = pipeline("text2text-generation", model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
                            # Limit input for faster processing
                            analysis_result = pipe(text[:4096], max_length=512)[0]['generated_text']

                            analysis_report = _build_analysis_report(arxiv_id, analysis_result, copyright_text)

                            # Encode explicitly so non-ASCII text does not depend
                            # on the platform's default file encoding.
                            api.upload_file(
                                path_or_fileobj=analysis_report.encode("utf-8"),
                                path_in_repo="analysis.txt",
                                repo_id=repo_id,
                                repo_type="space",
                            )

                            st.success("Analysis saved to the Space as `analysis.txt`.")

                        except Exception as e_analysis:
                            st.error(f"Error during PDF analysis: {e_analysis}")

                        badge_html = f"""
                        <a href="https://huggingface.co/spaces/{repo_id}" target="_blank">
                            <img src="{badge_image_url}" alt="Hugging Face Space">
                        </a>
                        """
                        st.subheader("Hugging Face Space Created!")
                        st.markdown(f"Space URL: https://huggingface.co/spaces/{repo_id}")
                        st.markdown("Embed this badge in your README or website:")
                        st.code(badge_html, language="html")

                    except Exception as e_hf:
                        st.error(f"Error creating or updating Hugging Face Space: {e_hf}")

        except requests.exceptions.RequestException as e_http:
            st.error(f"Error fetching PDF from arXiv: {e_http}")
        except Exception as e_pdf:
            st.error(f"Error processing PDF: {e_pdf}")
    else:
        st.warning("Invalid arXiv link or ID format.")

# --- Footer ---
# A horizontal rule followed by the copyright line taken from the sidebar input.
for _footer_markdown in ("---", copyright_text):
    st.markdown(_footer_markdown)