Spaces:

unpaper
/

AddPaper

Sleeping

App Files Files Community

katsukiai committed on Feb 28

Commit

f4fdb5d

verified ·

1 Parent(s): a7ef7e7

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -222

app.py DELETED Viewed

@@ -1,222 +0,0 @@
-import streamlit as st
-import arxiv
-import requests
-import os
-from pathlib import Path
-from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
-from huggingface_hub import login, HfApi
-import fitz  # PyMuPDF
-import pandas as pd
-from collections import Counter
-import re
-import json
# Constants
# Checkpoints offered in the sidebar model selector.
MODEL_NAME = "google/flan-t5-large"
SECONDARY_MODEL = "facebook/bart-large-cnn"
# The access token comes from the environment only. The fallback is
# deliberately empty: any non-empty value is later handed to `login()`, so a
# repo-id-shaped placeholder here would be sent to the Hub as a credential.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN", "")
# Default text for the "Space Name" input: the "unpaper" namespace when a real
# token is configured, otherwise a placeholder the user is expected to edit.
SPACE_NAME = (
    "unpaper/<name>"
    if HUGGINGFACE_TOKEN and not HUGGINGFACE_TOKEN.startswith("your_username")
    else "your_username/<name>"
)
# Hub REST endpoint that lists models.
HF_API_URL = "https://huggingface.co/api/models"
# CSS
# Stylesheet injected once at the top of the page. It defines the `.badge`
# and `.warning` classes used by the raw-HTML snippets in the main section,
# plus background colours for the main area and the sidebar.
_CUSTOM_CSS = """
    <style>
    .main { background-color: #f5f5f5; }
    .sidebar .sidebar-content { background-color: #ffffff; }
    .badge {
        background-color: #ff4b4b;
        color: white;
        padding: 5px 10px;
        border-radius: 5px;
        display: inline-block;
    }
    .warning {
        background-color: #fff3cd;
        color: #856404;
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
    </style>
"""
# unsafe_allow_html is required, otherwise the <style> tag is escaped.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Sidebar
# Collects every user input. The resulting module-level names (arxiv_id,
# upload_pdf, space_name, token, model_choice) are read by the code below.
st.sidebar.title("arXiv Paper Converter")
st.sidebar.header("Settings")
arxiv_id = st.sidebar.text_input("Enter arXiv ID", "2407.21783")
upload_pdf = st.sidebar.file_uploader("Upload PDF", type="pdf")
space_name = st.sidebar.text_input("Hugging Face Space Name", SPACE_NAME)
token = st.sidebar.text_input("Hugging Face Token", HUGGINGFACE_TOKEN, type="password")
model_choice = st.sidebar.selectbox("Select Model", ["Text-to-Text (FLAN-T5)", "Text Generation (BART)"])
# Login to Hugging Face
# Skip the "your_username/..." placeholder: it is not a credential, and
# submitting it would fail on every rerun of the script.
if token and not token.startswith("your_username"):
    try:
        login(token=token)
    except Exception as e:
        # A bad token should not take the whole app down; report it in the
        # sidebar and let the rest of the page render.
        st.sidebar.error(f"Hugging Face login failed: {str(e)}")
# Fetch available models from Hugging Face API
@st.cache_data(ttl=3600)
def fetch_hf_models(auth_token=None):
    """Return the model listing from the Hugging Face Hub API, or None.

    Args:
        auth_token: Access token to send as a Bearer header. Defaults to the
            sidebar ``token``. Passing it explicitly makes it part of the
            cache key, so a listing fetched with one token is not reused for
            another.

    Returns:
        The decoded JSON body on HTTP 200. Otherwise ``None``, after showing
        a Streamlit warning.
    """
    if auth_token is None:
        auth_token = token
    headers = {}
    # Only authenticate with something that can be a real token; an empty or
    # placeholder value would turn a valid anonymous request into a rejected one.
    if auth_token and not auth_token.startswith("your_username"):
        headers["Authorization"] = f"Bearer {auth_token}"
    try:
        # The timeout keeps an unresponsive endpoint from blocking the script run.
        response = requests.get(HF_API_URL, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.json()
        st.warning("Failed to fetch models from Hugging Face API. Using default models.")
        return None
    except (requests.RequestException, ValueError) as e:
        # RequestException: network/timeout errors. ValueError: undecodable JSON.
        st.warning(f"Error fetching models: {str(e)}. Using default models.")
        return None
hf_models = fetch_hf_models(token)
# Initialize models
# max_entries=1 keeps only the most recently selected model in memory.
@st.cache_resource(max_entries=1)
def load_models(choice=None):
    """Load the tokenizer, model and pipeline for the selected model option.

    Args:
        choice: One of the sidebar "Select Model" labels. Defaults to the
            current ``model_choice``. It is a parameter so that it becomes
            part of the cache key: with a zero-argument cached function the
            first model loaded would be returned forever, whatever the
            selectbox says.

    Returns:
        A ``(tokenizer, model, pipeline_model)`` tuple. The pipeline task is
        ``"text2text-generation"`` for the FLAN-T5 option and
        ``"summarization"`` for any other option.
    """
    if choice is None:
        choice = model_choice
    if choice == "Text-to-Text (FLAN-T5)":
        checkpoint, pipeline_task = MODEL_NAME, "text2text-generation"
    else:
        checkpoint, pipeline_task = SECONDARY_MODEL, "summarization"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    pipeline_model = pipeline(pipeline_task, model=model, tokenizer=tokenizer)
    return tokenizer, model, pipeline_model
tokenizer, model, pipeline_model = load_models(model_choice)
# Functions
def fetch_arxiv_paper(paper_id):
    """Look up a single arXiv paper by its identifier.

    Args:
        paper_id: arXiv identifier, e.g. ``"2407.21783"``. Surrounding
            whitespace is ignored.

    Returns:
        The first result yielded by the arXiv client for that ID.

    Raises:
        ValueError: If the search yields no result.
    """
    paper_id = paper_id.strip()
    client = arxiv.Client()
    search = arxiv.Search(id_list=[paper_id])
    # Supply a default so an empty result set does not leak a bare
    # StopIteration out of this function.
    paper = next(client.results(search), None)
    if paper is None:
        raise ValueError(f"No arXiv paper found for ID {paper_id!r}")
    return paper
def download_pdf(paper, filename):
    """Download the PDF of ``paper`` and return the file name used.

    Args:
        paper: Object exposing ``download_pdf(filename=...)``.
        filename: Name to save the PDF under.

    Returns:
        ``filename``, unchanged, so the call can be used inline.
    """
    target = filename
    paper.download_pdf(filename=target)
    return target
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string holding the text of all pages, with no separator
        added between pages.
    """
    # Open as a context manager so the file handle is released even if
    # extraction fails; the caller deletes the file afterwards.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
def analyze_authors(text):
    """Count author names found on "Author:" / "Authors:" lines of ``text``.

    Each match of the label (case-insensitive) contributes the rest of its
    line, split on commas. Names are stripped of surrounding whitespace, and
    empty fragments (from trailing or doubled commas, or a blank capture)
    are ignored.

    Args:
        text: Text to scan.

    Returns:
        A ``Counter`` mapping each author name to its number of occurrences.
        Empty when no author line is found.
    """
    author_pattern = r"Author[s]?:\s*(.+?)(?:\n|$)"
    author_list = []
    for author_line in re.findall(author_pattern, text, re.IGNORECASE):
        for name in author_line.split(','):
            name = name.strip()
            # Skip blanks so "" never shows up as an author.
            if name:
                author_list.append(name)
    return Counter(author_list)
def process_text_with_model(text, task="summarize"):
    """Run the selected pipeline on the first 1000 characters of ``text``.

    Args:
        text: Input text; only ``text[:1000]`` is sent to the model.
        task: Instruction placed at the start of the prompt for the FLAN-T5
            option. It is not used for the other option, which calls the
            pipeline on the text directly.

    Returns:
        The generated string from the first pipeline result.

    Raises:
        KeyError: If the result has neither a ``generated_text`` nor a
            ``summary_text`` entry.
    """
    excerpt = text[:1000]
    if model_choice == "Text-to-Text (FLAN-T5)":
        prompt = f"{task} the following text: {excerpt}"
        result = pipeline_model(prompt, max_length=512, num_beams=4)
    else:
        result = pipeline_model(excerpt, max_length=512, min_length=30, do_sample=False)
    first = result[0]
    # text2text-generation pipelines report "generated_text", summarization
    # pipelines report "summary_text"; accept either.
    for key in ("generated_text", "summary_text"):
        if key in first:
            return first[key]
    raise KeyError("pipeline result has neither 'generated_text' nor 'summary_text'")
def create_huggingface_space(space_name, metadata):
    """Create a public static Space and upload the analysis metadata to it.

    Args:
        space_name: Repo id of the Space to create (``"owner/name"``).
        metadata: JSON-serialisable dict, uploaded as ``metadata.json``.

    Returns:
        The URL of the new Space, or ``None`` if any step failed. The error
        is shown with ``st.error``.
    """
    api = HfApi()
    try:
        api.create_repo(repo_id=space_name, repo_type="space", space_sdk="static", private=False)
        # Upload metadata
        # Serialise in memory rather than through a fixed "metadata.json" in
        # the working directory, so concurrent sessions cannot overwrite each
        # other's file and nothing is left behind to clean up.
        api.upload_file(
            path_or_fileobj=json.dumps(metadata, indent=2).encode("utf-8"),
            path_in_repo="metadata.json",
            repo_id=space_name,
            repo_type="space",
        )
        # Upload the local README only when there is one. A missing file must
        # not be reported as a failure after the Space has been created.
        if os.path.isfile("README.md"):
            api.upload_file(
                path_or_fileobj="README.md",
                path_in_repo="README.md",
                repo_id=space_name,
                repo_type="space",
            )
        return f"https://huggingface.co/spaces/{space_name}"
    except Exception as e:
        st.error(f"Failed to create space: {str(e)}")
        return None
# Main App
st.title("arXiv Paper to Hugging Face Space Converter")
st.markdown("<div class='badge'>Beta Community - Open Discussion in Community Tab</div>", unsafe_allow_html=True)
# Warning about model usage
st.markdown("""
    <div class='warning'>
        <strong>Warning:</strong> Ensure you have proper permissions to use selected models.
        Model outputs are stored in metadata and will be publicly visible in the space.
    </div>
""", unsafe_allow_html=True)
# Process arXiv or PDF
# try/finally guarantees the temporary PDF is removed even when fetching,
# parsing or model inference raises.
try:
    if arxiv_id or upload_pdf:
        pdf_path = "temp.pdf"
        # `paper` stays None for uploads. The arXiv ID field is pre-filled, so
        # "arxiv_id is non-empty" does not mean a paper was fetched.
        paper = None
        if upload_pdf:
            # An uploaded file takes precedence over the arXiv ID.
            with open(pdf_path, "wb") as f:
                f.write(upload_pdf.getbuffer())
        else:
            paper = fetch_arxiv_paper(arxiv_id)
            pdf_path = download_pdf(paper, pdf_path)
        # Extract and analyze
        text = extract_text_from_pdf(pdf_path)
        author_analysis = analyze_authors(text)
        # Model processing
        summary = process_text_with_model(text, "summarize")
        key_points = process_text_with_model(text, "extract key points" if model_choice == "Text-to-Text (FLAN-T5)" else "summarize")
        # Display results
        st.header("Paper Analysis")
        st.subheader("Authors")
        st.dataframe(pd.DataFrame.from_dict(author_analysis, orient='index', columns=['Count']))
        st.subheader("AI Analysis")
        st.write("Summary:", summary)
        st.write("Key Points:", key_points)
        # Enhanced metadata
        # Title and arXiv ID are taken from the fetched paper only; an
        # uploaded PDF is not labelled with whatever ID is in the sidebar.
        metadata = {
            "title": paper.title if paper is not None else "Uploaded PDF",
            "authors": list(author_analysis.keys()),
            "arxiv_id": arxiv_id if paper is not None else "N/A",
            "model_analysis": {
                "summary": summary,
                "key_points": key_points,
                "model_used": model_choice,
                "model_name": MODEL_NAME if model_choice == "Text-to-Text (FLAN-T5)" else SECONDARY_MODEL,
                "model_license": "Check model card on Hugging Face",
                "processing_date": pd.Timestamp.now().isoformat()
            },
            "warnings": {
                "model_usage": "Ensure proper model licensing",
                "content_visibility": "All outputs will be public in space",
                "data_source": "Verify arXiv/paper permissions"
            }
        }
        # Create Space
        if st.button("Create Hugging Face Space"):
            space_url = create_huggingface_space(space_name, metadata)
            if space_url:
                st.success(f"Space created: {space_url}")
                st.markdown(f"""
                <a href="{space_url}" target="_blank">
                    <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
                         alt="Hugging Face Space" width="150">
                </a>
            """, unsafe_allow_html=True)
finally:
    # Cleanup
    if os.path.exists("temp.pdf"):
        os.remove("temp.pdf")