# Hugging Face Spaces status shown when this file was captured: Runtime error
import os | |
import nltk | |
import csv | |
import logging | |
from tqdm import tqdm | |
import gradio as gr | |
from transformers import pipeline | |
from huggingface_hub import HfApi, upload_file, HfFolder | |
import time | |
# Setup Logging
# Everything logged through the root logger is appended to app.log, which is
# the same file generate_report() reads back for the "Logs" tab.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Download All NLTK Data
# NOTE(review): the code visible in this file only calls nltk.word_tokenize;
# downloading every NLTK package is slow and large -- confirm whether the
# tokenizer data alone would be sufficient before narrowing this.
nltk.download('all')

# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
TOKENIZER = 'bert-base-uncased'
# "albert-v2" is not a model id on the Hugging Face Hub, so loading it fails
# at import time; the canonical ALBERT v2 base checkpoint is "albert-base-v2".
MODELS = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "albert-base-v2",
]  # Add more models as needed

# Initialize Models
# One feature-extraction pipeline per model id, keyed by that id.
models = {
    model_name: pipeline('feature-extraction', model=model_name)
    for model_name in MODELS
}
# Functions | |
def _flatten_scalars(values):
    """Yield every scalar contained in an arbitrarily nested list/tuple."""
    for value in values:
        if isinstance(value, (list, tuple)):
            yield from _flatten_scalars(value)
        else:
            yield value


def _mean_activation(features):
    """Return the arithmetic mean of all values in one pipeline output item.

    Tensor-like objects (anything exposing ``.mean()``) are averaged natively;
    nested Python lists are flattened and averaged by hand.

    Raises:
        ValueError: if ``features`` contains no values at all.
    """
    if hasattr(features, "mean"):
        return float(features.mean())
    scalars = list(_flatten_scalars(features))
    if not scalars:
        raise ValueError("feature output is empty")
    return sum(scalars) / len(scalars)


def process_text(text):
    """Tokenize ``text`` and compute a per-model mean activation for each word.

    Args:
        text: The raw input string; it is split with ``nltk.word_tokenize``.

    Returns:
        A dict with three keys:
        ``"tokenizer"`` -- the full token list, in order, duplicates included;
        ``"words"`` -- the unique tokens in order of first appearance;
        ``"meaning"`` -- ``{word: {model_name: mean_or_None}}`` where the value
        is ``None`` when that model failed on that word (the failure is logged).
    """
    tokens = nltk.word_tokenize(text)
    # dict.fromkeys de-duplicates while keeping first-occurrence order, so the
    # result (and the CSV written from it) is deterministic between runs.
    words = list(dict.fromkeys(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                output = model(word)
                # The pipeline hands back nested lists unless tensors were
                # requested, and a list has no .mean(); average it explicitly.
                word_means[model_name] = _mean_activation(output[0])
            except Exception as e:
                # Best effort: one failing model must not abort the whole run.
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}
def save_to_csv(data, filename="output.csv"):
    """Write one CSV row per entry of ``data['words']``.

    The file gets a ``word,meanings`` header; each row holds the word and the
    ``str()`` of its entry in ``data['meaning']``. An existing file at
    ``filename`` is overwritten.

    Args:
        data: Mapping with a ``'words'`` sequence and a ``'meaning'`` mapping
            keyed by those words.
        filename: Destination path of the CSV file.
    """
    columns = ["word", "meanings"]
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        # Rows are produced lazily, in the order the words are listed.
        csv_writer.writerows(
            {"word": entry, "meanings": str(data['meaning'][entry])}
            for entry in data['words']
        )
def train_dataset(text="Your long text goes here..."):
    """Process ``text`` and store the result in the default CSV file.

    Args:
        text: The corpus to process. It defaults to the placeholder string the
            function previously had hard-coded, so existing zero-argument
            calls behave exactly as before.
    """
    data = process_text(text)
    save_to_csv(data)
    logging.info("Dataset processed and saved to CSV.")
def generate_report():
    """Return the contents of ``app.log`` as a single string.

    Returns:
        The full log text, or a short placeholder message when the log file
        does not exist in the current working directory.
    """
    try:
        # errors='replace' keeps the report readable even if the log contains
        # bytes that do not decode in the default encoding.
        with open('app.log', 'r', errors='replace') as log_file:
            return log_file.read()
    except FileNotFoundError:
        # This is a UI callback: show a message instead of raising.
        return "No log file found yet."
def get_uptime(since=None):
    """Format the time elapsed since ``since`` as ``"Uptime: HH:MM:SS"``.

    Args:
        since: Epoch timestamp (seconds) to measure from. When omitted, the
            module-level ``start_time`` is used.

    Returns:
        The formatted uptime string. Hours are not wrapped at 24, so a
        process that has been up for 25 hours reports ``25:00:00``.
    """
    if since is None:
        since = start_time
    # Whole seconds only; clamp so a clock adjustment never yields negatives.
    elapsed = max(0, int(time.time() - since))
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"Uptime: {hours:02d}:{minutes:02d}:{seconds:02d}"
# Gradio Interface | |
def generate_all(text):
    """Gradio callback: process ``text`` and write the result to the CSV file.

    Args:
        text: The string typed into the input box.

    Returns:
        A fixed confirmation message for the output box.
    """
    save_to_csv(process_text(text))
    return "Processed data saved to output.csv"
# Custom CSS for Tailwind CSS
# This is passed to gr.Blocks(css=...), which expects plain CSS text, so the
# rules are not wrapped in <style> tags.
# NOTE(review): the package version in the import URL was obscured in the
# captured source; 2.2.19 is assumed because the dist/tailwind.min.css path
# is the Tailwind v2 CDN layout -- confirm the intended version.
custom_css = """
@import url('https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css');
body {
    font-family: 'Arial', sans-serif;
}
#title {
    text-align: center;
    margin-bottom: 20px;
}
#input_text, #output_text, #log_output {
    width: 100%;
    max-width: 600px;
    margin: 10px 0;
}
#generate_button, #report_button, #save_settings_button {
    width: 100%;
    max-width: 200px;
    margin: 10px 0;
}
#settings_container {
    margin-top: 20px;
}
"""
# Reference timestamp for get_uptime(); captured once when the module loads.
start_time = time.time()

# gr.Timer only exists in newer Gradio releases; older ones refresh through
# the `every=` argument of an event listener, which needs the queue enabled.
_uptime_uses_timer = hasattr(gr, "Timer")

with gr.Blocks(css=custom_css) as iface:
    gr.Markdown("# DeepFocus-X3", elem_id="title")

    # Tab 1: run the processing pipeline on user-supplied text.
    with gr.Tab("Generate All"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", elem_id="input_text")
            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", elem_id="output_text")
        generate_button = gr.Button("Generate", elem_id="generate_button")
        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)

    # Tab 2: show the contents of the application log on demand.
    with gr.Tab("Logs"):
        with gr.Row():
            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", elem_id="log_output")
        report_button = gr.Button("Report using Logs", elem_id="report_button")
        report_button.click(fn=generate_report, outputs=log_output)

    # Tab 3: free-form settings plus the uptime display.
    with gr.Tab("Settings"):
        with gr.Row():
            commit_input = gr.Textbox(label="Commit", placeholder="Enter commit message", elem_id="commit_input")
            username_input = gr.Textbox(label="Username", placeholder="Enter your username", elem_id="username_input")
            metadata_input = gr.Textbox(label="Metadata", placeholder="Enter metadata", elem_id="metadata_input")
        uptime_text = gr.Textbox(label="Uptime", placeholder="Uptime will appear here...", elem_id="uptime_text", interactive=False)
        save_settings_button = gr.Button("Save Settings", elem_id="save_settings_button")
        save_settings_button.click(
            fn=lambda commit, username, metadata: f"Settings saved: {commit}, {username}, {metadata}",
            inputs=[commit_input, username_input, metadata_input],
            outputs=[uptime_text]  # Reusing uptime_text for output to show settings saved message
        )

    # Update uptime every 10 seconds
    # Event listeners take a function plus output components and write the
    # function's return value into those outputs; Blocks has no
    # set_event_handler method and components have no instance-level update().
    if _uptime_uses_timer:
        iface.load(fn=get_uptime, inputs=None, outputs=uptime_text)
        gr.Timer(10).tick(fn=get_uptime, inputs=None, outputs=uptime_text)
    else:
        iface.load(fn=get_uptime, inputs=None, outputs=uptime_text, every=10)

if not _uptime_uses_timer:
    # `every=` listeners only run when the queue is enabled.
    iface.queue()
# Run and Push to HuggingFace
def _push_file_to_repo(repo_id, filename):
    """Create ``repo_id`` if it does not exist and upload ``filename`` to it.

    The file keeps its local name inside the repository. Any exception from
    the Hub client propagates to the caller.
    """
    # A fresh client per call, so no caller depends on a client created in a
    # code path that may itself have failed.
    api = HfApi()
    api.create_repo(repo_id=repo_id, private=False, exist_ok=True)
    upload_file(
        path_or_fileobj=filename,
        path_in_repo=filename,
        repo_id=repo_id
    )


def run_and_push():
    """Build the dataset CSV and push it to the Hugging Face Hub.

    ``train_dataset()`` runs first; its exceptions are not caught here. The
    upload is best effort: on failure the error is logged, written to
    ``upload_error.log`` and pushed to a separate errors repository. If that
    fallback also fails, the second error is only logged. Nothing is returned.
    """
    train_dataset()
    try:
        _push_file_to_repo(HF_REPO, "output.csv")
        logging.info("Dataset pushed to HuggingFace.")
    except Exception as e:
        logging.error(f"Error uploading to HuggingFace: {e}")
        try:
            # Log the error to a separate errors repo
            # NOTE(review): the errors repo is created as public, so the
            # exception text becomes world-readable -- confirm that is intended.
            errors_repo = "katsukiai/errors"
            # Written before any network call so the error is kept locally
            # even when the Hub is unreachable.
            with open('upload_error.log', 'w', encoding='utf-8') as error_file:
                error_file.write(f"Error uploading to HuggingFace: {e}\n")
            _push_file_to_repo(errors_repo, "upload_error.log")
            logging.info("Error log pushed to HuggingFace errors repo.")
        except Exception as e2:
            logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")
if __name__ == "__main__":
    import threading

    # iface.launch() blocks the main thread while the server is running, so a
    # call placed after it would only execute once the app shuts down. Start
    # the dataset build/upload in the background first; it is a daemon thread
    # so it never keeps the process alive after the server stops.
    threading.Thread(target=run_and_push, name="run-and-push", daemon=True).start()
    iface.launch()