# DeepFocusTrain / app.py
# (source: katsukiai's Space — "Update app.py", commit 5eafbe3, verified; 6.02 kB)
import csv
import logging
import os
import time

import gradio as gr
import nltk
import numpy as np
from huggingface_hub import HfApi, HfFolder, upload_file
from tqdm import tqdm
from transformers import pipeline
# Setup Logging: everything goes to app.log (generate_report() reads it back).
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Download NLTK data. word_tokenize() only needs the 'punkt' models;
# 'all' is kept for backward compatibility but is a very large download —
# TODO(review): consider narrowing to the packages actually used.
nltk.download('all')

# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
TOKENIZER = 'bert-base-uncased'
# NOTE: the original list contained "albert-v2", which is not a published
# Hugging Face model id (the checkpoint is "albert-base-v2"); pipeline()
# raised for that entry at import time and crashed the app.
MODELS = ["bert-base-uncased", "gpt2", "roberta-base", "distilbert-base-uncased", "albert-base-v2"]  # Add more models as needed

# Initialize Models: one feature-extraction pipeline per model, keyed by model id.
models = {model: pipeline('feature-extraction', model=model) for model in MODELS}
# Functions
def process_text(text):
    """Tokenize *text* and compute a per-word scalar "meaning" per model.

    Returns a dict with:
      - "tokenizer": the full token list (duplicates preserved),
      - "words": the unique tokens,
      - "meaning": {word: {model_name: mean activation (float) or None on failure}}.
    """
    tokens = nltk.word_tokenize(text)
    words = list(set(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                output = model(word)
                # The feature-extraction pipeline returns nested Python lists,
                # which have no .mean() method — the original
                # `output[0].mean()` raised AttributeError for every word and
                # silently recorded None. Reduce with numpy instead.
                word_means[model_name] = float(np.mean(output[0]))
            except Exception as e:
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}
def save_to_csv(data, filename="output.csv"):
    """Persist one row per unique word: the word and the stringified
    per-model meanings dict, under the columns "word" / "meanings"."""
    columns = ["word", "meanings"]
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        rows = (
            {"word": entry, "meanings": str(data['meaning'][entry])}
            for entry in data['words']
        )
        writer.writerows(rows)
def train_dataset():
    """Run the word-meaning pipeline on a placeholder text and write output.csv."""
    sample_text = "Your long text goes here..."
    processed = process_text(sample_text)
    save_to_csv(processed)
    logging.info("Dataset processed and saved to CSV.")
def generate_report():
    """Return the full contents of the application log file (app.log)."""
    with open('app.log', 'r') as handle:
        return handle.read()
def get_uptime():
    """Return elapsed wall-clock time since startup, formatted 'Uptime: HH:MM:SS'."""
    elapsed = time.time() - start_time
    return f"Uptime: {time.strftime('%H:%M:%S', time.gmtime(elapsed))}"
# Gradio Interface
def generate_all(text):
    """Process *text* end-to-end, persist the result, and report the file name."""
    save_to_csv(process_text(text))
    return "Processed data saved to output.csv"
# Custom CSS for Tailwind CSS, injected via gr.Blocks(css=custom_css).
# NOTE(review): Gradio's `css` parameter expects raw CSS, not HTML — the
# original wrapped the rules in <style>...</style> tags, which would be
# emitted literally into the stylesheet and break it. The tags are dropped.
custom_css = """
@import url('https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css');
body {
    font-family: 'Arial', sans-serif;
}
#title {
    text-align: center;
    margin-bottom: 20px;
}
#input_text, #output_text, #log_output {
    width: 100%;
    max-width: 600px;
    margin: 10px 0;
}
#generate_button, #report_button, #save_settings_button {
    width: 100%;
    max-width: 200px;
    margin: 10px 0;
}
#settings_container {
    margin-top: 20px;
}
"""
# Records process start so get_uptime() can compute elapsed time.
start_time = time.time()

# Gradio UI: three tabs — text processing, log viewing, and settings.
with gr.Blocks(css=custom_css) as iface:
    gr.Markdown("# DeepFocus-X3", elem_id="title")
    with gr.Tab("Generate All"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", elem_id="input_text")
            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", elem_id="output_text")
        generate_button = gr.Button("Generate", elem_id="generate_button")
        # Runs the full pipeline and writes output.csv; returns a status string.
        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)
    with gr.Tab("Logs"):
        with gr.Row():
            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", elem_id="log_output")
        report_button = gr.Button("Report using Logs", elem_id="report_button")
        # Dumps the raw contents of app.log into the textbox.
        report_button.click(fn=generate_report, outputs=log_output)
    with gr.Tab("Settings"):
        with gr.Row():
            commit_input = gr.Textbox(label="Commit", placeholder="Enter commit message", elem_id="commit_input")
            username_input = gr.Textbox(label="Username", placeholder="Enter your username", elem_id="username_input")
            metadata_input = gr.Textbox(label="Metadata", placeholder="Enter metadata", elem_id="metadata_input")
            # Read-only; also referenced after this block for the periodic uptime refresh.
            uptime_text = gr.Textbox(label="Uptime", placeholder="Uptime will appear here...", elem_id="uptime_text", interactive=False)
        save_settings_button = gr.Button("Save Settings", elem_id="save_settings_button")
        # NOTE(review): settings are only echoed back, not persisted anywhere.
        save_settings_button.click(
            fn=lambda commit, username, metadata: f"Settings saved: {commit}, {username}, {metadata}",
            inputs=[commit_input, username_input, metadata_input],
            outputs=[uptime_text]  # Reusing uptime_text for output to show settings saved message
        )
# Update uptime every 10 seconds.
# NOTE(review): gr.Blocks has no `set_event_handler` method, so the original
# call raised AttributeError at startup. `Blocks.load(..., every=10)` is the
# supported way to run a callback on page load and re-run it periodically;
# event listeners must be attached inside the Blocks context, hence the
# context re-entry here.
with iface:
    iface.load(fn=get_uptime, outputs=uptime_text, every=10)
# Run and Push to HuggingFace
def run_and_push():
train_dataset()
try:
api = HfApi()
api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
upload_file(
path_or_fileobj="output.csv",
path_in_repo="output.csv",
repo_id=HF_REPO
)
logging.info("Dataset pushed to HuggingFace.")
except Exception as e:
logging.error(f"Error uploading to HuggingFace: {e}")
try:
# Log the error to a separate errors repo
errors_repo = "katsukiai/errors"
api.create_repo(repo_id=errors_repo, private=False, exist_ok=True)
with open('upload_error.log', 'w') as error_file:
error_file.write(f"Error uploading to HuggingFace: {e}\n")
upload_file(
path_or_fileobj="upload_error.log",
path_in_repo="upload_error.log",
repo_id=errors_repo
)
logging.info("Error log pushed to HuggingFace errors repo.")
except Exception as e2:
logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")
if __name__ == "__main__":
    # launch() blocks until the server is shut down, so with the original
    # ordering run_and_push() only executed after the UI exited. Build and
    # push the dataset first, then serve the interface.
    run_and_push()
    iface.launch()