import csv
import logging

import nltk
from tqdm import tqdm
import gradio as gr
from transformers import pipeline
from huggingface_hub import HfApi, upload_file

# Setup logging
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Download the NLTK tokenizer data used by word_tokenize
# (the full 'all' bundle is not needed)
nltk.download('punkt')

# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
TOKENIZER = 'bert-base-uncased'
MODELS = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "albert-base-v2",
]  # Add more models as needed

# Initialize one feature-extraction pipeline per model
models = {model: pipeline('feature-extraction', model=model) for model in MODELS}


# Functions
def process_text(text):
    """Tokenize the input and compute a mean feature value per word and model."""
    tokens = nltk.word_tokenize(text)
    words = list(set(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                # return_tensors=True yields a tensor, so .mean() is available
                # (the default nested-list output has no .mean() method)
                output = model(word, return_tensors=True)
                word_means[model_name] = output[0].mean().item()
            except Exception as e:
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}


def save_to_csv(data, filename="output.csv"):
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["word", "meanings"])
        writer.writeheader()
        for word in data['words']:
            writer.writerow({
                "word": word,
                "meanings": str(data['meaning'][word])
            })


def train_dataset():
    text = "Your long text goes here..."
    data = process_text(text)
    save_to_csv(data)
    logging.info("Dataset processed and saved to CSV.")


def generate_report():
    with open('app.log', 'r') as log_file:
        log_content = log_file.read()
    return log_content


# Gradio interface
def generate_all(text):
    data = process_text(text)
    save_to_csv(data)
    return "Processed data saved to output.csv"


# Custom CSS (placeholder for Tailwind-style overrides)
custom_css = """
"""

with gr.Blocks(css=custom_css) as iface:
    gr.Markdown("# DeepFocus-X3")
    with gr.Tab("Generate All"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", container=False)
            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", container=False)
        # gr.Button does not accept a `container` argument
        generate_button = gr.Button("Generate")
        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)
    with gr.Tab("Logs"):
        with gr.Row():
            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", container=False)
        report_button = gr.Button("Report using Logs")
        report_button.click(fn=generate_report, outputs=log_output)


# Run the dataset build and push the results to the Hugging Face Hub
def run_and_push():
    train_dataset()
    api = HfApi()
    try:
        api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
        upload_file(
            path_or_fileobj="output.csv",
            path_in_repo="output.csv",
            repo_id=HF_REPO
        )
        logging.info("Dataset pushed to HuggingFace.")
    except Exception as e:
        logging.error(f"Error uploading to HuggingFace: {e}")
        try:
            # Log the error to a separate errors repo
            errors_repo = "katsukiai/errors"
            api.create_repo(repo_id=errors_repo, private=False, exist_ok=True)
            with open('upload_error.log', 'w') as error_file:
                error_file.write(f"Error uploading to HuggingFace: {e}\n")
            upload_file(
                path_or_fileobj="upload_error.log",
                path_in_repo="upload_error.log",
                repo_id=errors_repo
            )
            logging.info("Error log pushed to HuggingFace errors repo.")
        except Exception as e2:
            logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")


if __name__ == "__main__":
    # Build and push before launching: launch() blocks, so anything placed
    # after it would only run once the server shuts down.
    run_and_push()
    iface.launch()