Spaces:

0xrushi
/

MasoodishWisdom

Sleeping

App Files Files Community

0xrushi committed on Apr 23

Commit

06bc80f

0 Parent(s):

Initial commit

Browse files

Files changed (27) hide show

.gitattributes +36 -0
.gitignore +46 -0
README.md +34 -0
checkpoints/epoch-10/README.md +202 -0
checkpoints/epoch-10/adapter_config.json +36 -0
checkpoints/epoch-10/adapter_model.safetensors +3 -0
checkpoints/epoch-10/quotes_epoch_10.txt +27 -0
checkpoints/epoch-10/special_tokens_map.json +23 -0
checkpoints/epoch-10/tokenizer.json +0 -0
checkpoints/epoch-10/tokenizer.model +3 -0
checkpoints/epoch-10/tokenizer_config.json +0 -0
checkpoints/epoch-11/README.md +202 -0
checkpoints/epoch-11/adapter_config.json +36 -0
checkpoints/epoch-11/adapter_model.safetensors +3 -0
checkpoints/epoch-11/quotes_epoch_11.txt +30 -0
checkpoints/epoch-11/special_tokens_map.json +23 -0
checkpoints/epoch-11/tokenizer.json +0 -0
checkpoints/epoch-11/tokenizer.model +3 -0
checkpoints/epoch-11/tokenizer_config.json +0 -0
data/instrumental.wav +3 -0
data/ref_weights.pkl +3 -0
infer/examples/basic/basic.toml +11 -0
requirements.txt +0 -0
scripts/f5py.py +230 -0
scripts/generate_quote_gradio.py +106 -0
scripts/ref_utils.py +54 -0
scripts/stitch.py +45 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,46 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+venv/
+env/
+ENV/
+.env
+.venv
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+.DS_Store
+# Project specific
+output/
+tests/
+checkpoints/
+*.log
+wandb
+data/training_data
+code.py
+main.py

README.md ADDED Viewed

	@@ -0,0 +1,34 @@

+---
+title: MasoodishWisdom
+emoji: 🤖
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: "4.19.2"
+app_file: scripts/generate_quote_gradio.py
+pinned: false
+---
+# MasoodishWisdom
+An AI-powered wisdom sharing platform built with Gradio and Hugging Face Spaces.
+## Setup
+1. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+2. Run the application:
+```bash
+python scripts/generate_quote_gradio.py
+```
+## Configuration
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## License
+MIT License

checkpoints/epoch-10/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: mistralai/Mistral-7B-Instruct-v0.3
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

checkpoints/epoch-10/adapter_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoints/epoch-10/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e8f1e42f01ddab093e73c66b528145d5bb5b6d19742236caaa6607a3e25acc1
+size 27297032

checkpoints/epoch-10/quotes_epoch_10.txt ADDED Viewed

	@@ -0,0 +1,27 @@

+💭 Sample 1:
+Generate a Masood Boomgaard style quote:
+There are people who can tell you the exact day of the week and
+month of any date in history.
+And then there are the people who could tell you...
+to never bother with learning that sort of frivolous shit,
+because not only does it not impress the opposite
+💭 Sample 2:
+Generate a Masood Boomgaard style quote:
+If you are going to work on a Saturday,
+You might as well also work on a Sunday.
+That way, you might as well not work
+Any other day of the week.
+Just chill the fuck
+Right out.
+💭 Sample 3:
+Generate a Masood Boomgaard style quote:
+Why settle for average when you can consistently be average?
+More and more companies are selling the idea of
+'Being exceptional',
+'Reaching your potential'
+And 'Living your best life'.
+This is all a vast selling project.
+What these concepts fail to clearly

checkpoints/epoch-10/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoints/epoch-10/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/epoch-10/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+size 587404

checkpoints/epoch-10/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/epoch-11/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: mistralai/Mistral-7B-Instruct-v0.3
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

checkpoints/epoch-11/adapter_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoints/epoch-11/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b91b1cf87d31816052bb1b23e1ef35283aec9cb12304ac133b7b2c84993aba6
+size 27297032

checkpoints/epoch-11/quotes_epoch_11.txt ADDED Viewed

	@@ -0,0 +1,30 @@

+💭 Sample 1:
+Generate a Masood Boomgaard style quote:
+Stop looking for the perfect place
+To start over.
+Every place has its problems,
+Even your place.
+Just fucking deal with it.
+You are going to find the peace you seek,
+And the place where you find it is irrelevant.
+💭 Sample 2:
+Generate a Masood Boomgaard style quote:
+Earth's problems are caused by humankind wanting what
+Others want.
+This could only get resolved if nobody wanted anything.
+So, do not be impressed by the upcoming release of the new
+Apple products.
+Do nothing.
+Just walk away.
+💭 Sample 3:
+Generate a Masood Boomgaard style quote:
+Saying sorry is important.
+But saying sorry is not as important as noticing how it happened in the
+First place.
+Grant yourself an apology tab.
+It might be large.
+If you are apology tab is high,
+you are like a CEO with

checkpoints/epoch-11/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoints/epoch-11/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/epoch-11/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+size 587404

checkpoints/epoch-11/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/instrumental.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eea4a4a2fb91ee6e139724860b2bca597a9187ed8e09be60e46de82266aef34
+size 5097476

data/ref_weights.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76513fe3c720861d8c165113a4844336c957422d3c967e9a3e5300d1a1293bfe
+size 126

infer/examples/basic/basic.toml ADDED Viewed

	@@ -0,0 +1,11 @@

+# F5TTS_v1_Base | E2TTS_Base
+model = "F5TTS_v1_Base"
+ref_audio = "infer/examples/basic/basic_ref_en.wav"
+# If an empty "", transcribes the reference audio automatically.
+ref_text = "Some call me nature, others call me mother nature."
+gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
+# File with text to generate. Ignores the text above.
+gen_file = ""
+remove_silence = false
+output_dir = "tests"
+output_file = "infer_cli_basic.wav"

requirements.txt ADDED Viewed

Binary file (6.11 kB). View file

scripts/f5py.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import codecs
+import os
+import re
+from datetime import datetime
+from importlib.resources import files
+from pathlib import Path
+import numpy as np
+import soundfile as sf
+import tomli
+from cached_path import cached_path
+from hydra.utils import get_class
+from omegaconf import OmegaConf
+from f5_tts.infer.utils_infer import (
+    mel_spec_type,
+    target_rms,
+    cross_fade_duration,
+    nfe_step,
+    cfg_strength,
+    sway_sampling_coef,
+    speed,
+    fix_duration,
+    device,
+    infer_process,
+    load_model,
+    load_vocoder,
+    remove_silence_for_generated_wav,
+)
+from ref_utils import load_ref_weights
+# ── USER CONFIG ────────────────────────────────────────────────────────────────
+config_path    = "infer/examples/basic/basic.toml"
+model          = "F5TTS_v1_Base"
+model_cfg_path = None  # e.g. "path/to/your/model.yaml", or leave None to use default from config
+ckpt_file      = ""    # leave blank to pull from HF cache
+vocab_file     = ""    # leave blank to use default
+ref_text       = (
+    "Fuck your phone. Stop texting all the time. "
+    "Look up from your phone and breathe. Release yourself."
+)
+gen_text       = (
+    "I am not feeling it. This is it. There is no reconceptualizing."
+)
+gen_file       = ""    # if set, will override gen_text by loading from this file
+output_dir     = "tests"
+output_file    = f"infer_cli_{datetime.now():%Y%m%d_%H%M%S}.wav"
+save_chunk     = False
+remove_silence = False
+load_vocoder_from_local = False
+vocoder_name   = None  # "vocos" or "bigvgan" or None to use default from config
+# ────────────────────────────────────────────────────────────────────────────────
+# load config
+config = tomli.load(open(config_path, "rb"))
+# resolve parameters (fall back to config defaults where applicable)
+model_cfg_path = model_cfg_path or config.get("model_cfg", None)
+ckpt_file      = ckpt_file      or config.get("ckpt_file", "")
+vocab_file     = vocab_file     or config.get("vocab_file", "")
+gen_file       = gen_file       or config.get("gen_file", "")
+save_chunk     = save_chunk     or config.get("save_chunk", False)
+remove_silence = remove_silence or config.get("remove_silence", False)
+load_vocoder_from_local = load_vocoder_from_local or config.get("load_vocoder_from_local", False)
+vocoder_name   = vocoder_name   or config.get("vocoder_name", mel_spec_type)
+target_rms     = config.get("target_rms", target_rms)
+cross_fade_duration = config.get("cross_fade_duration", cross_fade_duration)
+nfe_step       = config.get("nfe_step", nfe_step)
+cfg_strength   = config.get("cfg_strength", cfg_strength)
+sway_sampling_coef = config.get("sway_sampling_coef", sway_sampling_coef)
+speed          = config.get("speed", speed)
+fix_duration   = config.get("fix_duration", fix_duration)
+device         = config.get("device", device)
+# if user pointed at example paths inside the package, fix them
+# if "infer/examples/" in ref_audio:
+#     ref_audio = str(files("f5_tts").joinpath(ref_audio))
+# if gen_file and "infer/examples/" in gen_file:
+#     gen_file = str(files("f5_tts").joinpath(gen_file))
+# if "voices" in config:
+#     for v in config["voices"].values():
+#         if "infer/examples/" in v.get("ref_audio", ""):
+#             v["ref_audio"] = str(files("f5_tts").joinpath(v["ref_audio"]))
+# if using a gen_file, load its text
+if gen_file:
+    gen_text = codecs.open(gen_file, "r", "utf-8").read()
+# prepare output paths
+wave_path = Path(output_dir) / output_file
+if save_chunk:
+    chunk_dir = Path(output_dir) / f"{wave_path.stem}_chunks"
+    chunk_dir.mkdir(parents=True, exist_ok=True)
+# load vocoder
+if vocoder_name == "vocos":
+    vocoder_local_path = "../checkpoints/vocos-mel-24khz"
+elif vocoder_name == "bigvgan":
+    vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
+else:
+    vocoder_local_path = None
+vocoder = load_vocoder(
+    vocoder_name=vocoder_name,
+    is_local=load_vocoder_from_local,
+    local_path=vocoder_local_path,
+    device=device,
+)
+# load TTS model
+model_cfg = OmegaConf.load(
+    model_cfg_path
+    or str(files("f5_tts").joinpath(f"configs/{model}.yaml"))
+)
+ModelClass = get_class(f"f5_tts.model.{model_cfg.model.backbone}")
+mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
+repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"
+if model == "F5TTS_Base":
+    if vocoder_name == "vocos":
+        ckpt_step = 1200000
+    else:
+        model = "F5TTS_Base_bigvgan"
+        ckpt_type = "pt"
+elif model == "E2TTS_Base":
+    repo_name, ckpt_step = "E2-TTS", 1200000
+if not ckpt_file:
+    ckpt_file = str(
+        cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}")
+    )
+print(f"Loading model {model} checkpoint…")
+ema_model = load_model(
+    ModelClass,
+    model_cfg.model.arch,
+    ckpt_file,
+    mel_spec_type=vocoder_name,
+    vocab_file=vocab_file,
+    device=device,
+)
+def generate_tts(input_text, output_dir="tests", output_file=None, ref_text=None):
+    """
+    Generate text-to-speech audio from input text.
+    Args:
+        input_text (str): Text to convert to speech
+        output_dir (str): Directory to save the output file (default: "tests")
+        output_file (str): Output filename (default: auto-generated based on timestamp)
+        ref_text (str): Reference text (default: predefined text)
+    Returns:
+        str: Path to the generated audio file
+    """
+    if ref_text is None:
+        ref_text = (
+            "Fuck your phone. Stop texting all the time. "
+            "Look up from your phone and breathe. Release yourself."
+        )
+    gen_text = input_text
+    if output_file is None:
+        output_file = f"infer_cli_{datetime.now():%Y%m%d_%H%M%S}.wav"
+    # load preprocessed reference weights
+    base_dir = os.path.dirname(os.path.dirname(__file__))
+    pkl_path = os.path.join(base_dir, "data", "ref_weights.pkl")
+    voices = load_ref_weights(pkl_path)
+    # break text into per‑voice chunks
+    reg1 = r"(?=\[\w+\])"
+    reg2 = r"\[(\w+)\]"
+    chunks = re.split(reg1, gen_text)
+    segments = []
+    for chunk in chunks:
+        txt = chunk.strip()
+        if not txt:
+            continue
+        m = re.match(reg2, txt)
+        if m:
+            voice = m.group(1)
+            txt = re.sub(reg2, "", txt).strip()
+        else:
+            voice = "main"
+        if voice not in voices:
+            print(f"Unknown voice '{voice}', using main.")
+            voice = "main"
+        seg, sr, _ = infer_process(
+            voices[voice]["ref_audio"],
+            voices[voice]["ref_text"],
+            txt,
+            ema_model,
+            vocoder,
+            mel_spec_type=vocoder_name,
+            target_rms=target_rms,
+            cross_fade_duration=cross_fade_duration,
+            nfe_step=nfe_step,
+            cfg_strength=cfg_strength,
+            sway_sampling_coef=sway_sampling_coef,
+            speed=speed,
+            fix_duration=fix_duration,
+            device=device,
+        )
+        segments.append(seg)
+        if save_chunk:
+            name = txt[:200].replace(" ", "_")
+            sf.write(str(chunk_dir / f"{len(segments)-1}_{name}.wav"), seg, sr)
+    # concatenate and write
+    final = np.concatenate(segments) if segments else np.array([], dtype=np.float32)
+    os.makedirs(output_dir, exist_ok=True)
+    wave_path = Path(output_dir) / output_file
+    sf.write(str(wave_path), final, sr)
+    if remove_silence:
+        remove_silence_for_generated_wav(str(wave_path))
+    print(f"Written output to {wave_path}")
+    return str(wave_path)
+if __name__ == "__main__":
+    test_text = "This is a test of the TTS system."
+    generated_file = generate_tts(test_text)
+    print(f"Generated file: {generated_file}")

scripts/generate_quote_gradio.py ADDED Viewed

	@@ -0,0 +1,106 @@

+from huggingface_hub import login
+import os
+token = os.environ.get("HUGGINGFACE_TOKEN")
+login(token)
+import gradio as gr
+import spaces
+from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+from f5py import generate_tts
+from stitch import create_music_speech_mix
+import traceback
+import warnings
+# Suppress NVML initialization warning
+warnings.filterwarnings("ignore", message="Can't initialize NVML")
+@spaces.GPU()
+def generate_quote(temperature, top_p, max_length):
+    try:
+        def initialize_model():
+            adapter_path = "./checkpoints/epoch-11"
+            base_model = "mistralai/Mistral-7B-Instruct-v0.3"
+            # Check CUDA availability more thoroughly
+            device = "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
+            print(f"Using device: {device}")
+            tokenizer = AutoTokenizer.from_pretrained(base_model)
+            model = AutoModelForCausalLM.from_pretrained(
+                base_model,
+                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+                device_map="auto" if device == "cuda" else None
+            )
+            model = PeftModel.from_pretrained(model, adapter_path)
+            model.eval()
+            return pipeline("text-generation", model=model, tokenizer=tokenizer)
+        generator = initialize_model()
+        prompt = "Generate a Masood Boomgaard style quote:"
+        output = generator(
+            prompt,
+            max_new_tokens=max_length,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=50,
+            num_return_sequences=1
+        )
+        text = output[0]["generated_text"].replace(prompt, "")
+        output_path = generate_tts(input_text=text)
+        final_audio_path = create_music_speech_mix(speech_path=output_path)
+        return text, final_audio_path, None
+    except Exception as e:
+        error_msg = f"Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+        return None, None, error_msg
+with gr.Blocks() as demo:
+    gr.Markdown("# MasoodishWisdom")
+    with gr.Row():
+        with gr.Column():
+            temperature = gr.Slider(
+                minimum=0.1, maximum=1.0, step=0.1, value=0.9,
+                label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1, maximum=1.0, step=0.05, value=0.95,
+                label="Top-p"
+            )
+            max_length = gr.Slider(
+                minimum=50, maximum=200, step=10, value=100,
+                label="Max Length"
+            )
+            generate_btn = gr.Button("Generate Quote")
+        with gr.Column():
+            text_output = gr.Textbox(label="Generated Quote")
+            audio_output = gr.Audio(label="Generated Audio")
+            error_output = gr.Textbox(label="Error Log", visible=True)
+    def handle_generation(*args):
+        text, audio, error = generate_quote(*args)
+        if error:
+            return [None, None, error]
+        return [text, audio, None]
+    generate_btn.click(
+        handle_generation,
+        inputs=[temperature, top_p, max_length],
+        outputs=[text_output, audio_output, error_output]
+    )
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        show_error=True,
+        share=False
+    )

scripts/ref_utils.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import tomli
+import pickle
+import os
+from importlib.resources import files
+from f5_tts.infer.utils_infer import preprocess_ref_audio_text
+def load_ref_weights(pkl_path="ref_weights.pkl"):
+    """
+    Load and return a dict of voices -> {"ref_audio", "ref_text"}.
+    Args:
+        pkl_path (str): Path to the pickle file.
+    Returns:
+        dict: Mapping voice names to preprocessed refs.
+    """
+    if not os.path.isfile(pkl_path):
+        raise FileNotFoundError(f"Ref weights pickle not found at {pkl_path}. Please run ref_utils.py first.")
+    with open(pkl_path, "rb") as f:
+        return pickle.load(f)
+def build_ref_weights(config_path="infer/examples/basic/basic.toml", output_pkl="data/ref_weights.pkl"):
+    with open(config_path, "rb") as f:
+        config = tomli.load(f)
+    def fix_path(path):
+        if "infer/examples/" in path:
+            return str(files("f5_tts").joinpath(path))
+        return path
+    ref_audio = fix_path(config.get("ref_audio", "data/15sec.wav"))
+    ref_text = config.get("ref_text")
+    main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
+    voices = {"main": main_voice}
+    if "voices" in config:
+        for name, v in config["voices"].items():
+            voices[name] = {
+                "ref_audio": fix_path(v.get("ref_audio")),
+                "ref_text": v.get("ref_text"),
+            }
+    for v in voices.values():
+        v["ref_audio"], v["ref_text"] = preprocess_ref_audio_text(
+            v["ref_audio"], v["ref_text"]
+        )
+    with open(output_pkl, "wb") as f:
+        pickle.dump(voices, f)
+    print(f"Saved {output_pkl}")
+# Running this file as a script rebuilds the pickle using the default config and output paths.
+if __name__ == "__main__":
+    build_ref_weights()

scripts/stitch.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from pydub import AudioSegment
+import random
+def create_music_speech_mix(speech_path, music_path="data/instrumental.wav", output_path="data/output.wav"):
+    """
+    Combine speech audio with background music at random position.
+    Args:
+        speech_path (str): Path to speech WAV file
+        music_path (str): Path to music WAV file (default: data/instrumental.wav)
+        output_path (str): Path for output WAV file (default: output.wav)
+    Returns:
+        tuple: (start_time_seconds, end_time_seconds)
+    """
+    speech = AudioSegment.from_wav(speech_path)
+    music = AudioSegment.from_wav(music_path)
+    # Durations (in milliseconds)
+    speech_len = len(speech)
+    music_len = len(music)
+    if speech_len > music_len:
+        raise ValueError("Speech audio is longer than background music!")
+    # Choose a random start point
+    max_start = music_len - speech_len
+    start_ms = random.randint(0, max_start)
+    # Extract the music segment
+    music_segment = music[start_ms : start_ms + speech_len]
+    # Lower volume by 10db
+    # music_segment = music_segment - 10
+    # Overlay speech on music
+    combined = music_segment.overlay(speech)
+    combined.export(output_path, format="wav")
+    return output_path
+if __name__ == "__main__":
+    output_path = create_music_speech_mix("tests/infer_cli_basic.wav")
+    print(f"Created {output_path} using music")