0xrushi committed · Commit 06bc80f · 0 Parent(s)

Initial commit
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,46 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+ .env
+ .venv
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .DS_Store
+
+ # Project specific
+ output/
+ tests/
+ checkpoints/
+ *.log
+ wandb
+
+ data/training_data
+ code.py
+ main.py
README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ title: MasoodishWisdom
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: green
+ sdk: gradio
+ sdk_version: "4.19.2"
+ app_file: scripts/generate_quote_gradio.py
+ pinned: false
+ ---
+
+ # MasoodishWisdom
+
+ An AI-powered wisdom-sharing platform built with Gradio and Hugging Face Spaces.
+
+ ## Setup
+
+ 1. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 2. Run the application:
+ ```bash
+ python scripts/generate_quote_gradio.py
+ ```
+
+ ## Configuration
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ ## License
+
+ MIT License
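Once the app is running, the Gradio UI is the primary interface, but it can also be called programmatically. The snippet below is a minimal sketch using `gradio_client`; the local URL assumes the default port from `demo.launch()`, and the endpoint name `/handle_generation` is an assumption based on Gradio's convention of naming endpoints after the handler function.

```python
# Hypothetical client-side call; assumes the demo is already running locally on the
# default port and that the click handler is exposed under its function name.
from gradio_client import Client

client = Client("http://127.0.0.1:7860")
text, audio_path, error = client.predict(
    0.9,    # temperature
    0.95,   # top_p
    100,    # max length (new tokens)
    api_name="/handle_generation",
)
print(text or error)
```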
checkpoints/epoch-10/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.3
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
checkpoints/epoch-10/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "q_proj",
+     "k_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
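This adapter config describes a rank-8 LoRA (alpha 16, dropout 0.1) over the q/k/v/o attention projections of Mistral-7B-Instruct-v0.3. Below is a minimal loading sketch that mirrors what `scripts/generate_quote_gradio.py` does at request time; the fp16 dtype and `device_map="auto"` are assumptions for a CUDA machine.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(
    base,
    torch_dtype=torch.float16,  # assumes a CUDA device; use float32 on CPU
    device_map="auto",
)
# Attach the LoRA adapter stored in this checkpoint directory.
model = PeftModel.from_pretrained(model, "checkpoints/epoch-10")
model.eval()
```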
checkpoints/epoch-10/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e8f1e42f01ddab093e73c66b528145d5bb5b6d19742236caaa6607a3e25acc1
+ size 27297032
checkpoints/epoch-10/quotes_epoch_10.txt ADDED
@@ -0,0 +1,27 @@
+ 💭 Sample 1:
+ Generate a Masood Boomgaard style quote:
+ There are people who can tell you the exact day of the week and
+ month of any date in history.
+ And then there are the people who could tell you...
+ to never bother with learning that sort of frivolous shit,
+ because not only does it not impress the opposite
+
+ 💭 Sample 2:
+ Generate a Masood Boomgaard style quote:
+ If you are going to work on a Saturday,
+ You might as well also work on a Sunday.
+ That way, you might as well not work
+ Any other day of the week.
+ Just chill the fuck
+ Right out.
+
+ 💭 Sample 3:
+ Generate a Masood Boomgaard style quote:
+ Why settle for average when you can consistently be average?
+ More and more companies are selling the idea of
+ 'Being exceptional',
+ 'Reaching your potential'
+ And 'Living your best life'.
+ This is all a vast selling project.
+ What these concepts fail to clearly
+
checkpoints/epoch-10/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoints/epoch-10/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/epoch-10/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+ size 587404
checkpoints/epoch-10/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/epoch-11/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.3
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
checkpoints/epoch-11/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "q_proj",
+     "k_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
checkpoints/epoch-11/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b91b1cf87d31816052bb1b23e1ef35283aec9cb12304ac133b7b2c84993aba6
+ size 27297032
checkpoints/epoch-11/quotes_epoch_11.txt ADDED
@@ -0,0 +1,30 @@
+ 💭 Sample 1:
+ Generate a Masood Boomgaard style quote:
+ Stop looking for the perfect place
+ To start over.
+ Every place has its problems,
+ Even your place.
+ Just fucking deal with it.
+ You are going to find the peace you seek,
+ And the place where you find it is irrelevant.
+
+ 💭 Sample 2:
+ Generate a Masood Boomgaard style quote:
+ Earth's problems are caused by humankind wanting what
+ Others want.
+ This could only get resolved if nobody wanted anything.
+ So, do not be impressed by the upcoming release of the new
+ Apple products.
+ Do nothing.
+ Just walk away.
+
+ 💭 Sample 3:
+ Generate a Masood Boomgaard style quote:
+ Saying sorry is important.
+ But saying sorry is not as important as noticing how it happened in the
+ First place.
+ Grant yourself an apology tab.
+ It might be large.
+ If you are apology tab is high,
+ you are like a CEO with
+
checkpoints/epoch-11/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoints/epoch-11/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/epoch-11/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+ size 587404
checkpoints/epoch-11/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
data/instrumental.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3eea4a4a2fb91ee6e139724860b2bca597a9187ed8e09be60e46de82266aef34
+ size 5097476
data/ref_weights.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76513fe3c720861d8c165113a4844336c957422d3c967e9a3e5300d1a1293bfe
+ size 126
infer/examples/basic/basic.toml ADDED
@@ -0,0 +1,11 @@
+ # F5TTS_v1_Base | E2TTS_Base
+ model = "F5TTS_v1_Base"
+ ref_audio = "infer/examples/basic/basic_ref_en.wav"
+ # If empty (""), the reference audio is transcribed automatically.
+ ref_text = "Some call me nature, others call me mother nature."
+ gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
+ # File with text to generate. If set, it overrides gen_text above.
+ gen_file = ""
+ remove_silence = false
+ output_dir = "tests"
+ output_file = "infer_cli_basic.wav"
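This TOML holds the inference defaults; `scripts/f5py.py` and `scripts/ref_utils.py` read it with `tomli` and fall back to these values when a setting is not overridden in code. A minimal sketch of that pattern:

```python
import tomli

# Read the inference defaults; an empty ref_text means "transcribe the reference audio".
with open("infer/examples/basic/basic.toml", "rb") as f:
    config = tomli.load(f)

ref_audio = config.get("ref_audio")
ref_text = config.get("ref_text", "")
gen_text = config.get("gen_text", "")
if config.get("gen_file"):  # if set, the text to generate is loaded from this file instead
    with open(config["gen_file"], encoding="utf-8") as f:
        gen_text = f.read()
print(ref_audio, ref_text, gen_text, sep="\n")
```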
requirements.txt ADDED
Binary file (6.11 kB). View file
 
scripts/f5py.py ADDED
@@ -0,0 +1,230 @@
1
+ import codecs
2
+ import os
3
+ import re
4
+ from datetime import datetime
5
+ from importlib.resources import files
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ import soundfile as sf
10
+ import tomli
11
+ from cached_path import cached_path
12
+ from hydra.utils import get_class
13
+ from omegaconf import OmegaConf
14
+
15
+ from f5_tts.infer.utils_infer import (
16
+ mel_spec_type,
17
+ target_rms,
18
+ cross_fade_duration,
19
+ nfe_step,
20
+ cfg_strength,
21
+ sway_sampling_coef,
22
+ speed,
23
+ fix_duration,
24
+ device,
25
+ infer_process,
26
+ load_model,
27
+ load_vocoder,
28
+ remove_silence_for_generated_wav,
29
+ )
30
+ from ref_utils import load_ref_weights
31
+
32
+ # ── USER CONFIG ────────────────────────────────────────────────────────────────
33
+ config_path = "infer/examples/basic/basic.toml"
34
+ model = "F5TTS_v1_Base"
35
+ model_cfg_path = None # e.g. "path/to/your/model.yaml", or leave None to use default from config
36
+ ckpt_file = "" # leave blank to pull from HF cache
37
+ vocab_file = "" # leave blank to use default
38
+ ref_text = (
39
+ "Fuck your phone. Stop texting all the time. "
40
+ "Look up from your phone and breathe. Release yourself."
41
+ )
42
+ gen_text = (
43
+ "I am not feeling it. This is it. There is no reconceptualizing."
44
+ )
45
+ gen_file = "" # if set, will override gen_text by loading from this file
46
+ output_dir = "tests"
47
+ output_file = f"infer_cli_{datetime.now():%Y%m%d_%H%M%S}.wav"
48
+ save_chunk = False
49
+ remove_silence = False
50
+ load_vocoder_from_local = False
51
+ vocoder_name = None # "vocos" or "bigvgan" or None to use default from config
52
+ # ────────────────────────────────────────────────────────────────────────────────
53
+
54
+ # load config
55
+ config = tomli.load(open(config_path, "rb"))
56
+
57
+ # resolve parameters (fall back to config defaults where applicable)
58
+ model_cfg_path = model_cfg_path or config.get("model_cfg", None)
59
+ ckpt_file = ckpt_file or config.get("ckpt_file", "")
60
+ vocab_file = vocab_file or config.get("vocab_file", "")
61
+ gen_file = gen_file or config.get("gen_file", "")
62
+ save_chunk = save_chunk or config.get("save_chunk", False)
63
+ remove_silence = remove_silence or config.get("remove_silence", False)
64
+ load_vocoder_from_local = load_vocoder_from_local or config.get("load_vocoder_from_local", False)
65
+
66
+ vocoder_name = vocoder_name or config.get("vocoder_name", mel_spec_type)
67
+ target_rms = config.get("target_rms", target_rms)
68
+ cross_fade_duration = config.get("cross_fade_duration", cross_fade_duration)
69
+ nfe_step = config.get("nfe_step", nfe_step)
70
+ cfg_strength = config.get("cfg_strength", cfg_strength)
71
+ sway_sampling_coef = config.get("sway_sampling_coef", sway_sampling_coef)
72
+ speed = config.get("speed", speed)
73
+ fix_duration = config.get("fix_duration", fix_duration)
74
+ device = config.get("device", device)
75
+
76
+ # if user pointed at example paths inside the package, fix them
77
+ # if "infer/examples/" in ref_audio:
78
+ # ref_audio = str(files("f5_tts").joinpath(ref_audio))
79
+ # if gen_file and "infer/examples/" in gen_file:
80
+ # gen_file = str(files("f5_tts").joinpath(gen_file))
81
+ # if "voices" in config:
82
+ # for v in config["voices"].values():
83
+ # if "infer/examples/" in v.get("ref_audio", ""):
84
+ # v["ref_audio"] = str(files("f5_tts").joinpath(v["ref_audio"]))
85
+
86
+ # if using a gen_file, load its text
87
+ if gen_file:
88
+ gen_text = codecs.open(gen_file, "r", "utf-8").read()
89
+
90
+ # prepare output paths
91
+ wave_path = Path(output_dir) / output_file
92
+ if save_chunk:
93
+ chunk_dir = Path(output_dir) / f"{wave_path.stem}_chunks"
94
+ chunk_dir.mkdir(parents=True, exist_ok=True)
95
+
96
+ # load vocoder
97
+ if vocoder_name == "vocos":
98
+ vocoder_local_path = "../checkpoints/vocos-mel-24khz"
99
+ elif vocoder_name == "bigvgan":
100
+ vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
101
+ else:
102
+ vocoder_local_path = None
103
+
104
+ vocoder = load_vocoder(
105
+ vocoder_name=vocoder_name,
106
+ is_local=load_vocoder_from_local,
107
+ local_path=vocoder_local_path,
108
+ device=device,
109
+ )
110
+
111
+ # load TTS model
112
+ model_cfg = OmegaConf.load(
113
+ model_cfg_path
114
+ or str(files("f5_tts").joinpath(f"configs/{model}.yaml"))
115
+ )
116
+ ModelClass = get_class(f"f5_tts.model.{model_cfg.model.backbone}")
117
+ mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
118
+
119
+ repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"
120
+ if model == "F5TTS_Base":
121
+ if vocoder_name == "vocos":
122
+ ckpt_step = 1200000
123
+ else:
124
+ model = "F5TTS_Base_bigvgan"
125
+ ckpt_type = "pt"
126
+ elif model == "E2TTS_Base":
127
+ repo_name, ckpt_step = "E2-TTS", 1200000
128
+
129
+ if not ckpt_file:
130
+ ckpt_file = str(
131
+ cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}")
132
+ )
133
+
134
+ print(f"Loading model {model} checkpoint…")
135
+ ema_model = load_model(
136
+ ModelClass,
137
+ model_cfg.model.arch,
138
+ ckpt_file,
139
+ mel_spec_type=vocoder_name,
140
+ vocab_file=vocab_file,
141
+ device=device,
142
+ )
143
+
144
+
145
+ def generate_tts(input_text, output_dir="tests", output_file=None, ref_text=None):
146
+ """
147
+ Generate text-to-speech audio from input text.
148
+
149
+ Args:
150
+ input_text (str): Text to convert to speech
151
+ output_dir (str): Directory to save the output file (default: "tests")
152
+ output_file (str): Output filename (default: auto-generated based on timestamp)
153
+ ref_text (str): Reference text (default: predefined text)
154
+
155
+ Returns:
156
+ str: Path to the generated audio file
157
+ """
158
+ if ref_text is None:
159
+ ref_text = (
160
+ "Fuck your phone. Stop texting all the time. "
161
+ "Look up from your phone and breathe. Release yourself."
162
+ )
163
+
164
+ gen_text = input_text
165
+
166
+ if output_file is None:
167
+ output_file = f"infer_cli_{datetime.now():%Y%m%d_%H%M%S}.wav"
168
+
169
+ # load preprocessed reference weights
170
+ base_dir = os.path.dirname(os.path.dirname(__file__))
171
+ pkl_path = os.path.join(base_dir, "data", "ref_weights.pkl")
172
+ voices = load_ref_weights(pkl_path)
173
+
174
+ # break text into per‑voice chunks
175
+ reg1 = r"(?=\[\w+\])"
176
+ reg2 = r"\[(\w+)\]"
177
+ chunks = re.split(reg1, gen_text)
178
+
179
+ segments = []
180
+ for chunk in chunks:
181
+ txt = chunk.strip()
182
+ if not txt:
183
+ continue
184
+ m = re.match(reg2, txt)
185
+ if m:
186
+ voice = m.group(1)
187
+ txt = re.sub(reg2, "", txt).strip()
188
+ else:
189
+ voice = "main"
190
+
191
+ if voice not in voices:
192
+ print(f"Unknown voice '{voice}', using main.")
193
+ voice = "main"
194
+
195
+ seg, sr, _ = infer_process(
196
+ voices[voice]["ref_audio"],
197
+ voices[voice]["ref_text"],
198
+ txt,
199
+ ema_model,
200
+ vocoder,
201
+ mel_spec_type=vocoder_name,
202
+ target_rms=target_rms,
203
+ cross_fade_duration=cross_fade_duration,
204
+ nfe_step=nfe_step,
205
+ cfg_strength=cfg_strength,
206
+ sway_sampling_coef=sway_sampling_coef,
207
+ speed=speed,
208
+ fix_duration=fix_duration,
209
+ device=device,
210
+ )
211
+ segments.append(seg)
212
+
213
+ if save_chunk:
214
+ name = txt[:200].replace(" ", "_")
215
+ sf.write(str(chunk_dir / f"{len(segments)-1}_{name}.wav"), seg, sr)
216
+
217
+ # concatenate and write
218
+ final = np.concatenate(segments) if segments else np.array([], dtype=np.float32)
219
+ os.makedirs(output_dir, exist_ok=True)
220
+ wave_path = Path(output_dir) / output_file
221
+ sf.write(str(wave_path), final, sr)
222
+ if remove_silence:
223
+ remove_silence_for_generated_wav(str(wave_path))
224
+ print(f"Written output to {wave_path}")
225
+ return str(wave_path)
226
+
227
+ if __name__ == "__main__":
228
+ test_text = "This is a test of the TTS system."
229
+ generated_file = generate_tts(test_text)
230
+ print(f"Generated file: {generated_file}")
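`generate_tts` is the function the Gradio app imports; note that importing `f5py` also runs the module-level vocoder and model loading above, so the first import is slow. A minimal usage sketch, assuming the script is invoked from the repository root so the relative `infer/` and `data/` paths resolve:

```python
# Minimal sketch of calling the synthesizer directly, as scripts/generate_quote_gradio.py does.
# Importing f5py triggers the module-level vocoder/model loading shown above.
from f5py import generate_tts

wav_path = generate_tts(
    input_text="Do nothing. Just walk away.",
    output_dir="tests",  # matches the module's default output directory
)
print(f"Wrote {wav_path}")
```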
scripts/generate_quote_gradio.py ADDED
@@ -0,0 +1,106 @@
1
+ from huggingface_hub import login
2
+ import os
3
+
4
+ token = os.environ.get("HUGGINGFACE_TOKEN")
5
+ login(token)
6
+
7
+ import gradio as gr
8
+ import spaces
9
+ from peft import PeftModel
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
11
+ import torch
12
+ from f5py import generate_tts
13
+ from stitch import create_music_speech_mix
14
+ import traceback
15
+ import warnings
16
+
17
+ # Suppress NVML initialization warning
18
+ warnings.filterwarnings("ignore", message="Can't initialize NVML")
19
+
20
+ @spaces.GPU()
21
+ def generate_quote(temperature, top_p, max_length):
22
+ try:
23
+ def initialize_model():
24
+ adapter_path = "./checkpoints/epoch-11"
25
+ base_model = "mistralai/Mistral-7B-Instruct-v0.3"
26
+
27
+ # Check CUDA availability more thoroughly
28
+ device = "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
29
+ print(f"Using device: {device}")
30
+
31
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
32
+ model = AutoModelForCausalLM.from_pretrained(
33
+ base_model,
34
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
35
+ device_map="auto" if device == "cuda" else None
36
+ )
37
+
38
+ model = PeftModel.from_pretrained(model, adapter_path)
39
+ model.eval()
40
+
41
+ return pipeline("text-generation", model=model, tokenizer=tokenizer)
42
+
43
+ generator = initialize_model()
44
+ prompt = "Generate a Masood Boomgaard style quote:"
45
+
46
+ output = generator(
47
+ prompt,
48
+ max_new_tokens=max_length,
49
+ do_sample=True,
50
+ temperature=temperature,
51
+ top_p=top_p,
52
+ top_k=50,
53
+ num_return_sequences=1
54
+ )
55
+
56
+ text = output[0]["generated_text"].replace(prompt, "")
57
+ output_path = generate_tts(input_text=text)
58
+ final_audio_path = create_music_speech_mix(speech_path=output_path)
59
+
60
+ return text, final_audio_path, None
61
+ except Exception as e:
62
+ error_msg = f"Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
63
+ return None, None, error_msg
64
+
65
+ with gr.Blocks() as demo:
66
+ gr.Markdown("# MasoodishWisdom")
67
+
68
+ with gr.Row():
69
+ with gr.Column():
70
+ temperature = gr.Slider(
71
+ minimum=0.1, maximum=1.0, step=0.1, value=0.9,
72
+ label="Temperature"
73
+ )
74
+ top_p = gr.Slider(
75
+ minimum=0.1, maximum=1.0, step=0.05, value=0.95,
76
+ label="Top-p"
77
+ )
78
+ max_length = gr.Slider(
79
+ minimum=50, maximum=200, step=10, value=100,
80
+ label="Max Length"
81
+ )
82
+ generate_btn = gr.Button("Generate Quote")
83
+
84
+ with gr.Column():
85
+ text_output = gr.Textbox(label="Generated Quote")
86
+ audio_output = gr.Audio(label="Generated Audio")
87
+ error_output = gr.Textbox(label="Error Log", visible=True)
88
+
89
+ def handle_generation(*args):
90
+ text, audio, error = generate_quote(*args)
91
+ if error:
92
+ return [None, None, error]
93
+ return [text, audio, None]
94
+
95
+ generate_btn.click(
96
+ handle_generation,
97
+ inputs=[temperature, top_p, max_length],
98
+ outputs=[text_output, audio_output, error_output]
99
+ )
100
+
101
+ if __name__ == "__main__":
102
+ demo.launch(
103
+ server_name="0.0.0.0",
104
+ show_error=True,
105
+ share=False
106
+ )
scripts/ref_utils.py ADDED
@@ -0,0 +1,54 @@
+ import tomli
+ import pickle
+ import os
+ from importlib.resources import files
+ from f5_tts.infer.utils_infer import preprocess_ref_audio_text
+
+ def load_ref_weights(pkl_path="ref_weights.pkl"):
+     """
+     Load and return a dict of voices -> {"ref_audio", "ref_text"}.
+
+     Args:
+         pkl_path (str): Path to the pickle file.
+
+     Returns:
+         dict: Mapping voice names to preprocessed refs.
+     """
+
+     if not os.path.isfile(pkl_path):
+         raise FileNotFoundError(f"Ref weights pickle not found at {pkl_path}. Please run ref_utils.py first.")
+     with open(pkl_path, "rb") as f:
+         return pickle.load(f)
+
+ def build_ref_weights(config_path="infer/examples/basic/basic.toml", output_pkl="data/ref_weights.pkl"):
+     with open(config_path, "rb") as f:
+         config = tomli.load(f)
+
+     def fix_path(path):
+         if "infer/examples/" in path:
+             return str(files("f5_tts").joinpath(path))
+         return path
+
+     ref_audio = fix_path(config.get("ref_audio", "data/15sec.wav"))
+     ref_text = config.get("ref_text")
+     main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
+     voices = {"main": main_voice}
+     if "voices" in config:
+         for name, v in config["voices"].items():
+             voices[name] = {
+                 "ref_audio": fix_path(v.get("ref_audio")),
+                 "ref_text": v.get("ref_text"),
+             }
+
+     for v in voices.values():
+         v["ref_audio"], v["ref_text"] = preprocess_ref_audio_text(
+             v["ref_audio"], v["ref_text"]
+         )
+
+     with open(output_pkl, "wb") as f:
+         pickle.dump(voices, f)
+     print(f"Saved {output_pkl}")
+
+
+ if __name__ == "__main__":
+     build_ref_weights()
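The pickle consumed by `load_ref_weights` is produced by `build_ref_weights`, which preprocesses the reference audio/text once so inference does not repeat that work. A short round-trip sketch under the repo's default paths:

```python
from ref_utils import build_ref_weights, load_ref_weights

# One-time preprocessing: read the TOML, run preprocess_ref_audio_text, write the pickle.
build_ref_weights(
    config_path="infer/examples/basic/basic.toml",
    output_pkl="data/ref_weights.pkl",
)

# At inference time, f5py.py only needs the cached result.
voices = load_ref_weights("data/ref_weights.pkl")
print(voices["main"]["ref_text"])
```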
scripts/stitch.py ADDED
@@ -0,0 +1,45 @@
+ from pydub import AudioSegment
+ import random
+
+ def create_music_speech_mix(speech_path, music_path="data/instrumental.wav", output_path="data/output.wav"):
+     """
+     Combine speech audio with background music at a random position.
+
+     Args:
+         speech_path (str): Path to speech WAV file
+         music_path (str): Path to music WAV file (default: data/instrumental.wav)
+         output_path (str): Path for output WAV file (default: data/output.wav)
+
+     Returns:
+         str: Path to the mixed output WAV file
+     """
+     speech = AudioSegment.from_wav(speech_path)
+     music = AudioSegment.from_wav(music_path)
+
+     # Durations (in milliseconds)
+     speech_len = len(speech)
+     music_len = len(music)
+
+     if speech_len > music_len:
+         raise ValueError("Speech audio is longer than background music!")
+
+     # Choose a random start point
+     max_start = music_len - speech_len
+     start_ms = random.randint(0, max_start)
+
+     # Extract the music segment
+     music_segment = music[start_ms : start_ms + speech_len]
+
+     # Lower volume by 10 dB
+     # music_segment = music_segment - 10
+
+     # Overlay speech on music
+     combined = music_segment.overlay(speech)
+
+     combined.export(output_path, format="wav")
+
+     return output_path
+
+ if __name__ == "__main__":
+     output_path = create_music_speech_mix("tests/infer_cli_basic.wav")
+     print(f"Created {output_path} using music")