Commit · 87ae0b7
1 Parent(s): 2d7a385
Initial commit

Files changed:
- app.py +245 -0
- assets/asciilogo.txt +11 -0
- requirements.txt +11 -0
- source/languagemodel.py +288 -0
- source/utilities.py +331 -0
app.py
ADDED
@@ -0,0 +1,245 @@
import streamlit as st
from source.languagemodel import LanguageModel
from source.utilities import (
    convert_tokens_to_songdata,
    convert_songdata_to_notesequence,
    convert_songdata_to_pianoroll,
    convert_notesequence_to_wave,
    convert_notesequence_to_midi
)

# Define the MIDI instruments.
midi_instruments = {
    "Harpsichord": 6,
    "Church Organ": 19,
    "Piano": 0,
}

# Load the model once and cache it.
@st.cache_resource
def load_model():
    model = LanguageModel("TristanBehrens/bach-garland-mambaplus")
    return model
model = load_model()


# Initialize the session state if it doesn't exist yet.
if "token_sequence" not in st.session_state:
    st.session_state.token_sequence = "GARLAND_START"
    st.session_state.song_data = None
    st.session_state.piano_roll = None
    st.session_state.wave = None
    st.session_state.note_sequence = None
    st.session_state.midi_file_content = None
    st.session_state.temperature = 0.1
    st.session_state.bpm = 100
    st.session_state.instrument = "Piano"


# Define the main function.
def main():

    columns = st.columns([0.7, 0.3])

    # Set up the Streamlit application.
    column = columns.pop(0)
    with column:

        # Change the color of links (a-tags) to #FF4B4B.
        st.markdown("<style>a:link { color: #FF4B4B; } a:visited { color: #FF4B4B; }</style>", unsafe_allow_html=True)

        # Add a title.
        st.title("Garland Composer")
        linkedin_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
        x_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
        st.write(f"By Dr. Tristan Behrens. Find me on [LinkedIn]({linkedin_url}) and [X]({x_url}).")
        hf_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
        st.write(f"Model available on [Hugging Face]({hf_url}).")

    # Add a picture.
    column = columns.pop(0)
    with column:
        st.write(" ")
        st.write(" ")
        st.write(" ")
        st.image("garland.jpg", use_column_width=True)

    # Add a horizontal line.
    st.markdown("---")

    # Create three columns for the controls.
    columns = st.columns(3)

    # Add a slider to control the temperature.
    state_temperature = st.session_state.temperature
    with columns.pop(0):
        temperature = st.slider("Temperature", 0.0, 1.0, state_temperature)
        st.session_state.temperature = temperature

    # Add a slider to control the BPM.
    state_bpm = st.session_state.bpm
    with columns.pop(0):
        bpm = st.slider("BPM", 80, 120, state_bpm, 5)
        st.session_state.bpm = bpm

    # Dropdown for the instrument.
    state_instrument = st.session_state.instrument
    with columns.pop(0):
        instrument = st.selectbox("Instrument", list(midi_instruments.keys()), index=list(midi_instruments.keys()).index(state_instrument))
        st.session_state.instrument = instrument

    # Get the token sequence from the session state.
    token_sequence = st.session_state.token_sequence

    # Columns for the buttons.
    columns = st.columns(5)

    # Add a button to generate the next bar.
    column = columns.pop(0)
    with column:
        if st.button("Add a bar", use_container_width=True):
            token_sequence = extend_sequence(model, token_sequence, temperature)
            refresh(token_sequence, bpm, instrument)

    # Add a button to auto-compose until the model signals the end.
    column = columns.pop(0)
    with column:
        if st.button("Auto compose", use_container_width=True):
            token_sequence = auto_compose(model, token_sequence, temperature)
            refresh(token_sequence, bpm, instrument)

    # Add a button to remove the last bar.
    column = columns.pop(0)
    with column:
        if st.button("Remove last", use_container_width=True):
            token_sequence = shortened_sequence(token_sequence)
            refresh(token_sequence, bpm, instrument)

    # Add a button to reset the sequence.
    column = columns.pop(0)
    if token_sequence != "GARLAND_START":
        with column:
            if st.button("Reset", use_container_width=True):
                token_sequence = "GARLAND_START"
                refresh(token_sequence, bpm, instrument)

    # Provide a download button for the MIDI file.
    column = columns.pop(0)
    if "midi_file_content" in st.session_state and st.session_state.midi_file_content is not None:
        with column:
            midi_file_content = st.session_state.midi_file_content
            if st.download_button(
                label="Download MIDI",
                data=midi_file_content,
                file_name="music.mid",
                mime="audio/midi",
                use_container_width=True
            ):
                pass

    # Add a horizontal line.
    st.markdown("---")

    # Display the piano roll.
    if "piano_roll" in st.session_state and st.session_state.piano_roll is not None:
        st.image(st.session_state.piano_roll)

    # Display an audio player.
    if "wave" in st.session_state and st.session_state.wave is not None:
        st.audio(st.session_state.wave, format="audio/wav", sample_rate=44100, autoplay=True)

    # Add a horizontal line.
    st.markdown("---")

    # Tell the user whether the model considers the piece finished.
    if token_sequence.endswith("GARLAND_END"):
        st.write("The AI believes that the music is finished.")
    else:
        st.write("The AI believes that the music is not finished.")


def auto_compose(model, token_sequence, temperature):

    # Keep adding bars until the model emits GARLAND_END or the iteration budget runs out.
    max_iterations = 100
    for _ in range(max_iterations):
        token_sequence = extend_sequence(model, token_sequence, temperature)
        if token_sequence.endswith("GARLAND_END"):
            break
    return token_sequence


def extend_sequence(model, token_sequence, temperature):

    # Replace the last GARLAND_END token with NEXT.
    if token_sequence.endswith("GARLAND_END"):
        token_sequence = token_sequence.replace("GARLAND_END", "NEXT")

    # The maximum length of the generated music.
    max_length = 16_384

    # When to stop the generation.
    end_tokens = ["NEXT", "GARLAND_END"]

    # Compose the music iteratively, bar by bar.
    output_dict = model.generate(
        prompt=token_sequence,
        temperature=temperature,
        max_length=max_length,
        end_tokens=end_tokens,
        forbidden_tokens=["[PAD]", "[EOS]"],
        return_structured_output=True
    )
    output = output_dict["output"]
    return output


def shortened_sequence(token_sequence):

    # Find the position of the next-to-last NEXT (or GARLAND_END) token and cut there.
    next_tokens = token_sequence.split()
    next_positions = [i for i, x in enumerate(next_tokens) if x == "NEXT" or x == "GARLAND_END"]
    if len(next_positions) <= 1:
        token_sequence = "GARLAND_START"
    else:
        next_position = next_positions[-2]
        token_sequence = " ".join(next_tokens[:next_position + 1])
    return token_sequence


def refresh(token_sequence="GARLAND_START", bpm=120, instrument="Piano"):

    # Get the token sequence into the session state.
    st.session_state.token_sequence = token_sequence

    # Convert to song data.
    song_data = convert_tokens_to_songdata(token_sequence)
    song_data["bpm"] = bpm
    st.session_state.song_data = song_data

    # Set the instrument.
    for track in song_data["tracks"]:
        track["instrument"] = midi_instruments[instrument]

    # Convert to piano roll.
    piano_roll = convert_songdata_to_pianoroll(song_data)
    st.session_state.piano_roll = piano_roll

    # Convert to note sequence.
    note_sequence = convert_songdata_to_notesequence(song_data)
    st.session_state.note_sequence = note_sequence

    # Synthesize the note sequence to audio.
    wave = convert_notesequence_to_wave(note_sequence)
    st.session_state.wave = wave

    # Get the MIDI file content.
    midi_file_content = convert_notesequence_to_midi(note_sequence)
    st.session_state.midi_file_content = midi_file_content

    # Rerun the app.
    st.rerun()


if __name__ == "__main__":
    main()
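
Note: the app keeps the whole token sequence in st.session_state and re-derives every artifact (piano roll, audio, MIDI) from it inside refresh(). The same loop can be driven without Streamlit; below is a minimal headless sketch (a hypothetical script, not part of this commit, assuming the checkpoint downloads successfully):

# compose_headless.py (hypothetical) - drive the generate-and-convert loop without the UI.
from source.languagemodel import LanguageModel
from source.utilities import (
    convert_tokens_to_songdata,
    convert_songdata_to_notesequence,
    convert_notesequence_to_midi,
)

model = LanguageModel("TristanBehrens/bach-garland-mambaplus")

# Grow the piece by four bars, like clicking "Add a bar" four times.
tokens = "GARLAND_START"
for _ in range(4):
    if tokens.endswith("GARLAND_END"):
        tokens = tokens.replace("GARLAND_END", "NEXT")
    tokens = model.generate(
        prompt=tokens,
        temperature=0.1,
        max_length=16_384,
        end_tokens=["NEXT", "GARLAND_END"],
        forbidden_tokens=["[PAD]", "[EOS]"],
    )

# Render the result to a MIDI file, exactly as the download button does.
song_data = convert_tokens_to_songdata(tokens)
song_data["bpm"] = 100
midi_bytes = convert_notesequence_to_midi(convert_songdata_to_notesequence(song_data))
if midi_bytes is not None:
    with open("music.mid", "wb") as f:
        f.write(midi_bytes)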
assets/asciilogo.txt
ADDED
@@ -0,0 +1,11 @@
▄█    █▄     ▄████████  ▄█        ▄█  ▀█████████▄     ▄████████ ███    █▄  ███▄▄▄▄   ███▄▄▄▄      ▄████████
███    ███   ███    ███ ███       ███    ███    ███   ███    ███ ███    ███ ███▀▀▀██▄ ███▀▀▀██▄   ███    ███
███    ███   ███    █▀  ███       ███▌   ███    ███   ███    ███ ███    ███ ███   ███ ███   ███   ███    ███
▄███▄▄▄▄███▄▄ ▄███▄▄▄    ███       ███▌  ▄███▄▄▄██▀   ▄███▄▄▄▄██▀ ███    ███ ███   ███ ███   ███   ███    ███
▀▀███▀▀▀▀███▀ ▀▀███▀▀▀    ███       ███▌ ▀▀███▀▀▀██▄  ▀▀███▀▀▀▀▀   ███    ███ ███   ███ ███   ███ ▀███████████
███    ███   ███    █▄  ███       ███    ███    ██▄ ▀███████████ ███    ███ ███   ███ ███   ███          ███
███    ███   ███    ███ ███▌    ▄ ███    ███    ███   ███    ███ ███    ███ ███   ███ ███   ███    ███    ███
███    █▀    ██████████ █████▄▄██ █▀   ▄█████████▀    ███    ███ ████████▀   ▀█   █▀   ▀█   █▀    ███    █▀
▀         ███    ███

By Dr. Tristan Behrens
requirements.txt
ADDED
@@ -0,0 +1,11 @@
dacite==1.8.1
colorama==0.4.6
omegaconf==2.3.0
streamlit==1.38.0
note_seq==0.0.5
pyfluidsynth==1.3.2
torch==2.2.0
transformers==4.44.0
mamba-ssm==2.2.2
einops==0.8.0
mambapy==1.2.0
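
Note: pyfluidsynth is only a Python binding; the fluidsynth library itself is a system package (on a Space typically listed in packages.txt, which is not part of this commit). Without it, convert_notesequence_to_wave in source/utilities.py falls back to note_seq.synthesize. A quick availability check (hypothetical snippet):

# The pyfluidsynth pin provides the "fluidsynth" module; importing it fails when
# the system libfluidsynth is missing (an assumption worth verifying locally).
try:
    import fluidsynth
    print("fluidsynth available: audio will be rendered with the soundfont synthesizer")
except Exception as error:
    print(f"fluidsynth unavailable ({error}); note_seq.synthesize will be used instead")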
source/languagemodel.py
ADDED
@@ -0,0 +1,288 @@
# Helibrunna - A HuggingFace compatible xLSTM trainer.
# Copyright (c) 2024 Dr. Tristan Behrens
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import glob
from omegaconf import OmegaConf
from transformers import PreTrainedTokenizerFast
import torch
from safetensors.torch import load_file
import time
from .utilities import display_logo, model_from_config


class LanguageModel:

    def __init__(self, model_path_or_repo, config_overrides={}, mask_special_tokens=True, device="auto"):
        """
        Initializes the LanguageModel object.
        Args:
            model_path_or_repo (str): The path to the model or the repository ID.
            config_overrides (dict, optional): Values merged over the loaded model config. Defaults to {}.
            mask_special_tokens (bool, optional): Whether special tokens are masked during generation. Defaults to True.
            device (str, optional): "cpu", "cuda", "mps", or "auto". Defaults to "auto".
        Raises:
            ValueError: If the model checkpoint, tokenizer, config, or weights are not found.
            RuntimeError: If the model could not be downloaded.
        Returns:
            None
        """

        # Set the mask_special_tokens flag.
        self.mask_special_tokens = mask_special_tokens

        # Use the requested device if one was given explicitly.
        if device != "auto":

            # Check if CUDA is available.
            if not torch.cuda.is_available() and device == "cuda":
                raise ValueError("CUDA is not available on this system.")

            # Check if MPS is available.
            if not torch.backends.mps.is_available() and device == "mps":
                raise ValueError("MPS is not available on this system.")

            # Set the device.
            self.device = device

        # Otherwise resolve the device automatically.
        else:

            # Default to CPU, use CUDA if it is available.
            self.device = "cpu"
            if torch.cuda.is_available():
                self.device = "cuda"

            # See if MPS is available.
            # Note: This is disabled for now. It's not working as expected. It is very slow.
            #if torch.backends.mps.is_available():
            #    self.device = "mps"

        # Display the logo.
        display_logo()

        # Download the model if it doesn't exist locally. Or at least try to.
        if not os.path.exists(model_path_or_repo):
            from huggingface_hub import snapshot_download
            try:
                model_path = snapshot_download(repo_id=model_path_or_repo)
                tokenizer_path = model_path
            except Exception as e:
                raise RuntimeError(f"Failed to download the model: {e}")

        # Use a local model.
        else:
            # Set the model path and tokenizer path.
            model_path = None
            tokenizer_path = model_path_or_repo

            # Find all the checkpoint folders (folders that start with "checkpoint-") and use the last one.
            checkpoint_folders = glob.glob(os.path.join(model_path_or_repo, "checkpoint-*"))
            for checkpoint_folder in checkpoint_folders:
                if checkpoint_folder.endswith("-last"):
                    model_path = checkpoint_folder
                    break
            if model_path is None:
                raise ValueError("No model checkpoint found.")

            # Find the tokenizer folder.
            if os.path.exists(os.path.join(model_path_or_repo, "tokenizer.json")):
                tokenizer_path = model_path_or_repo
            if not os.path.exists(tokenizer_path):
                raise ValueError("Tokenizer not found.")

        # Load the config.
        config_path = os.path.join(model_path, "config.yaml")
        if not os.path.exists(config_path):
            raise ValueError(f"Config not found at {config_path}")
        model_config = OmegaConf.load(config_path)

        # Override the config.
        if config_overrides != {} and config_overrides is not None:
            model_config = OmegaConf.merge(model_config, config_overrides)
            import json
            print(json.dumps(OmegaConf.to_container(model_config), indent=4))

        # Create the model from the config.
        model = model_from_config(model_config, device=self.device)
        model.to(self.device)
        self.config = model_config

        # Load the weights from the checkpoint.
        weights_path = os.path.join(model_path, "model.safetensors")
        if not os.path.exists(weights_path):
            raise ValueError(f"Weights not found at {weights_path}")
        state_dict = load_file(weights_path)

        # TODO: Permute the last two dimensions of these parameters: xlstm_block_stack.blocks.2.xlstm.slstm_cell._recurrent_kernel_
        # Check if we have an xLSTM model and if CUDA is not available.
        if not torch.cuda.is_available() and model_config.get("type", "xLSTMLMModel") == "xLSTMLMModel":
            print(state_dict.keys())
            endings = ["xlstm.slstm_cell._recurrent_kernel_"]
            for key, values in state_dict.items():
                for ending in endings:
                    if key.endswith(ending):
                        print(key)
                        print(values.shape)

                        # Option: Permute the last two dimensions.
                        values = values.permute(0, 2, 1)

                        # Option: View the tensor.
                        #new_shape = (values.shape[0], values.shape[2], values.shape[1])
                        #values = values.view(new_shape)

                        print(values.shape)
                        state_dict[key] = values
                        break

        # Load the weights into the model.
        model.load_state_dict(state_dict)
        self.model = model

        # Load the tokenizer.
        tokenizer_path = os.path.join(tokenizer_path, "tokenizer.json")
        if not os.path.exists(tokenizer_path):
            raise ValueError(f"Tokenizer not found at {tokenizer_path}")
        tokenizer = PreTrainedTokenizerFast.from_pretrained(tokenizer_path)
        self.tokenizer = tokenizer


    def generate(
        self,
        prompt: str,
        temperature: float = 1.0,
        max_length: int = 100,
        end_tokens: list[str] = [],
        forbidden_tokens: list[str] = [],
        return_structured_output: bool = False
    ):
        """
        Generates a continuation for a given prompt using the language model.
        Args:
            prompt (str): The prompt to generate a continuation for.
            temperature (float, optional): The temperature value for controlling the randomness of the generated output.
                Higher values (e.g., 1.0) make the output more random, while lower values (e.g., 0.5) make it more deterministic.
                Defaults to 1.0.
            max_length (int, optional): The maximum length of the generated output. Defaults to 100.
            end_tokens (list[str], optional): A list of end tokens that, if encountered, will stop the generation process.
                Defaults to an empty list.
            forbidden_tokens (list[str], optional): A list of tokens that must not be sampled. Defaults to an empty list.
            return_structured_output (bool, optional): If True, returns a dictionary with the generated output, elapsed time,
                and tokens per second. If False, returns only the generated output as a string. Defaults to False.
        Returns:
            str or dict: The generated output as a string if return_structured_output is False.
                A dictionary with the generated output, elapsed time, and tokens per second if return_structured_output is True.
        """

        # Tokenize the prompt.
        inputs = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
        assert inputs.shape[0] == 1

        # Determine the end token ids.
        end_token_ids = []
        for end_token in end_tokens:
            assert end_token in self.tokenizer.vocab
            end_token_ids.append(self.tokenizer(end_token).input_ids[0])

        # Determine the ids of the forbidden tokens so they can be masked out.
        ids_to_mask = []
        for forbidden_token in forbidden_tokens:
            assert forbidden_token in self.tokenizer.vocab
            ids_to_mask.extend(self.tokenizer(forbidden_token).input_ids)

        # Generate the continuation.
        start_time = time.time()
        tokens_count = 0
        while inputs.shape[1] < max_length:

            # Stop if the maximum context length is reached.
            if inputs.shape[1] >= self.config.context_length:
                print("Warning: The maximum context length has been reached.")
                break

            # Run the model on the current sequence.
            outputs = self.model(inputs.to(device=self.device))
            assert outputs.shape[0] == 1

            # Mask the special tokens and the forbidden tokens.
            outputs[:, :, self.tokenizer.all_special_ids] = float("-inf")
            if ids_to_mask:
                outputs[:, :, ids_to_mask] = float("-inf")

            # Use the temperature to sample from the distribution.
            outputs = outputs / temperature
            outputs = torch.nn.functional.softmax(outputs, dim=-1)
            outputs = torch.multinomial(outputs[0, -1], num_samples=1)

            # Append the sampled token to the inputs.
            inputs = torch.cat([inputs, outputs.unsqueeze(0)], dim=1)

            # Increment the tokens count.
            tokens_count += 1

            # Check if an end token has been reached.
            if outputs[0] in end_token_ids:
                break

        # Compute the elapsed time and tokens per second.
        elapsed_time = time.time() - start_time
        tokens_per_second = tokens_count / elapsed_time

        # Decode the output.
        output = self.tokenizer.decode(inputs[0].tolist())

        # Return the output.
        if not return_structured_output:
            return output

        # Return the structured output.
        else:
            return {
                "output": output,
                "elapsed_time": elapsed_time,
                "tokens_per_second": tokens_per_second
            }

    def summary(self):
        """
        Prints a summary of the model. Makes the model architecture readable. Includes the number of parameters.
        """

        # Print the model.
        print(self.model)

        # Get the number of parameters.
        number_of_parameters = sum(p.numel() for p in self.model.parameters())
        print(f"Number of parameters: {number_of_parameters:_}")
        sizes = ["", "K", "M", "B", "T"]
        size_index = 0
        while number_of_parameters > 1000:
            number_of_parameters /= 1000
            size_index += 1
        print(f"Number of parameters: {number_of_parameters:.2f}{sizes[size_index]}")

        # Compute the total size of the model in bytes (4 bytes per parameter) and make it human readable.
        number_of_parameters = sum(p.numel() for p in self.model.parameters())
        total_size = number_of_parameters * 4
        sizes = ["B", "KB", "MB", "GB", "TB"]
        size_index = 0
        while total_size > 1024:
            total_size /= 1024
            size_index += 1
        print(f"Total size of the model: {total_size:.2f}{sizes[size_index]} for 32-bit float precision.")

        # Print on which device the model is running.
        print(f"Device: {self.device}")
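
Note: the class hides checkpoint resolution, device selection, weight loading, and tokenization behind the constructor and generate(). A minimal usage sketch (assumes the Hugging Face repo is reachable; the prompt token is taken from app.py):

from source.languagemodel import LanguageModel

# Downloads the checkpoint on first use and picks CUDA automatically when present.
model = LanguageModel("TristanBehrens/bach-garland-mambaplus")
model.summary()

# The structured output also reports generation throughput.
result = model.generate(
    prompt="GARLAND_START",
    temperature=0.5,
    max_length=512,
    end_tokens=["NEXT", "GARLAND_END"],
    forbidden_tokens=["[PAD]", "[EOS]"],
    return_structured_output=True,
)
print(f"{result['tokens_per_second']:.1f} tokens per second")
print(result["output"][:200])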
source/utilities.py
ADDED
@@ -0,0 +1,331 @@
import copy
import note_seq
from PIL import Image
import tempfile
import os
import colorama
import colorsys
from omegaconf import DictConfig, OmegaConf
import torch
from typing import List, Tuple, Dict
from dacite import from_dict
from collections.abc import MutableMapping
import sys


# NOTE: Imported from helibrunna.
def display_logo():
    """
    Display the logo by printing it line by line with a cyberpunk color scheme.

    Raises:
        FileNotFoundError: If the logo file is missing.
    """

    # Get the path of this script and use it to find the logo.
    script_path = os.path.dirname(os.path.realpath(__file__))
    search_path = os.path.dirname(script_path)

    # Load the logo.
    logo_path = os.path.join(search_path, "assets", "asciilogo.txt")
    if not os.path.exists(logo_path):
        raise FileNotFoundError("The logo file is missing.")
    with open(logo_path, "r") as f:
        logo = f.read()

    # Print the logo line by line. Use colorama to colorize the output. Use a cyberpunk color scheme.
    for line_index, line in enumerate(logo.split("\n")):
        color = colorama.Fore.GREEN
        style = colorama.Style.BRIGHT if line_index % 2 == 0 else colorama.Style.NORMAL
        print(color + style + line)
    print(colorama.Style.RESET_ALL)


# NOTE: Imported from helibrunna.
def model_from_config(model_config: DictConfig, device: str) -> torch.nn.Module:
    """
    Create a model based on the provided model configuration.

    Args:
        model_config (DictConfig): The configuration for the model.
        device (str): The device to move the model to.

    Returns:
        The created model.

    Raises:
        ValueError: If the model type is unknown.
    """

    # Get the model type from the configuration.
    model_type = model_config.get("type", "xLSTMLMModel")

    # Create the xLSTMLMModel.
    if model_type == "xLSTMLMModel":
        print("Creating xLSTMLMModel...")
        from xlstm.xlstm_lm_model import xLSTMLMModel, xLSTMLMModelConfig

        # If there is no GPU, use the vanilla backend.
        if not torch.cuda.is_available():
            #model_config.backend = "vanilla"
            model_config.slstm_block.slstm.backend = "vanilla"
            model_config.mlstm_block.mlstm.backend = "vanilla"
        model_config_object = from_dict(xLSTMLMModelConfig, OmegaConf.to_container(model_config))

        # Create the model.
        model = xLSTMLMModel(model_config_object)
        model.reset_parameters()

    # Create the GPT2LMModel.
    elif model_type == "gpt2":
        print("Creating GPT2LMModel...")
        from .models.gpttwo import GPT2LMModel, GPT2LMModelConfig
        model_config_object = from_dict(GPT2LMModelConfig, OmegaConf.to_container(model_config))
        model = GPT2LMModel(model_config_object)

    # Create the Mamba LM.
    elif model_type == "mamba":
        print("Creating Mamba LM...")
        from mambapy.lm import LM, MambaConfig
        model_config_object = from_dict(MambaConfig, OmegaConf.to_container(model_config))
        model = LM(model_config_object, model_config.vocab_size)

    # Create the Transformer.
    elif model_type == "transformer":
        from .models.transformer import TransformerConfig, Transformer
        model_config_object = from_dict(TransformerConfig, OmegaConf.to_container(model_config))
        model = Transformer(model_config_object)

    # Create a Pharia instance.
    elif model_type == "pharia":
        from .models.pharia import PhariaConfig, PhariaModel
        model_config_object = from_dict(PhariaConfig, OmegaConf.to_container(model_config))
        model = PhariaModel(model_config_object)

    # Fail on unknown model types.
    else:
        raise ValueError(f"Unknown model type: {model_type}")

    # Move the model to the device.
    model.to(device)
    return model


def convert_tokens_to_songdata(tokens):

    # Accept both a token string and a list of tokens.
    if isinstance(tokens, str):
        tokens = tokens.split()

    song_data = {}
    song_data["tracks"] = []

    # Walk through the tokens and build up tracks, bars, and notes.
    current_track_index = 0
    current_timestep = 0
    for token in tokens:
        if token == "GARLAND_START":
            pass
        elif token == "BAR_START":
            if current_track_index == len(song_data["tracks"]):
                song_data["tracks"] += [{"bars": [], "instrument": "0"}]
            bar_data = {"notes": []}
            song_data["tracks"][current_track_index]["bars"] += [bar_data]
            current_timestep = 0
        elif token.startswith("INST="):
            instrument = token.split("=")[1]
            song_data["tracks"][current_track_index]["instrument"] = instrument
        elif token.startswith("DENSITY="):
            pass
        elif token.startswith("NOTE_ON="):
            note_pitch = int(token.split("=")[1])
            note_data = {
                "note": note_pitch,
                "start": current_timestep,
                "end": current_timestep,
                "velocity": 80
            }
            song_data["tracks"][current_track_index]["bars"][-1]["notes"] += [note_data]
        elif token.startswith("TIME_DELTA="):
            current_timestep += int(token.split("=")[1])
        elif token.startswith("NOTE_OFF="):
            note_pitch = int(token.split("=")[1])
            for note_data in song_data["tracks"][current_track_index]["bars"][-1]["notes"]:
                if note_data["note"] == note_pitch and note_data["start"] == note_data["end"]:
                    note_data["end"] = current_timestep
                    break
        elif token == "BAR_END":
            current_track_index += 1
        elif token == "NEXT":
            current_track_index = 0
        elif token == "GARLAND_END":
            pass
        elif token == "[PAD]":
            pass
        elif token == "[EOS]":
            pass
        else:
            raise Exception(f"Unknown token: {token}")

    assert isinstance(song_data, dict)
    return song_data


def convert_songdata_to_notesequence(song_data: dict, quantize_steps_per_quarter=8, remove_disabled_tracks=True):

    assert isinstance(song_data, dict), f"Invalid song data type: {type(song_data)}"

    # Clone the song data.
    song_data = copy.deepcopy(song_data)

    # Sort the tracks by instrument.
    assert "tracks" in song_data, f"Invalid song data: {song_data.keys()}"
    tracks = sorted(song_data["tracks"], key=lambda t: t["instrument"])
    song_data["tracks"] = tracks

    # Remove tracks that are not enabled.
    if remove_disabled_tracks:
        song_data["tracks"] = [t for t in song_data["tracks"] if t.get("enabled", True)]

    # Create an empty note sequence.
    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()

    # Add the tempo.
    bpm = song_data["bpm"] if "bpm" in song_data else 120
    note_sequence.tempos.add().qpm = bpm

    # Compute some lengths.
    step_length_seconds = 60.0 / bpm / quantize_steps_per_quarter
    bar_length_seconds = 4 * step_length_seconds * quantize_steps_per_quarter

    # Get the instruments.
    instruments = list(set([t["instrument"] for t in song_data["tracks"]]))

    # Add the tracks.
    for track_index, track_data in enumerate(song_data["tracks"]):
        instrument = track_data["instrument"]
        for bar_index, bar_data in enumerate(track_data["bars"]):
            bar_start_time = bar_index * bar_length_seconds
            for note_data in bar_data["notes"]:
                assert "note" in note_data
                assert "start" in note_data
                assert "end" in note_data
                note = note_sequence.notes.add()
                #note.instrument = instrument TODO
                note.pitch = note_data["note"]
                note.start_time = note_data["start"] * step_length_seconds + bar_start_time
                note.end_time = note_data["end"] * step_length_seconds + bar_start_time
                if "velocity" in note_data:
                    note.velocity = note_data["velocity"]
                else:
                    note.velocity = 80
                note.instrument = track_index
                if instrument == "drums":
                    note.is_drum = True
                else:
                    note.is_drum = False
                    note.program = int(instrument)

    return note_sequence


def convert_songdata_to_pianoroll(song_data):

    # The bars are 4/4 and the quantization is 8 steps per quarter, i.e. 32 steps per bar.
    # We render a grid: the height covers the used pitch range, the width is 32 pixels per bar.

    # Determine the number of bars; all tracks must have the same count.
    lengths = [len(track["bars"]) for track in song_data["tracks"]]
    if lengths == []:
        return None
    assert len(set(lengths)) == 1, f"Unequal number of bars: {lengths}"
    num_bars = lengths[0]

    # Get the note extremes.
    min_note = 128
    max_note = 0
    for track_data in song_data["tracks"]:
        for bar_data in track_data["bars"]:
            for note_data in bar_data["notes"]:
                min_note = min(min_note, note_data["note"])
                max_note = max(max_note, note_data["note"])

    # The width depends on the bars.
    width = 32 * num_bars

    # The height depends on the notes.
    height = 1 + max_note - min_note

    # Create the image.
    image = Image.new("RGB", (width, height), (14, 17, 23))

    # Define some colors. Derive hue-rotated variants of the base color, one per track.
    base_color = (255, 75, 75)
    adjustments = [1.2, 1.0, 0.8, 0.6]
    colors = []
    for adjustment in adjustments:
        rgb = [float(c) / 255.0 for c in base_color]
        hsv = colorsys.rgb_to_hsv(*rgb)

        # Rotate the hue.
        offset = (adjustment - 1.0) * 0.1
        hsv = (hsv[0] + offset, hsv[1], hsv[2])
        rgb = colorsys.hsv_to_rgb(*hsv)
        rgb = tuple([int(255.0 * c) for c in rgb])
        colors += [rgb]

    # Draw the notes, one color per track.
    for track_index, track_data in enumerate(song_data["tracks"]):
        color = colors[track_index % len(colors)]
        for bar_index, bar_data in enumerate(track_data["bars"]):
            x = bar_index * 32

            for note_data in bar_data["notes"]:
                y = max_note - note_data["note"]
                assert y >= 0 and y < height, f"Invalid y: {y}, note {note_data['note']}, min_note: {min_note}, max_note: {max_note}, height: {height}"
                for i in range(note_data["start"], note_data["end"]):
                    image.putpixel((x + i, y), color)

    # Resize the image. Use nearest neighbor for pixel art.
    factor = 4
    image = image.resize((width * factor, height * factor), Image.NEAREST)

    return image


def convert_notesequence_to_wave(note_sequence):

    if len(note_sequence.notes) == 0:
        return None

    # Prefer fluidsynth; fall back to note_seq's plain synthesizer if it is unavailable.
    try:
        synthesizer = note_seq.fluidsynth
        wave = synthesizer(note_sequence, sample_rate=44100)
        return wave
    except Exception as e:
        synthesizer = note_seq.synthesize
        wave = synthesizer(note_sequence)
        return wave


def convert_notesequence_to_midi(note_sequence, filename="output.mid"):

    if len(note_sequence.notes) == 0:
        return None

    # Write the sequence to a temporary MIDI file and return the file content.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        filename = temp_file.name
        note_seq.sequence_proto_to_midi_file(note_sequence, filename)
        with open(filename, "rb") as file:
            content = file.read()
    return content
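
Note: the token format interleaves tracks bar by bar. BAR_START opens a bar for the current track, INST= sets its program, NOTE_ON/NOTE_OFF events are separated by TIME_DELTA steps (8 per quarter note, so 32 per 4/4 bar), BAR_END advances to the next track, and NEXT starts the next round of bars. A hand-written sequence illustrating convert_tokens_to_songdata (token values chosen for illustration, not sampled from the model):

from source.utilities import convert_tokens_to_songdata

# One track, one 4/4 bar: a C major triad held for half a bar (16 steps).
tokens = (
    "GARLAND_START "
    "BAR_START INST=0 "
    "NOTE_ON=60 NOTE_ON=64 NOTE_ON=67 "
    "TIME_DELTA=16 "
    "NOTE_OFF=60 NOTE_OFF=64 NOTE_OFF=67 "
    "BAR_END "
    "GARLAND_END"
)
song_data = convert_tokens_to_songdata(tokens)
assert len(song_data["tracks"]) == 1
assert song_data["tracks"][0]["bars"][0]["notes"][0] == {
    "note": 60, "start": 0, "end": 16, "velocity": 80
}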