helamouri committed

Commit eca6215 · 0 Parent(s)

update model

.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.gguf filter=lfs diff=lfs merge=lfs -text
+ *.json filter=lfs diff=lfs merge=lfs -text
+ llama3_medichat filter=lfs diff=lfs merge=lfs -text
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,115 @@
+ name: CI/CD Workflow
+
+ on:
+   push:
+     branches:
+       - main
+   pull_request:
+     branches:
+       - main
+
+ jobs:
+   build-test-deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       # Checkout repository
+       - name: Checkout code
+         uses: actions/checkout@v3
+
+       # Set up Python
+       - name: Setup Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: '3.12.3'
+
+       # Install dependencies
+       - name: Install dependencies
+         run: |
+           python3 -m venv .venv
+           . .venv/bin/activate
+           pip install --upgrade pip
+           pip install -r requirements.txt
+
+       - name: Check for GPU Availability
+         id: gpu-check
+         run: |
+           if lspci | grep -i nvidia; then
+             echo "gpu=true" >> $GITHUB_ENV
+           else
+             echo "gpu=false" >> $GITHUB_ENV
+           fi
+
+       # Run tests
+       - name: Run Tests
+         if: env.gpu == 'true'
+         run: |
+           source .venv/bin/activate
+           pytest --maxfail=5 --disable-warnings
+
+       - name: Skip Tests (No GPU)
+         if: env.gpu == 'false'
+         run: |
+           echo "Skipping GPU-dependent tests: No GPU available."
+
+   sync-to-hub:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v3
+
+       - name: Set Git user identity
+         run: |
+           git config --global user.name "Hussein El Amouri"
+           git config --global user.email "[email protected]"
+
+       # - name: Set up Git LFS
+       #   run: |
+       #     git lfs install  # Ensure Git LFS is installed and set up
+
+       # - name: Track large files with Git LFS
+       #   run: |
+       #     # Track specific large files that exceed the 10 MB limit
+       #     git lfs track "*.gguf"         # Add GGUF model to LFS
+       #     git lfs track "*.safetensors"  # Add safetensors model to LFS
+       #     git lfs track "*.pt"           # Add optimizer checkpoint to LFS
+       #     git lfs track "*.json"         # Add tokenizer to LFS
+
+       #     # Add .gitattributes file to the staging area for Git LFS tracking
+       #     git add .gitattributes
+
+       - name: Push to Hugging Face
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: |
+           # git lfs ls-files
+           # git lfs fetch --all
+           # git lfs pull
+           # git rev-parse --is-shallow-repository
+           git filter-branch -- --all
+           git push https://helamouri:[email protected]/spaces/helamouri/medichat_assignment main --force  # Push to Hugging Face
+
+       # - name: Set up Hugging Face CLI
+       #   run: |
+       #     pip install huggingface_hub
+
+       # - name: Login to Hugging Face
+       #   env:
+       #     HF_TOKEN: ${{ secrets.HF_TOKEN }}
+       #   run: |
+       #     huggingface-cli login --token $HF_TOKEN
+
+       # - name: Sync with Hugging Face (including large files)
+       #   env:
+       #     HF_TOKEN: ${{ secrets.HF_TOKEN }}
+       #   run: |
+       #     # Initialize git-lfs
+       #     git lfs install
+
+       #     # Pull any LFS-tracked files (if needed)
+       #     git lfs pull
+
+       #     # Push the repository to Hugging Face
+       #     huggingface-cli upload spaces/helamouri/medichat_assignment ./* ./medichat_assignment
.gitignore ADDED
@@ -0,0 +1,9 @@
+ # Ignore large files that are tracked by Git LFS
+ *.log
+
+ # Ignore build directories (e.g., for Python, Java, etc.)
+ env/
+
+ # Ensure .gitattributes is not ignored (needed for Git LFS tracking)
+ !.gitattributes
Dokerfile ADDED
@@ -0,0 +1,29 @@
+ # Use the official NVIDIA CUDA image as a base (you can adjust the CUDA version if needed)
+ FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+
+ # Set environment variable to avoid interactive prompts during package installation
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ # Install Python 3.9 and dependencies
+ RUN apt-get update && \
+     apt-get install -y python3.9 python3.9-dev python3.9-venv python3.9-distutils curl && \
+     ln -s /usr/bin/python3.9 /usr/bin/python && \
+     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
+     python get-pip.py && \
+     rm get-pip.py
+
+ # Set the working directory
+ WORKDIR /src
+
+ # Copy the requirements and application files
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy all source code
+ COPY . .
+
+ # Expose the default Streamlit port
+ EXPOSE 8501
+
+ # Run the Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
Makefile ADDED
@@ -0,0 +1,91 @@
+ SHELL := /bin/bash
+ # Makefile for Llama3.1:8B Project
+
+ # Variables
+ PYTHON = python
+ PIP = pip
+ VENV_DIR = ./env
+ VENV_PYTHON = $(VENV_DIR)/bin/python
+ VENV_PIP = $(VENV_DIR)/bin/pip
+ REQUIREMENTS = requirements.txt
+
+ # Default target
+ .DEFAULT_GOAL := help
+
+ # Help target
+ help:
+ 	@echo "Makefile for Llama3.1:8B Project"
+ 	@echo ""
+ 	@echo "Targets:"
+ 	@echo "  help       - Show this help message"
+ 	@echo "  setup      - Create virtual environment and install dependencies"
+ 	@echo "  run        - Run the main application"
+ 	@echo "  test       - Run unit tests"
+ 	@echo "  lint       - Run linters"
+ 	@echo "  clean      - Remove temporary files and directories"
+ 	@echo "  clean-venv - Remove virtual environment"
+ 	@echo "  purge      - Clean and reinstall everything"
+ 	@echo "  install    - Install or update dependencies"
+
+ # Check for Python and pip
+ check-deps:
+ 	@echo "Checking for Python and pip..."
+ 	@if ! command -v $(PYTHON) >/dev/null 2>&1; then \
+ 		echo "Python is not installed. Please install Python3."; \
+ 		exit 1; \
+ 	fi
+ 	@echo "Python is installed."
+ 	@if ! command -v $(PIP) >/dev/null 2>&1; then \
+ 		echo "pip is not installed. Installing pip..."; \
+ 		sudo apt update && sudo apt install -y python3-pip; \
+ 	fi
+ 	@echo "pip is installed."
+
+ # Create virtual environment and install dependencies
+ setup: check-deps
+ 	@echo "Setting up virtual environment..."
+ 	@if [ ! -d "$(VENV_DIR)" ]; then \
+ 		$(PYTHON) -m venv $(VENV_DIR); \
+ 		echo "Virtual environment created."; \
+ 	fi
+ 	@echo "Installing dependencies..."
+ 	$(VENV_PIP) install --upgrade pip
+ 	$(VENV_PIP) install -r $(REQUIREMENTS)
+ 	@echo "Setup completed."
+
+ # Run the main application
+ run:
+ 	@echo "Running the application..."
+ 	$(VENV_PYTHON) main.py
+
+ # Run tests
+ test:
+ 	@echo "Running tests..."
+ 	$(VENV_PYTHON) -m unittest discover tests
+
+ # Run linters
+ lint:
+ 	@echo "Running linters..."
+ 	$(VENV_PYTHON) -m flake8 src/ tests/
+
+ # Clean temporary files and directories
+ clean:
+ 	@echo "Cleaning temporary files and directories..."
+ 	find . -type f -name '*.pyc' -delete
+ 	find . -type d -name '__pycache__' -exec rm -r {} +
+ 	@echo "Cleanup completed."
+
+ # Clean virtual environment
+ clean-venv:
+ 	@echo "Removing virtual environment..."
+ 	rm -rf $(VENV_DIR)
+ 	@echo "Virtual environment removed."
+
+ # Purge: remove all and reinstall environment
+ purge: clean clean-venv setup
+
+ # Install or update dependencies
+ install:
+ 	@echo "Installing or updating dependencies..."
+ 	$(VENV_PIP) install -r $(REQUIREMENTS)
+ 	@echo "Dependencies installed or updated."
README.md ADDED
@@ -0,0 +1,45 @@
+ ---
+ title: MediChat
+ emoji: 🩺
+ colorFrom: blue
+ colorTo: yellow
+ sdk: streamlit
+ sdk_version: "1.40.1"  # Replace with the actual version of your SDK
+ app_file: app.py       # Replace with the main app file name
+ pinned: false
+ ---
+
+ [![CI/CD Workflow](https://github.com/hussein88al88amouri/medichat_assignment/actions/workflows/deploy.yml/badge.svg)](https://github.com/hussein88al88amouri/medichat_assignment/actions/workflows/deploy.yml)
+
+ # MediChat: AI-Powered Medical Consultation Assistant
+
+ MediChat is an intelligent chatbot designed to provide medical consultations using a fine-tuned Llama3.1:8B model. The project bridges advanced AI capabilities with practical healthcare assistance.
+
+ ## Features
+ - Fine-tuned model for medical conversations
+ - Interactive and user-friendly interface
+ - Secure and containerized deployment
+
+ ## How to Use
+ 1. Access the chatbot interface.
+ 2. Enter your medical query.
+ 3. Receive intelligent, context-aware responses.
+
+ ## Technical Details
+ - Model: Llama3.1:8B
+ - Framework: Streamlit
+
+ ## Installation
+ 1. Clone the repository:
+    ```bash
+    git clone https://github.com/your_username/medichat.git
+    cd medichat
+    ```
+ 2. Build and run the Docker container:
+    ```bash
+    docker build -t medichat-app .
+    docker run -p 8501:8501 medichat-app
+    ```
+ 3. Access the app at http://localhost:8501.
+
+ ## Limitations
+ This tool is not a replacement for professional medical advice.
+ For critical issues, always consult a licensed medical professional.
Setup.py ADDED
@@ -0,0 +1,76 @@
+ from setuptools import setup, find_packages
+ from pathlib import Path
+
+ # Read the requirements from the requirements.txt file
+ def parse_requirements():
+     requirements_path = Path(__file__).parent / 'requirements.txt'
+     with open(requirements_path, 'r') as file:
+         return [line.strip() for line in file if line.strip() and not line.startswith('#')]
+
+ setup(
+     # The name of your package.
+     name='medichat',
+
+     # A version number for your package.
+     version='0.1.0',
+
+     # A brief summary of what your package does.
+     description='A fine-tuned LLM for medical consultations based on the Meta-Llama 3.1 8B model.',
+
+     # The URL of your project's homepage.
+     url='https://github.com/hussein88al88amouri/medichat',
+
+     # The author's name.
+     author='Hussein El Amouri',
+
+     # The author's email address.
+     author_email='[email protected]',
+
+     # This defines which packages should be included in the distribution.
+     packages=find_packages(),
+
+     # Read dependencies from the requirements.txt
+     install_requires=parse_requirements(),
+
+     # Additional classification of your package.
+     classifiers=[
+         'Development Status :: 3 - Alpha',
+         'Intended Audience :: Developers',
+         'License :: OSI Approved :: MIT License',
+         'Programming Language :: Python :: 3',
+         'Programming Language :: Python :: 3.8',
+         'Programming Language :: Python :: 3.9',
+         'Programming Language :: Python :: 3.10',
+     ],
+
+     # A license for your package.
+     license='MIT',
+
+     # You can add entry points for command-line tools if your package includes such functionality.
+     entry_points={
+         'console_scripts': [
+             'medichat=medichat.cli:main',  # Adjust to your actual CLI entry point, if any
+         ],
+     },
+
+     # If you have data files (like configuration files), you can specify them here.
+     data_files=[
+         # Example of configuration files for saving the model, etc.
+         ('share/config', ['config/config.json']),
+     ],
+
+     # If your package has specific testing requirements or needs test dependencies, list them here.
+     extras_require={
+         'dev': ['pytest', 'tox'],  # Optional dependencies for development or testing
+         'docs': ['sphinx'],        # Optional dependencies for documentation generation
+     },
+
+     # Specify your package's minimum supported Python version
+     python_requires='>=3.8',
+
+     # If your package includes command-line scripts, you can list them here
+     scripts=['scripts/cli_script.py'],  # Update path if you have a script to run
+
+     # If your package includes C extensions or other modules, specify them here.
+     ext_modules=[],
+ )
User.code-workspace ADDED
@@ -0,0 +1,32 @@
+ {
+     "folders": [
+         {
+             "path": "C:/Users/hasso/AppData/Roaming/Code/User"
+         },
+         {
+             "path": ".."
+         }
+     ],
+     "settings": {
+         "workbench.colorCustomizations": {
+             "activityBar.activeBackground": "#5b5b5b",
+             "activityBar.background": "#5b5b5b",
+             "activityBar.foreground": "#e7e7e7",
+             "activityBar.inactiveForeground": "#e7e7e799",
+             "activityBarBadge.background": "#103010",
+             "activityBarBadge.foreground": "#e7e7e7",
+             "commandCenter.border": "#e7e7e799",
+             "sash.hoverBorder": "#5b5b5b",
+             "statusBar.background": "#424242",
+             "statusBar.foreground": "#e7e7e7",
+             "statusBarItem.hoverBackground": "#5b5b5b",
+             "statusBarItem.remoteBackground": "#424242",
+             "statusBarItem.remoteForeground": "#e7e7e7",
+             "titleBar.activeBackground": "#424242",
+             "titleBar.activeForeground": "#e7e7e7",
+             "titleBar.inactiveBackground": "#42424299",
+             "titleBar.inactiveForeground": "#e7e7e799"
+         },
+         "peacock.color": "#424242"
+     }
+ }
__init__.py ADDED
@@ -0,0 +1,51 @@
+ # __init__.py
+
+ # Import necessary modules or functions from submodules.
+ # This is where you can aggregate the public API of your package.
+
+ # Example:
+ # from .module_name import function_name, ClassName
+
+ # You can also import specific components to make them available directly from the package level.
+ # For example:
+ # from .subpackage.module_name import function_name
+ from src import llama3_finetune
+ from src import main
+
+ # Initialize any package-level variables or constants
+ # For example, if you have any version number or author info, you can define it here.
+
+ __version__ = "0.1.0"  # Replace with your actual package version
+ __author__ = "Hussein El Amouri"  # Replace with your name or the author name
+
+ # You can include initialization code here, if your package requires any.
+ # For example, setting up logging, initializing global variables, etc.
+
+ # Example:
+ # import logging
+ # logging.basicConfig(level=logging.INFO)
+
+ # Define a list of publicly exposed items (optional)
+ # This list is used to specify which functions, classes, or variables
+ # should be available when `from package_name import *` is used.
+
+ # Example:
+ __all__ = [
+     'function_name',  # List the names of the functions, classes, or variables you want exposed
+     'ClassName',
+ ]
+
+ # If your package uses a specific function or submodule as the primary entry point,
+ # you can set that here.
+ # For example, if the main function of the package is in a submodule called 'main.py',
+ # you can import that here:
+ # from .main import run
+
+ # Initialize any necessary package-specific code here, if needed
+ # Example for adding environment setup, database initialization, etc.
+
+ # If your package contains a command-line interface (CLI), you can import it here,
+ # so it can be executed as a script if the package is installed:
+ # from .cli import main
+
+ # Any other necessary imports that users should be aware of can go here.
app.py ADDED
@@ -0,0 +1,116 @@
+ import streamlit as st
+ # from unsloth import FastLanguageModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ import os
+ import sys
+
+ # # Suppress unwanted outputs (e.g., from unsloth or other libraries)
+ # def suppress_output():
+ #     sys.stdout = open(os.devnull, 'w')  # Redirect stdout to devnull
+ #     sys.stderr = open(os.devnull, 'w')  # Redirect stderr to devnull
+
+ # def restore_output():
+ #     sys.stdout = sys.__stdout__  # Restore stdout
+ #     sys.stderr = sys.__stderr__  # Restore stderr
+
+ # Load the model (GGUF format)
+ @st.cache_resource
+ def load_model():
+     # Define the repository and model filenames for the base model and the LoRA adapter
+     base_model_repo = "helamouri/Meta-Llama-3.1-8B-Q8_0.gguf"
+     base_model_filename = "Meta-Llama-3.1-8B-Q8_0.gguf"
+     # adapter_repo = "helamouri/medichat_assignment"
+     # adapter_filename = "llama3_medichat.gguf"  # adapter is not loaded below, so it is not downloaded
+     adapter_repo = "helamouri/model_medichat_finetuned_v1"
+
+     # Download the base model to a local path
+     base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_filename)
+     # adapter_model_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_filename)
+
+     # Log paths for debugging
+     print(f"Base model path: {base_model_path}")
+     # print(f"Adapter model path: {adapter_model_path}")
+
+     # Load the full model (base model); the LoRA adapter is currently disabled
+     try:
+         model = Llama(model_path=base_model_path)  # , adapter_path=adapter_model_path)
+         print("Model loaded successfully.")
+     except ValueError as e:
+         print(f"Error loading model: {e}")
+         raise
+
+     return model
+
+ # Generate a response using Llama.cpp
+ def generate_response(model, prompt):
+     print('prompt')
+     print(prompt)
+     response = model(
+         prompt,
+         max_tokens=200,   # Maximum tokens for the response
+         temperature=0.7,  # Adjust for creativity (lower = deterministic)
+         top_p=0.9,        # Nucleus sampling
+         stop=["\n"]       # Stop generating when a newline is encountered
+     )
+     print('response["choices"]')
+     print(response["choices"])
+     return response["choices"][0]["text"]
+
+ # Load the model and tokenizer (GGUF format)
+ # @st.cache_resource
+ # def load_model():
+ #     model_name = "helamouri/model_medichat_finetuned_v1"  # Replace with your model's GGUF path
+ #     model = FastLanguageModel.from_pretrained(model_name, device='cpu')  # Load the model using unsloth
+ #     tokenizer = model.tokenizer  # Assuming the tokenizer is part of the GGUF model object
+ #     return tokenizer, model
+
+
+ # @st.cache_resource
+ # def load_model():
+ #     model_name = "helamouri/model_medichat_finetuned_v1"  # Replace with your model's path
+ #     # Load the tokenizer
+ #     tokenizer = AutoTokenizer.from_pretrained(model_name)
+ #     # Load the model (if it's a causal language model or suitable model type)
+ #     model = AutoModelForCausalLM.from_pretrained(model_name,
+ #                                                  device_map="cpu",
+ #                                                  revision="main",
+ #                                                  quantize=False,
+ #                                                  load_in_8bit=False,
+ #                                                  load_in_4bit=False,
+ #                                                  # torch_dtype=torch.float32
+ #                                                  )
+ #     return tokenizer, model
+
+ # Suppress unwanted outputs from unsloth or any other libraries during model loading
+ # suppress_output()
+
+ # Load the GGUF model
+ print('Loading the model')
+ model = load_model()
+ # Restore stdout and stderr
+ # restore_output()
+
+ # App layout
+ print('Setting App layout')
+ st.title("MediChat: Your AI Medical Consultation Assistant")
+ st.markdown("Ask me anything about your health!")
+ st.write("Enter your symptoms or medical questions below:")
+
+ # User input
+ print('Setting user interface')
+ user_input = st.text_input("Your Question:")
+ if st.button("Get Response"):
+     if user_input:
+         with st.spinner("Generating response..."):
+             # Generate Response
+             response = generate_response(model, user_input)
+             print('Response')
+             print(response)
+             # Display response
+             st.text_area("Response:", value=response, height=200)
+     else:
+         st.warning("Please enter a question.")
pytest.ini ADDED
@@ -0,0 +1,3 @@
+ [pytest]
+ markers =
+     gpu: marks tests that require a GPU
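The `gpu` marker registered here is applied throughout the test suite together with a CUDA `skipif` guard; a minimal sketch of the pattern (mirroring `tests/test_model.py`) looks like this:

```python
import pytest
import torch

# GPU-dependent tests carry the custom `gpu` marker and are skipped
# automatically on machines without CUDA.
@pytest.mark.gpu
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
def test_gpu_feature():
    assert torch.cuda.is_available()
```

Registering the marker in `pytest.ini` keeps runs with `--strict-markers` free of unknown-marker warnings, and GPU tests can also be deselected explicitly with `pytest -m "not gpu"`.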
requirements.txt ADDED
@@ -0,0 +1,20 @@
+ # Install the multi-backend version of bitsandbytes
+ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl
+ llama-cpp-python
+ datasets
+ huggingface_hub
+ huggingface_hub[cli]
+ unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
+ xformers==0.0.28.post2
+ trl
+ peft
+ accelerate
+ bitsandbytes
+ torchvision
+ torch
+ sentencepiece
+ transformers[torch]>=4.45.1
+ streamlit==1.40.1
+ gguf>=0.10.0
+ pytest
+ flake8
space.yml ADDED
@@ -0,0 +1,19 @@
+ # docker-compose.yaml for Hugging Face Spaces
+ version: '3.8'
+
+ services:
+   app:
+     build:
+       context: .
+       dockerfile: Dockerfile
+     ports:
+       - "7860:7860"  # Default port for Streamlit or Gradio apps
+     environment:
+       HF_TOKEN: ${HF_TOKEN}  # Hugging Face API token
+     command: >
+       bash -c "
+       python3 -m venv .venv &&
+       . .venv/bin/activate &&
+       pip install -r requirements.txt &&
+       python main.py
+       "
src/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .config import *
+ from .model import load_model, configure_peft_model
+ from .dataset import load_and_prepare_dataset, formatting_prompts_func
+ from .training import train_model
+ from .inference import prepare_inference_inputs, generate_responses, stream_responses
+ from .save_model import save_model_and_tokenizer
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (601 Bytes).

src/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.16 kB).

src/__pycache__/dataset.cpython-312.pyc ADDED
Binary file (1.37 kB).

src/__pycache__/inference.cpython-312.pyc ADDED
Binary file (2.51 kB).

src/__pycache__/model.cpython-312.pyc ADDED
Binary file (1.41 kB).

src/__pycache__/save_model.cpython-312.pyc ADDED
Binary file (1.11 kB).

src/__pycache__/training.cpython-312.pyc ADDED
Binary file (743 Bytes).
src/config.py ADDED
@@ -0,0 +1,44 @@
+ import torch
+
+
+ def get_device_map():
+     if torch.cuda.is_available():
+         return {'': torch.cuda.current_device()}
+     else:
+         return {}  # Or some default, fallback configuration
+
+ # General configuration
+ MAX_SEQ_LENGTH = 2**4
+ DTYPE = None
+ LOAD_IN_4BIT = True
+ DEVICE_MAP = get_device_map()  # e.g. {'': 0} when a CUDA device is available
+ EOS_TOKEN = None  # Set dynamically based on tokenizer
+
+ # Alpaca prompt template
+ ALPACA_PROMPT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ###Instruction:
+ {}
+
+ ###Input:
+ {}
+
+ ###Response:
+ {}"""
+
+ # Training arguments
+ TRAIN_ARGS = {
+     "per_device_train_batch_size": 2,
+     "gradient_accumulation_steps": 4,
+     "warmup_steps": 5,
+     "max_steps": 60,
+     "learning_rate": 2e-4,
+     "fp16": not torch.cuda.is_bf16_supported(),
+     "bf16": torch.cuda.is_bf16_supported(),
+     "logging_steps": 1,
+     "optim": "adamw_8bit",
+     "weight_decay": 0.01,
+     "lr_scheduler_type": "linear",
+     "seed": 3407,
+     "output_dir": "outputs",
+ }
src/dataset.py ADDED
@@ -0,0 +1,22 @@
+ from datasets import load_dataset
+
+ def formatting_prompts_func(examples, template, eos_token):
+     instructions = examples["instruction"]
+     inputs = examples["input"]
+     outputs = examples["output"]
+
+     # Format the examples using the provided template
+     texts = []
+     for instruction, input_text, output in zip(instructions, inputs, outputs):
+         text = template.format(instruction, input_text, output) + eos_token
+         texts.append(text)
+
+     # Return a dictionary with the formatted text
+     return {"text": texts}
+
+ def load_and_prepare_dataset(dataset_name, nsamples, formatting_func, template, eos_token):
+     # Load the dataset and prepare it by applying the formatting function
+     dataset = load_dataset(dataset_name, split="train").select(range(nsamples))
+
+     # Map the formatting function over the dataset
+     return dataset.map(lambda examples: formatting_func(examples, template, eos_token), batched=True)
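For reference, a minimal sketch of how `formatting_prompts_func` combines with the Alpaca template from `src/config.py`; the batch below and the `"<EOS>"` string are illustrative placeholders (during training the tokenizer's real EOS token is passed instead):

```python
from src.config import ALPACA_PROMPT_TEMPLATE
from src.dataset import formatting_prompts_func

# A single-example batch in the instruction/input/output column format
# expected by formatting_prompts_func (illustrative content only).
examples = {
    "instruction": ["Answer the patient's question."],
    "input": ["I have had a mild headache for two days."],
    "output": ["Rest, stay hydrated, and see a doctor if it persists."],
}

batch = formatting_prompts_func(examples, ALPACA_PROMPT_TEMPLATE, "<EOS>")
print(batch["text"][0])  # Alpaca-style prompt ending with the placeholder EOS token
```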
src/fine_tune_llama.py ADDED
@@ -0,0 +1,38 @@
+ from src import *
+
+ # Load configuration
+ max_seq_length = config.MAX_SEQ_LENGTH
+ device_map = config.DEVICE_MAP
+ eos_token = config.EOS_TOKEN
+
+ # Load and configure model
+ model_name = "unsloth/Meta-Llama-3.1-8B"
+ model, tokenizer = load_model(model_name, max_seq_length, config.DTYPE, config.LOAD_IN_4BIT, device_map)
+ eos_token = tokenizer.eos_token
+
+ model = configure_peft_model(model, target_modules=["q_proj", "down_proj"])
+
+ # Prepare dataset
+ nsamples = 1000
+ dataset = load_and_prepare_dataset(
+     "lavita/ChatDoctor-HealthCareMagic-100k",
+     nsamples,
+     formatting_prompts_func,
+     config.ALPACA_PROMPT_TEMPLATE,
+     eos_token,
+ )
+
+ # Train model
+ trainer_stats = train_model(
+     model=model,
+     tokenizer=tokenizer,
+     train_dataset=dataset,
+     dataset_text_field="text",
+     max_seq_length=max_seq_length,
+     dataset_num_proc=2,
+     packing=False,
+     training_args=config.TRAIN_ARGS,
+ )
+
+ # Save the model
+ save_model_and_tokenizer(model, tokenizer, "./llama3_medichat")
src/inference.py ADDED
@@ -0,0 +1,51 @@
+ from transformers import TextStreamer
+
+ def prepare_inference_inputs(tokenizer, template, instruction, input_text, eos_token, device="cuda"):
+     """
+     Prepares the inputs for inference by formatting the prompt and tokenizing it.
+
+     Args:
+     - tokenizer: The tokenizer used for tokenization.
+     - template: The template string for the prompt format.
+     - instruction: The instruction to be included in the prompt.
+     - input_text: The input to be included in the prompt.
+     - eos_token: The end of sequence token.
+     - device: The device for the model ('cuda' or 'cpu').
+
+     Returns:
+     - Tokenized inputs ready for inference.
+     """
+     prompt = template.format(instruction, input_text, "") + eos_token
+     return tokenizer([prompt], return_tensors="pt").to(device)
+
+ def generate_responses(model, inputs, tokenizer, max_new_tokens=64):
+     """
+     Generates responses from the model based on the provided inputs.
+
+     Args:
+     - model: The pre-trained model for generation.
+     - inputs: The tokenized inputs to generate responses.
+     - tokenizer: The tokenizer used to decode the output.
+     - max_new_tokens: The maximum number of tokens to generate.
+
+     Returns:
+     - Decoded responses from the model.
+     """
+     outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, use_cache=True)
+     return tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+ def stream_responses(model, inputs, tokenizer, max_new_tokens=128):
+     """
+     Streams the model's response using a text streamer.
+
+     Args:
+     - model: The pre-trained model for generation.
+     - inputs: The tokenized inputs to generate responses.
+     - tokenizer: The tokenizer used to decode the output.
+     - max_new_tokens: The maximum number of tokens to generate.
+
+     Returns:
+     - Streams the output directly.
+     """
+     text_streamer = TextStreamer(tokenizer)
+     model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens)
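A minimal usage sketch of these helpers, assuming a GPU machine, the same base model used in `src/fine_tune_llama.py`, and a larger max sequence length (2048) than the default in `src/config.py` so the prompt fits; the instruction and input strings are illustrative:

```python
from src.config import ALPACA_PROMPT_TEMPLATE
from src.inference import prepare_inference_inputs, generate_responses
from src.model import load_model

# Load the 4-bit base model on GPU 0, mirroring src/fine_tune_llama.py and the tests
model, tokenizer = load_model("unsloth/Meta-Llama-3.1-8B", 2048, None, True, {'': 0})

inputs = prepare_inference_inputs(
    tokenizer,
    ALPACA_PROMPT_TEMPLATE,
    "Answer the patient's question.",            # instruction (illustrative)
    "I have had a mild headache for two days.",  # input text (illustrative)
    tokenizer.eos_token,
)
responses = generate_responses(model, inputs, tokenizer, max_new_tokens=64)
print(responses[0])
```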
src/model.py ADDED
@@ -0,0 +1,32 @@
+ import torch
+ from unsloth import FastLanguageModel
+
+ def load_model(model_name, max_seq_length, dtype, load_in_4bit, device_map):
+     try:
+         model, tokenizer = FastLanguageModel.from_pretrained(
+             model_name=model_name,
+             max_seq_length=max_seq_length,
+             dtype=dtype,
+             load_in_4bit=load_in_4bit,
+             device_map=device_map,
+         )
+         return model, tokenizer
+     except Exception as e:
+         raise RuntimeError(f"Failed to load model {model_name}: {e}")
+
+ def configure_peft_model(model, target_modules, lora_alpha=16, lora_dropout=0, random_state=3407, use_rslora=False):
+     try:
+         peft_model = FastLanguageModel.get_peft_model(
+             model=model,
+             target_modules=target_modules,
+             lora_alpha=lora_alpha,
+             lora_dropout=lora_dropout,
+             bias="none",
+             use_gradient_checkpointing="unsloth",
+             random_state=random_state,
+             use_rslora=use_rslora,
+             loftq_config=None,
+         )
+         return peft_model
+     except Exception as e:
+         raise RuntimeError(f"Failed to configure PEFT model: {e}")
src/save_model.py ADDED
@@ -0,0 +1,23 @@
+ import os
+
+ def save_model_and_tokenizer(model, tokenizer, save_directory):
+     """
+     Save model and tokenizer to the specified directory.
+
+     Args:
+     - model: The model to save.
+     - tokenizer: The tokenizer to save.
+     - save_directory: Directory where the model and tokenizer should be saved.
+     """
+     try:
+         # Ensure the save directory exists
+         os.makedirs(save_directory, exist_ok=True)
+
+         # Save model and tokenizer
+         model.save_pretrained(save_directory, safe_serialization=True)
+         tokenizer.save_pretrained(save_directory)
+
+         print(f"Model and tokenizer saved locally at {save_directory}")
+     except Exception as e:
+         print(f"Error saving model and tokenizer: {str(e)}")
+         raise
src/training.py ADDED
@@ -0,0 +1,20 @@
+ from trl import SFTTrainer
+ from transformers import TrainingArguments
+
+ def train_model(model, tokenizer, train_dataset, dataset_text_field, max_seq_length, dataset_num_proc, packing, training_args):
+     trainer = SFTTrainer(
+         model=model,
+         tokenizer=tokenizer,
+         train_dataset=train_dataset,
+         dataset_text_field=dataset_text_field,
+         max_seq_length=max_seq_length,
+         dataset_num_proc=dataset_num_proc,
+         packing=packing,
+         args=TrainingArguments(**training_args),
+     )
+
+     # Train the model
+     train_results = trainer.train()
+
+     # Optionally, you can return more specific training information if necessary
+     return train_results
tests/__init__.py ADDED
File without changes
tests/__pycache__/test_config.cpython-312.pyc ADDED
Binary file (4.25 kB).

tests/__pycache__/test_dataset.cpython-312.pyc ADDED
Binary file (1.66 kB).

tests/__pycache__/test_inference.cpython-312.pyc ADDED
Binary file (1.86 kB).

tests/__pycache__/test_model.cpython-312.pyc ADDED
Binary file (1.83 kB).

tests/__pycache__/test_save_model.cpython-312.pyc ADDED
Binary file (2.89 kB).

tests/__pycache__/test_training.cpython-312.pyc ADDED
Binary file (1.86 kB).
tests/test_config.py ADDED
@@ -0,0 +1,79 @@
+ import pytest
+ import torch
+ from src.config import (MAX_SEQ_LENGTH, DTYPE, LOAD_IN_4BIT, DEVICE_MAP, EOS_TOKEN,
+                         ALPACA_PROMPT_TEMPLATE, TRAIN_ARGS)
+
+ # Test that required configuration keys are present
+ def test_required_config_keys():
+     assert MAX_SEQ_LENGTH is not None, "MAX_SEQ_LENGTH is not set."
+     assert TRAIN_ARGS is not None, "TRAIN_ARGS is not set."
+     assert ALPACA_PROMPT_TEMPLATE is not None, "ALPACA_PROMPT_TEMPLATE is not set."
+     assert DEVICE_MAP is not None, "DEVICE_MAP is not set."
+
+ # Test that MAX_SEQ_LENGTH is a power of two
+ def test_max_seq_length():
+     assert isinstance(MAX_SEQ_LENGTH, int), "MAX_SEQ_LENGTH should be an integer."
+     assert MAX_SEQ_LENGTH > 0, "MAX_SEQ_LENGTH should be greater than 0."
+     assert (MAX_SEQ_LENGTH & (MAX_SEQ_LENGTH - 1)) == 0, "MAX_SEQ_LENGTH should be a power of two."
+
+ # Test that TRAIN_ARGS dictionary contains required fields and types
+ def test_train_args():
+     required_keys = [
+         "per_device_train_batch_size",
+         "gradient_accumulation_steps",
+         "warmup_steps",
+         "max_steps",
+         "learning_rate",
+         "fp16",
+         "bf16",
+         "logging_steps",
+         "optim",
+         "weight_decay",
+         "lr_scheduler_type",
+         "seed",
+         "output_dir"
+     ]
+
+     for key in required_keys:
+         assert key in TRAIN_ARGS, f"Missing {key} in TRAIN_ARGS."
+
+     # Check types of specific fields
+     assert isinstance(TRAIN_ARGS["per_device_train_batch_size"], int), "per_device_train_batch_size should be an integer."
+     assert isinstance(TRAIN_ARGS["learning_rate"], float), "learning_rate should be a float."
+     assert isinstance(TRAIN_ARGS["output_dir"], str), "output_dir should be a string."
+
+ # Test that the DEVICE_MAP references a valid CUDA device
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_device_map():
+     device = DEVICE_MAP.get('', None)
+     assert device is not None, "DEVICE_MAP should reference a CUDA device."
+     assert isinstance(device, int), "DEVICE_MAP should be an integer (CUDA device ID)."
+     assert torch.cuda.is_available(), "CUDA is not available, but DEVICE_MAP points to a CUDA device."
+
+ # Test that the EOS_TOKEN is set dynamically based on the tokenizer
+ def test_eos_token():
+     assert EOS_TOKEN is not None, "EOS_TOKEN should be dynamically set based on tokenizer."
+
+ # Test the ALPACA_PROMPT_TEMPLATE for expected formatting
+ def test_alpaca_prompt_template():
+     test_instruction = "Test Instruction"
+     test_input = "Test Input"
+     test_output = "Test Output"
+
+     formatted_prompt = ALPACA_PROMPT_TEMPLATE.format(test_instruction, test_input, test_output)
+
+     # Ensure that the raw template contains placeholders and the formatted prompt keeps the section headers
+     assert "{}" in ALPACA_PROMPT_TEMPLATE, "ALPACA_PROMPT_TEMPLATE should contain placeholders."
+     assert "###Instruction:" in formatted_prompt, "ALPACA_PROMPT_TEMPLATE should contain '###Instruction'."
+     assert "###Input:" in formatted_prompt, "ALPACA_PROMPT_TEMPLATE should contain '###Input'."
+     assert "###Response:" in formatted_prompt, "ALPACA_PROMPT_TEMPLATE should contain '###Response'."
+
+ # Test that the LOAD_IN_4BIT setting is a boolean
+ def test_load_in_4bit():
+     assert isinstance(LOAD_IN_4BIT, bool), "LOAD_IN_4BIT should be a boolean."
+
+ # Test for the DTYPE (should be None or a valid data type)
+ def test_dtype():
+     assert DTYPE is None or isinstance(DTYPE, type), "DTYPE should be None or a valid data type."
tests/test_dataset.py ADDED
@@ -0,0 +1,43 @@
+ from src.dataset import formatting_prompts_func
+
+ def test_formatting_prompts_func():
+     # Test case with basic input
+     examples = {
+         "instruction": ["Test instruction"],
+         "input": ["Test input"],
+         "output": ["Test output"],
+     }
+     template = "Instruction: {}\nInput: {}\nOutput: {}"
+     eos_token = "<EOS>"
+
+     result = formatting_prompts_func(examples, template, eos_token)
+
+     # Check if result contains the 'text' key
+     assert "text" in result
+
+     # Check if result contains exactly one formatted entry
+     assert len(result["text"]) == 1
+
+     # Check if the formatted text is correct
+     expected = "Instruction: Test instruction\nInput: Test input\nOutput: Test output<EOS>"
+     assert result["text"][0] == expected
+
+     # Test with empty inputs (edge case)
+     examples_empty = {
+         "instruction": [""],
+         "input": [""],
+         "output": [""],
+     }
+     result_empty = formatting_prompts_func(examples_empty, template, eos_token)
+     assert result_empty["text"][0] == "Instruction: \nInput: \nOutput: <EOS>"
+
+     # Test with multiple examples
+     examples_multi = {
+         "instruction": ["Test instruction 1", "Test instruction 2"],
+         "input": ["Test input 1", "Test input 2"],
+         "output": ["Test output 1", "Test output 2"],
+     }
+     result_multi = formatting_prompts_func(examples_multi, template, eos_token)
+     assert len(result_multi["text"]) == 2
+     assert result_multi["text"][0] == "Instruction: Test instruction 1\nInput: Test input 1\nOutput: Test output 1<EOS>"
+     assert result_multi["text"][1] == "Instruction: Test instruction 2\nInput: Test input 2\nOutput: Test output 2<EOS>"
tests/test_inference.py ADDED
@@ -0,0 +1,44 @@
+ from src.inference import prepare_inference_inputs, generate_responses
+ from src.model import load_model
+ import pytest
+ import torch
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_gpu_feature():
+     # Your test code that needs a GPU
+     assert torch.cuda.is_available()
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ @pytest.fixture
+ def model_and_tokenizer():
+     """Fixture to load model and tokenizer for inference"""
+     model_name = "unsloth/Meta-Llama-3.1-8B"
+     model, tokenizer = load_model(model_name, 16, None, True, {'': 0})
+     return model, tokenizer
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_inference(model_and_tokenizer):
+     model, tokenizer = model_and_tokenizer
+
+     # Test input values
+     instruction = "What is your name?"
+     input_text = "Tell me about yourself."
+     eos_token = "<EOS>"
+
+     # Prepare inference inputs
+     inputs = prepare_inference_inputs(tokenizer, "Instruction: {}\nInput: {}", instruction, input_text, eos_token)
+
+     # Generate responses
+     responses = generate_responses(model, inputs, tokenizer, max_new_tokens=32)
+
+     # Assertions
+     assert isinstance(responses, list), f"Expected list, but got {type(responses)}"
+     assert len(responses) > 0, "Expected non-empty responses list"
+     assert isinstance(responses[0], str), f"Expected string, but got {type(responses[0])}"
+     assert len(responses[0]) > 0, "Expected non-empty string response"
+
+     # Optionally, assert that the response matches some expected pattern or content
+     assert "name" in responses[0].lower(), "Response does not contain expected content"
tests/test_model.py ADDED
@@ -0,0 +1,46 @@
+ from src.model import load_model, configure_peft_model
+ import pytest
+ import torch
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_gpu_feature():
+     # Your test code that needs a GPU
+     assert torch.cuda.is_available()
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_load_model():
+     model_name = "unsloth/Meta-Llama-3.1-8B"
+     model, tokenizer = load_model(model_name, 16, None, True, {'': 0})
+
+     # Check that model and tokenizer are not None
+     assert model is not None
+     assert tokenizer is not None
+
+     # Check that model is on the correct device (e.g., GPU or CPU)
+     assert next(model.parameters()).device == torch.device('cuda:0'), "Model should be loaded on CUDA device"
+
+     # Check that the tokenizer is an instance of the correct class
+     assert hasattr(tokenizer, "encode"), "Tokenizer should have the 'encode' method"
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_configure_peft_model():
+     model_name = "unsloth/Meta-Llama-3.1-8B"
+     model, _ = load_model(model_name, 16, None, True, {'': 0})
+
+     # Configure the PEFT model
+     peft_model = configure_peft_model(model, target_modules=["q_proj", "down_proj"])
+
+     # Check that PEFT model is not None
+     assert peft_model is not None, "PEFT model should not be None"
+
+     # Check that the PEFT model has a forward method
+     assert hasattr(peft_model, "forward"), "PEFT model should have a 'forward' method"
+
+     # Ensure that PEFT model can perform a forward pass (check if no error is raised)
+     try:
+         device = next(peft_model.parameters()).device
+         dummy_input = torch.randint(0, 1000, (1, 16)).to(device)  # Dummy input tensor on the model's device
+         peft_model(dummy_input)
+     except Exception as e:
+         pytest.fail(f"PEFT model forward pass failed: {e}")
tests/test_save_model.py ADDED
@@ -0,0 +1,49 @@
+ import os
+ import pytest
+ from src.save_model import save_model_and_tokenizer
+ from src.model import load_model
+ import torch
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_gpu_feature():
+     # Your test code that needs a GPU
+     assert torch.cuda.is_available()
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ @pytest.fixture
+ def model_and_tokenizer():
+     """Fixture to load the model and tokenizer for saving."""
+     model_name = "unsloth/Meta-Llama-3.1-8B"
+     model, tokenizer = load_model(model_name, 16, None, True, {'': 0})
+     return model, tokenizer
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_save_model(model_and_tokenizer):
+     model, tokenizer = model_and_tokenizer
+     save_directory = "./test_save_dir"
+
+     # Save model and tokenizer
+     save_model_and_tokenizer(model, tokenizer, save_directory)
+
+     # Check if the directory exists
+     assert os.path.exists(save_directory), f"Directory {save_directory} does not exist"
+
+     # Check for key model files
+     assert os.path.exists(os.path.join(save_directory, "config.json")), "config.json not found"
+     assert os.path.exists(os.path.join(save_directory, "tokenizer_config.json")), "tokenizer_config.json not found"
+     assert os.path.exists(os.path.join(save_directory, "pytorch_model.bin")), "pytorch_model.bin not found"
+
+     # Check that files are not empty
+     assert os.path.getsize(os.path.join(save_directory, "pytorch_model.bin")) > 0, "pytorch_model.bin is empty"
+     assert os.path.getsize(os.path.join(save_directory, "config.json")) > 0, "config.json is empty"
+     assert os.path.getsize(os.path.join(save_directory, "tokenizer_config.json")) > 0, "tokenizer_config.json is empty"
+
+     # Cleanup after test
+     for file in os.listdir(save_directory):
+         file_path = os.path.join(save_directory, file)
+         if os.path.isfile(file_path):
+             os.remove(file_path)
+     os.rmdir(save_directory)
tests/test_training.py ADDED
@@ -0,0 +1,64 @@
+ from src.training import train_model
+ from src.model import load_model
+ from src.dataset import formatting_prompts_func
+ from datasets import Dataset
+ import pytest
+ import torch
+ import os
+
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_gpu_feature():
+     # Your test code that needs a GPU
+     assert torch.cuda.is_available()
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ @pytest.fixture
+ def mock_dataset():
+     """Fixture to provide a mock dataset for training"""
+     data = {
+         "instruction": ["Test instruction 1", "Test instruction 2"],
+         "input": ["Test input 1", "Test input 2"],
+         "output": ["Test output 1", "Test output 2"]
+     }
+     formatted_data = formatting_prompts_func(data, template="Instruction: {}\nInput: {}\nOutput: {}", eos_token="<EOS>")
+     return Dataset.from_dict(formatted_data)
+
+ @pytest.mark.gpu
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
+ def test_train_model(mock_dataset):
+     """Test to ensure the training model function works with a mock dataset"""
+
+     # Load model
+     model_name = "unsloth/Meta-Llama-3.1-8B"
+     model, tokenizer = load_model(model_name, 16, None, True, {'': 0})
+
+     # Training arguments
+     training_args = {
+         "max_steps": 1,
+         "output_dir": "outputs"
+     }
+
+     # Train the model
+     train_stats = train_model(
+         model=model,
+         tokenizer=tokenizer,
+         train_dataset=mock_dataset,
+         dataset_text_field="text",
+         max_seq_length=16,
+         dataset_num_proc=1,
+         packing=False,
+         training_args=training_args
+     )
+
+     # Assert that training statistics are returned
+     assert train_stats is not None
+
+     # trainer.train() returns a TrainOutput with 'global_step' and 'training_loss'
+     assert hasattr(train_stats, "global_step")
+     assert hasattr(train_stats, "training_loss")
+
+     # For further validation, assert that the output directory was created
+     assert os.path.isdir(training_args["output_dir"])