import os
import subprocess

import requests
import torch
import torchvision
import spaces
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Idefics2ForConditionalGeneration,
    LlavaForConditionalGeneration,
    LlavaNextForConditionalGeneration,
    PaliGemmaForConditionalGeneration,
)
# Install flash-attn at runtime, skipping the CUDA build so the prebuilt wheel is used.
def install_flash_attn():
    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        # Merge with the current environment rather than replacing it;
        # passing only the one variable would drop PATH and hide pip from the shell.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
        check=True,
    )
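
# Usage sketch (an assumption, not confirmed by this file: the install is
# typically run once at Space startup, before any model that requests
# flash-attention is loaded):
#
#     install_flash_attn()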

# Authenticate with the Hugging Face Hub.
def authenticate_hf(token):
    login(token=token, add_to_git_credential=True)
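
# Example call (a sketch, assuming the HF_TOKEN secret is configured on the
# Space and exposed as an environment variable):
#
#     hf_token = os.getenv("HF_TOKEN")
#     if hf_token:
#         authenticate_hf(hf_token)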

# Cache model summaries so repeated requests for the same model are free.
model_cache = {}

def get_model_summary(model_name):
    if model_name in model_cache:
        return model_cache[model_name], ""
    try:
        # Fetch the model's config.json to determine its architecture.
        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
        headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
        response = requests.get(config_url, headers=headers)
        response.raise_for_status()
        config = response.json()
        architecture = config["architectures"][0]

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Heuristic: treat models with "quantized" in their name as 4-bit
        # and prepare a bitsandbytes config for them.
        is_quantized = "quantized" in model_name.lower()
        bnb_config = BitsAndBytesConfig(load_in_4bit=True) if is_quantized else None
        # Load the model with the class matching its architecture, falling back
        # to AutoModelForCausalLM for everything else. Note that quantization
        # settings are passed via `quantization_config`, not `config`, which
        # expects a PretrainedConfig object.
        arch_to_class = {
            "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
            "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
            "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
            "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
        }
        model_class = arch_to_class.get(architecture, AutoModelForCausalLM)
        model = model_class.from_pretrained(
            model_name, quantization_config=bnb_config, trust_remote_code=True
        )
        # bitsandbytes places quantized models on the GPU automatically;
        # only move full-precision models to the device explicitly.
        if not is_quantized:
            model = model.to(device)

        model_summary = str(model)
        model_cache[model_name] = model_summary
        return model_summary, ""
    except Exception as e:
        return "", str(e)