Spaces:
Runtime error
Runtime error
| from datetime import datetime | |
| from functools import lru_cache | |
| from typing import List, Optional | |
| from datasets import Dataset | |
| from huggingface_hub import list_models | |
| from pydantic import BaseModel, field_validator | |
| from toolz import groupby | |
| from tqdm.auto import tqdm | |
| def get_all_models(): | |
| models = list( | |
| tqdm( | |
| iter( | |
| list_models( | |
| cardData=True, full=True, limit=None, sort="downloads", direction=-1 | |
| ) | |
| ) | |
| ) | |
| ) | |
| return [model for model in models if model is not None] | |
| def has_base_model_info(model): | |
| return bool(hasattr(model.card_data, "base_model")) | |
| class HubModel(BaseModel): | |
| author: Optional[str] = None | |
| last_modified: Optional[datetime] = None | |
| createdAt: Optional[datetime] = None | |
| downloads: Optional[int] = None | |
| likes: Optional[int] = None | |
| library_name: Optional[str] = None | |
| modelId: Optional[str] = None | |
| datasets: Optional[List[str]] = None | |
| language: Optional[List[str]] = None | |
| base_model: Optional[str] = None | |
| def ensure_list(cls, v): | |
| return [v] if isinstance(v, str) else v | |
| def from_original(cls, original_data: dict) -> "HubModel": | |
| card_data = original_data.get("card_data", {}) | |
| if card_data is None: | |
| card_data = {} | |
| if not isinstance(card_data, dict): | |
| card_data = card_data.__dict__ | |
| return cls( | |
| author=original_data.get("author"), | |
| last_modified=original_data.get("last_modified"), | |
| createdAt=original_data.get("createdAt"), | |
| downloads=original_data.get("downloads"), | |
| likes=original_data.get("likes"), | |
| library_name=original_data.get("library_name"), | |
| modelId=original_data.get("modelId"), | |
| datasets=card_data.get("datasets"), | |
| language=card_data.get("language"), | |
| base_model=card_data.get("base_model"), | |
| ) | |
| def load_data(): | |
| grouped_by_has_base_model_info = groupby(has_base_model_info, get_all_models()) | |
| models_with_base_model_info = grouped_by_has_base_model_info.get(True) | |
| models_without_base_models = grouped_by_has_base_model_info.get(False) | |
| parsed_models = [ | |
| HubModel.from_original(model.__dict__).model_dump() | |
| for model in models_with_base_model_info | |
| ] | |
| base_models = {model["base_model"] for model in parsed_models} | |
| base_models = [ | |
| model for model in tqdm(models_without_base_models) if model.id in base_models | |
| ] | |
| base_models = [model for model in base_models if model is not None] | |
| parsed_base_models = [ | |
| HubModel.from_original(model.__dict__).model_dump() for model in base_models | |
| ] | |
| ds = Dataset.from_list(parsed_models + parsed_base_models) | |
| ds.push_to_hub("davanstrien/hub_models_with_base_model_info") | |
| print("Pushed to hub") | |
| return ds | |