Spaces:
Sleeping
Sleeping
| import os | |
| from dataclasses import dataclass | |
@dataclass
class ModelConfig:
    """Central settings for the models and processing parameters.

    Every field has a default, so ``ModelConfig()`` yields a usable
    configuration and any individual field can be overridden by keyword.

    ``hf_token`` is not a constructor argument: it is read from the
    ``HF_TOKEN`` environment variable when the instance is created and is
    ``None`` when that variable is unset.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    # Presumably a language code (Indonesian) -- confirm against the consumer.
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    # NOTE(review): units of chunk_size/chunk_overlap (characters? tokens?)
    # are not visible from this file -- confirm with the code that uses them.
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # None is a sentinel replaced in __post_init__, so every instance gets
    # its own list instead of sharing one mutable default.
    output_formats: list = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and read the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]
        # Set HF token from environment
        self.hf_token = os.environ.get("HF_TOKEN", None)
# Shared module-level configuration object, built with all default settings.
config = ModelConfig()