konieshadow committed
Commit: 924aa01
Parent(s): 48811fe

Update the LLM model to google/gemma-3-4b-it, remove the no-longer-used Phi-4 model, improve device parameter support, and enhance logging in the speaker identifier.

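The practical effect of the new defaults is easiest to see at the router level. Below is a minimal sketch of the updated call path, modeled on the docstring examples in llm_router.py; the import path assumes the project root is on sys.path (as in the example scripts), and the message content is illustrative:

from src.podcast_transcribe.llm.llm_router import chat_completion

# After this commit the default provider is "gemma-transformers" with google/gemma-3-4b-it.
response = chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    provider="gemma-transformers",
    model="google/gemma-3-4b-it",
    device="mps",  # on MPS the router now forces torch_dtype=torch.float32
)
print(response["choices"][0]["message"]["content"])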
examples/combined_podcast_transcription.py CHANGED
@@ -9,9 +9,6 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from src.podcast_transcribe.transcriber import transcribe_podcast_audio
 from src.podcast_transcribe.audio import load_audio
 from src.podcast_transcribe.rss.podcast_rss_parser import parse_rss_xml_content
-from podcast_transcribe.llm.llm_gemma_mlx import GemmaMLXChatCompletion
-from src.podcast_transcribe.schemas import EnhancedSegment, CombinedTranscriptionResult
-from src.podcast_transcribe.summary.speaker_identify import recognize_speaker_names
 
 def main():
     """主函数"""
@@ -20,9 +17,10 @@ def main():
     # audio_file = Path("/Users/konie/Desktop/voices/lex_ai_john_carmack_30.wav")
 
     # 模型配置
-    asr_model_name = "mlx-community/" # ASR模型名称
+    asr_model_name = "distil-whisper/distil-large-v3.5" # ASR模型名称
     diarization_model_name = "pyannote/speaker-diarization-3.1" # 说话人分离模型名称
-    llm_model_path = "mlx-community/gemma-3-12b-it-4bit-DWQ"
+    llm_model_name = "google/gemma-3-4b-it"
+    llm_provider = "gemma-transformers"
     device = "mps" # 设备类型
     segmentation_batch_size = 64
     parallel = True
@@ -60,13 +58,9 @@ def main():
     result = transcribe_podcast_audio(audio,
                                       podcast_info=mock_podcast_info,
                                       episode_info=mock_episode_info,
-                                      asr_model_name=asr_model_name,
-                                      diarization_model_name=diarization_model_name,
-                                      llm_model_name=llm_model_path,
                                       device=device,
                                       segmentation_batch_size=segmentation_batch_size,
-                                      parallel=parallel,
-                                      llm_model_name=llm_model_path)
+                                      parallel=parallel,)
 
     # 输出结果
     print("\n转录结果:")
examples/simple_llm.py CHANGED
@@ -1,11 +1,10 @@
-
+import torch # 导入 torch
 # 添加项目根目录到Python路径
 import sys
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-from src.podcast_transcribe.llm.llm_phi4_transfomers import Phi4TransformersChatCompletion
 from src.podcast_transcribe.llm.llm_gemma_mlx import GemmaMLXChatCompletion
 from src.podcast_transcribe.llm.llm_gemma_transfomers import GemmaTransformersChatCompletion
 
@@ -14,6 +13,7 @@ if __name__ == "__main__":
     # 示例用法:
     print("正在初始化 LLM 聊天补全...")
     try:
+        # model_name = "mlx-community/gemma-3-12b-it-4bit-DWQ"
         model_name = "google/gemma-3-4b-it"
         use_4bit_quantization = False
         device = "mps"
@@ -22,10 +22,10 @@ if __name__ == "__main__":
         # 或者,如果您有更小、更快的模型,可以尝试使用,例如:"mlx-community/gemma-2b-it-8bit"
         if model_name.startswith("mlx-community"):
             gemma_chat = GemmaMLXChatCompletion(model_name=model_name)
-        elif model_name.startswith("microsoft"):
-            gemma_chat = Phi4TransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization, device=device)
         else:
-            gemma_chat = GemmaTransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization, device=device)
+            # 如果设备是 mps,则使用 float32 以增加稳定性
+            dtype_to_use = torch.float32 if device == "mps" else torch.float16
+            gemma_chat = GemmaTransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization, device=device, torch_dtype=dtype_to_use)
 
         print("\n--- 示例 1: 简单用户查询 ---")
         messages_example1 = [
examples/simple_speaker_identify.py CHANGED
@@ -13,6 +13,7 @@ from src.podcast_transcribe.summary.speaker_identify import SpeakerIdentifier
 if __name__ == '__main__':
     transcribe_result_dump_file = Path.joinpath(Path(__file__).parent, "output", "lex_ai_john_carmack_1.transcription.json")
     podcast_rss_xml_file = Path.joinpath(Path(__file__).parent, "input", "lexfridman.com.rss.xml")
+    device = "mps"
 
     # Load the transcription result
     if not os.path.exists(transcribe_result_dump_file):
@@ -57,8 +58,9 @@ if __name__ == '__main__':
 
 
     speaker_identifier = SpeakerIdentifier(
-        llm_model_name="mlx-community/gemma-3-12b-it-4bit-DWQ",
-        llm_provider="gemma-mlx"
+        llm_model_name="google/gemma-3-4b-it",
+        llm_provider="gemma-transformers",
+        device=device
     )
 
     # 3. Call the function
src/podcast_transcribe/llm/llm_base.py CHANGED
@@ -146,19 +146,12 @@ class BaseChatCompletion(ABC):
         temperature: float = 0.7,
         max_tokens: int = 2048,
         top_p: float = 1.0,
-        model: Optional[str] = None,
         **kwargs,
     ):
         """
        创建聊天完成响应。
        模仿OpenAI的ChatCompletion.create方法。
        """
-        if model and model != self.model_name:
-            # 这是一个简化的处理。在实际场景中,您可能希望加载新模型。
-            # 目前,我们将只打印一个警告并使用初始化的模型。
-            print(f"警告: 'model' 参数 ({model}) 与初始化的模型 ({self.model_name}) 不同。"
-                  f"正在使用初始化的模型。要使用不同的模型,请重新初始化该类。")
-
         # 为Gemma格式化消息
         prompt_str = self._format_messages_for_gemma(messages)
 
src/podcast_transcribe/llm/llm_gemma_transfomers.py CHANGED
@@ -13,7 +13,8 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
         use_4bit_quantization: bool = False,
         device_map: Optional[str] = None,
         device: Optional[str] = None,
-        trust_remote_code: bool = True
+        trust_remote_code: bool = True,
+        torch_dtype: Optional[torch.dtype] = None
     ):
         # Gemma 使用 float16 作为默认数据类型
         super().__init__(
@@ -22,7 +23,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
             device_map=device_map,
             device=device,
             trust_remote_code=trust_remote_code,
-            torch_dtype=torch.float16
+            torch_dtype=torch_dtype if torch_dtype is not None else torch.float16
         )
 
     def _print_error_hints(self):
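The new torch_dtype parameter lets callers override Gemma's float16 default; the updated example script switches to float32 on Apple MPS for stability. A minimal sketch of that selection logic, mirroring examples/simple_llm.py (constructing the class downloads google/gemma-3-4b-it):

import torch
from src.podcast_transcribe.llm.llm_gemma_transfomers import GemmaTransformersChatCompletion

device = "mps"
# Per the updated example, use float32 on MPS for better stability; float16 elsewhere.
dtype_to_use = torch.float32 if device == "mps" else torch.float16

gemma_chat = GemmaTransformersChatCompletion(
    model_name="google/gemma-3-4b-it",
    use_4bit_quantization=False,
    device=device,
    torch_dtype=dtype_to_use,
)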
src/podcast_transcribe/llm/llm_phi4_transfomers.py DELETED
@@ -1,369 +0,0 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from typing import List, Dict, Optional, Union, Literal
-from .llm_base import TransformersBaseChatCompletion
-
-
-class Phi4TransformersChatCompletion(TransformersBaseChatCompletion):
-    """基于 Transformers 库的 Phi-4-mini-reasoning 聊天完成实现"""
-
-    def __init__(
-        self,
-        model_name: str = "microsoft/Phi-4-mini-reasoning",
-        use_4bit_quantization: bool = False,
-        device_map: Optional[str] = None,
-        device: Optional[str] = None,
-        trust_remote_code: bool = True
-    ):
-        # Phi-4 使用 bfloat16 作为推荐数据类型
-        super().__init__(
-            model_name=model_name,
-            use_4bit_quantization=use_4bit_quantization,
-            device_map=device_map,
-            device=device,
-            trust_remote_code=trust_remote_code,
-            torch_dtype=torch.bfloat16
-        )
-
-    def _print_error_hints(self):
-        """打印Phi-4特定的错误提示信息"""
-        super()._print_error_hints()
-        print("Phi-4 特殊要求:")
-        print("- 建议使用 Transformers >= 4.51.3")
-        print("- 推荐使用 bfloat16 数据类型")
-        print("- 模型支持 128K token 上下文长度")
-
-    def _format_phi4_messages(self, messages: List[Dict[str, str]]) -> str:
-        """
-        格式化消息为 Phi-4 的聊天格式
-        Phi-4 使用特定的聊天模板格式
-        """
-        # 使用 tokenizer 的内置聊天模板
-        if hasattr(self.tokenizer, 'apply_chat_template'):
-            return self.tokenizer.apply_chat_template(
-                messages,
-                tokenize=False,
-                add_generation_prompt=True
-            )
-        else:
-            # 如果没有聊天模板,使用 Phi-4 的标准格式
-            formatted_prompt = ""
-            for message in messages:
-                role = message.get("role", "user")
-                content = message.get("content", "")
-
-                if role == "system":
-                    formatted_prompt += f"<|system|>\n{content}<|end|>\n"
-                elif role == "user":
-                    formatted_prompt += f"<|user|>\n{content}<|end|>\n"
-                elif role == "assistant":
-                    formatted_prompt += f"<|assistant|>\n{content}<|end|>\n"
-
-            # 添加助手开始标记
-            formatted_prompt += "<|assistant|>\n"
-            return formatted_prompt
-
-    def _generate_response(
-        self,
-        prompt_str: str,
-        temperature: float,
-        max_tokens: int,
-        top_p: float,
-        enable_reasoning: bool = True,
-        **kwargs
-    ) -> str:
-        """使用 transformers 生成响应,针对 Phi-4 推理功能优化"""
-
-        # 对提示进行编码
-        inputs = self.tokenizer.encode(prompt_str, return_tensors="pt")
-
-        # 移动输入到正确的设备
-        if self.device_map is None or self.device.type == "mps":
-            inputs = inputs.to(self.device)
-
-        # Phi-4-mini-reasoning 优化的生成参数
-        generation_config = {
-            "max_new_tokens": min(max_tokens, 32768),  # Phi-4-mini 支持最大 32K token
-            "temperature": temperature,
-            "top_p": top_p,
-            "do_sample": True if temperature > 0 else False,
-            "pad_token_id": self.tokenizer.pad_token_id,
-            "eos_token_id": self.tokenizer.eos_token_id,
-            "repetition_penalty": kwargs.get("repetition_penalty", 1.1),
-            "no_repeat_ngram_size": kwargs.get("no_repeat_ngram_size", 3),
-        }
-
-        # 推理模式配置
-        if enable_reasoning and "reasoning" in self.model_name.lower():
-            # 为推理任务优化的配置
-            generation_config.update({
-                "temperature": max(temperature, 0.1),  # 推理模式下保持一定的温度
-                "top_p": min(top_p, 0.95),  # 推理模式下限制 top_p
-                "do_sample": True,  # 推理模式下总是启用采样
-                "early_stopping": False,  # 允许完整的推理过程
-            })
-
-        # 如果温度为0,使用贪婪解码
-        if temperature == 0:
-            generation_config["do_sample"] = False
-            generation_config.pop("temperature", None)
-            generation_config.pop("top_p", None)
-
-        try:
-            # 生成响应
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    inputs,
-                    **generation_config
-                )
-
-            # 解码生成的文本,跳过输入部分
-            generated_tokens = outputs[0][len(inputs[0]):]
-            generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
-            return generated_text
-
-        except Exception as e:
-            print(f"生成响应时出错: {e}")
-            raise
-
-    def create(
-        self,
-        messages: List[Dict[str, str]],
-        temperature: float = 0.7,
-        max_tokens: int = 2048,
-        top_p: float = 1.0,
-        model: Optional[str] = None,
-        enable_reasoning: bool = True,
-        **kwargs,
-    ):
-        """
-        创建聊天完成响应,支持Phi-4特有的推理功能
-        """
-        if model and model != self.model_name:
-            print(f"警告: 'model' 参数 ({model}) 与初始化的模型 ({self.model_name}) 不同。"
-                  f"正在使用初始化的模型。要使用不同的模型,请重新初始化该类。")
-
-        # 检查是否为推理任务
-        is_reasoning_task = self._is_reasoning_task(messages)
-
-        # 格式化消息为 Phi-4 聊天格式
-        if is_reasoning_task and enable_reasoning:
-            prompt_str = self._format_reasoning_prompt(messages)
-        else:
-            prompt_str = self._format_phi4_messages(messages)
-
-        # 生成响应
-        response_text = self._generate_response(
-            prompt_str,
-            temperature,
-            max_tokens,
-            top_p,
-            enable_reasoning=enable_reasoning and is_reasoning_task,
-            **kwargs
-        )
-
-        # 后处理响应(使用基类的方法,但针对Phi-4调整)
-        assistant_message_content = self._post_process_phi4_response(response_text, prompt_str)
-
-        # 计算token使用量
-        token_usage = self._calculate_tokens(prompt_str, assistant_message_content)
-
-        # 构建响应对象
-        response = self._build_chat_completion_response(assistant_message_content, token_usage)
-
-        # 添加Phi-4特有的信息
-        response["reasoning_enabled"] = enable_reasoning and is_reasoning_task
-
-        return response
-
-    def _post_process_phi4_response(self, response_text: str, prompt_str: str) -> str:
-        """
-        后处理Phi-4生成的响应文本
-        """
-        # Phi-4的输出通常不包含输入提示,直接返回生成的内容
-        assistant_message_content = response_text.strip()
-
-        # 清理可能的特殊标记
-        if assistant_message_content.endswith("<|end|>"):
-            assistant_message_content = assistant_message_content[:-7].strip()
-
-        return assistant_message_content
-
-    def _is_reasoning_task(self, messages: List[Dict[str, str]]) -> bool:
-        """检测是否为推理任务"""
-        reasoning_keywords = [
-            "解题", "推理", "计算", "证明", "分析", "逻辑", "步骤",
-            "solve", "reasoning", "calculate", "prove", "analyze", "logic", "step"
-        ]
-
-        for message in messages:
-            content = message.get("content", "").lower()
-            if any(keyword in content for keyword in reasoning_keywords):
-                return True
-
-        return False
-
-    def _format_reasoning_prompt(self, messages: List[Dict[str, str]]) -> str:
-        """
-        为推理任务格式化特殊的提示词
-        """
-        # 添加推理指导的系统消息
-        reasoning_system_msg = {
-            "role": "system",
-            "content": "你是一个专业的数学推理助手。请逐步分析问题,展示详细的推理过程,包括:\n1. 问题理解\n2. 解题思路\n3. 具体步骤\n4. 最终答案\n\n每个步骤都要清晰明了。"
-        }
-
-        # 将推理系统消息添加到消息列表的开头
-        enhanced_messages = [reasoning_system_msg] + messages
-
-        # 使用标准格式化方法
-        return self._format_phi4_messages(enhanced_messages)
-
-    def reasoning_completion(
-        self,
-        messages: List[Dict[str, str]],
-        temperature: float = 0.3,  # 推理任务使用较低的温度
-        max_tokens: int = 2048,  # 推理任务需要更多 tokens
-        top_p: float = 0.9,
-        extract_reasoning_steps: bool = True,
-        **kwargs
-    ) -> Dict[str, Union[str, Dict, List]]:
-        """
-        专门用于推理任务的聊天完成接口
-
-        Args:
-            messages: 对话消息列表
-            temperature: 采样温度(推理任务建议使用较低值)
-            max_tokens: 最大生成token数量
-            top_p: top-p采样参数
-            extract_reasoning_steps: 是否提取推理步骤
-            **kwargs: 其他参数
-
-        Returns:
-            包含推理步骤的响应字典
-        """
-        # 强制启用推理模式
-        response = self.create(
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            top_p=top_p,
-            enable_reasoning=True,
-            **kwargs
-        )
-
-        if extract_reasoning_steps:
-            # 提取推理步骤
-            content = response["choices"][0]["message"]["content"]
-            reasoning_steps = self._extract_reasoning_steps(content)
-            response["reasoning_steps"] = reasoning_steps
-
-        return response
-
-    def _extract_reasoning_steps(self, content: str) -> List[Dict[str, str]]:
-        """
-        从响应内容中提取推理步骤
-        """
-        steps = []
-        lines = content.split('\n')
-        current_step = {"title": "", "content": ""}
-
-        step_patterns = [
-            "1. 问题理解", "2. 解题思路", "3. 具体步骤", "4. 最终答案",
-            "步骤", "分析", "解答", "结论", "reasoning", "step", "analysis", "solution"
-        ]
-
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-
-            # 检查是否是新的步骤开始
-            is_new_step = any(pattern in line.lower() for pattern in step_patterns)
-            if is_new_step and current_step["content"]:
-                steps.append(current_step.copy())
-                current_step = {"title": line, "content": ""}
-            elif is_new_step:
-                current_step["title"] = line
-            else:
-                if current_step["title"]:
-                    current_step["content"] += line + "\n"
-                else:
-                    current_step["content"] = line + "\n"
-
-        # 添加最后一个步骤
-        if current_step["title"] or current_step["content"]:
-            steps.append(current_step)
-
-        return steps
-
-    def get_model_info(self) -> Dict[str, Union[str, bool, int]]:
-        """获取 Phi-4 模型信息"""
-        model_info = super().get_model_info()
-
-        # 添加Phi-4特有的信息
-        model_info.update({
-            "model_family": "Phi-4-mini-reasoning",
-            "parameters": "3.8B",
-            "context_length": "128K tokens",
-            "specialization": "数学推理优化",
-        })
-
-        return model_info
-
-
-# 工厂函数
-def create_phi4_transformers_client(
-    model_name: str = "microsoft/Phi-4-mini-reasoning",
-    use_4bit_quantization: bool = False,
-    device: Optional[str] = None,
-    **kwargs
-) -> Phi4TransformersChatCompletion:
-    """
-    创建 Phi-4 Transformers 客户端的工厂函数
-
-    Args:
-        model_name: 模型名称,默认为 microsoft/Phi-4-mini-reasoning
-        use_4bit_quantization: 是否使用4bit量化
-        device: 指定设备 ("cpu", "cuda", "mps", 等)
-        **kwargs: 其他传递给构造函数的参数
-
-    Returns:
-        Phi4TransformersChatCompletion 实例
-    """
-    return Phi4TransformersChatCompletion(
-        model_name=model_name,
-        use_4bit_quantization=use_4bit_quantization,
-        device=device,
-        **kwargs
-    )
-
-def create_reasoning_client(
-    model_name: str = "microsoft/Phi-4-mini-reasoning",
-    use_4bit_quantization: bool = False,
-    device: Optional[str] = None,
-    **kwargs
-) -> Phi4TransformersChatCompletion:
-    """
-    创建专门用于推理任务的 Phi-4 客户端
-
-    Args:
-        model_name: 模型名称,推荐使用 microsoft/Phi-4-mini-reasoning
-        use_4bit_quantization: 是否使用4bit量化
-        device: 指定设备 ("cpu", "cuda", "mps", 等)
-        **kwargs: 其他传递给构造函数的参数
-
-    Returns:
-        优化了推理功能的 Phi4TransformersChatCompletion 实例
-    """
-    # 确保使用推理模型
-    if "reasoning" not in model_name.lower():
-        print("警告: 建议使用包含 'reasoning' 的模型名称以获得最佳推理性能")
-
-    return Phi4TransformersChatCompletion(
-        model_name=model_name,
-        use_4bit_quantization=use_4bit_quantization,
-        device=device,
-        **kwargs
-    )
src/podcast_transcribe/llm/llm_router.py CHANGED
@@ -4,13 +4,13 @@ LLM模型调用路由器
 """
 
 import logging
+import torch
 from typing import Dict, Any, Optional, List, Union
 
 import spaces
 from .llm_base import BaseChatCompletion
 from . import llm_gemma_mlx
 from . import llm_gemma_transfomers
-from . import llm_phi4_transfomers
 
 # 配置日志
 logger = logging.getLogger("llm")
@@ -39,19 +39,9 @@ class LLMRouter:
            "default_model": "google/gemma-3-4b-it",
            "supported_params": [
                "model_name", "use_4bit_quantization", "device_map",
-                "device", "trust_remote_code"
+                "device", "trust_remote_code", "torch_dtype"
            ],
            "description": "基于Transformers库的Gemma聊天完成实现"
-        },
-        "phi4-transformers": {
-            "module_path": "llm_phi4_transfomers",
-            "class_name": "Phi4TransformersChatCompletion",
-            "default_model": "microsoft/Phi-4-reasoning",
-            "supported_params": [
-                "model_name", "use_4bit_quantization", "device_map",
-                "device", "trust_remote_code", "enable_reasoning"
-            ],
-            "description": "基于Transformers库的Phi-4推理聊天完成实现"
        }
    }
 
@@ -77,8 +67,6 @@ class LLMRouter:
            module = llm_gemma_mlx
        elif module_path == "llm_gemma_transfomers":
            module = llm_gemma_transfomers
-        elif module_path == "llm_phi4_transfomers":
-            module = llm_phi4_transfomers
        else:
            raise ImportError(f"未找到模块: {module_path}")
 
@@ -219,6 +207,12 @@
        if model is not None:
            kwargs["model_name"] = model
 
+        # 如果设备是 mps,并且是 transformers provider,则强制使用 float32
+        current_device = kwargs.get("device")
+        if current_device == "mps":
+            if provider == "gemma-transformers":
+                kwargs["torch_dtype"] = torch.float32
+
        # 获取或创建LLM实例
        llm_instance = self._get_or_create_instance(provider, **kwargs)
 
@@ -242,7 +236,7 @@
    def reasoning_completion(
        self,
        messages: List[Dict[str, str]],
-        provider: str = "phi4-transformers",
+        provider: str = "gemma-transformers",
        temperature: float = 0.3,
        max_tokens: int = 2048,
        top_p: float = 0.9,
@@ -255,7 +249,7 @@
 
        参数:
            messages: 消息列表,每个消息包含role和content
-            provider: LLM提供者名称,默认使用phi4-transformers
+            provider: LLM提供者名称,默认使用gemma-transformers
            temperature: 温度参数(推理任务建议使用较低值)
            max_tokens: 最大生成token数
            top_p: nucleus采样参数
@@ -269,14 +263,20 @@
        logger.info(f"使用provider '{provider}' 进行推理完成,消息数量: {len(messages)}")
 
        # 确保使用支持推理的provider
-        if provider not in ["phi4-transformers"]:
-            logger.warning(f"Provider '{provider}' 可能不支持推理功能,建议使用 'phi4-transformers'")
+        if provider not in ["gemma-transformers"]:
+            logger.warning(f"Provider '{provider}' 可能不支持推理功能,建议使用 'gemma-transformers'")
 
        try:
            # 如果提供了model参数,添加到kwargs中
            if model is not None:
                kwargs["model_name"] = model
 
+            # 如果设备是 mps,并且是 transformers provider,则强制使用 float32
+            current_device = kwargs.get("device")
+            if current_device == "mps":
+                if provider == "gemma-transformers":
+                    kwargs["torch_dtype"] = torch.float32
+
            # 获取或创建LLM实例
            llm_instance = self._get_or_create_instance(provider, **kwargs)
 
@@ -372,7 +372,7 @@ _router = LLMRouter()
 @spaces.GPU(duration=60)
 def chat_completion(
    messages: List[Dict[str, str]],
-    provider: str = "gemma-mlx",
+    provider: str = "gemma-transformers",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 1.0,
@@ -391,7 +391,6 @@
        provider: LLM提供者,可选值:
            - "gemma-mlx": 基于MLX库的Gemma聊天完成实现
            - "gemma-transformers": 基于Transformers库的Gemma聊天完成实现
-            - "phi4-transformers": 基于Transformers库的Phi-4推理聊天完成实现
        temperature: 温度参数,控制生成的随机性 (0.0-2.0)
        max_tokens: 最大生成token数
        top_p: nucleus采样参数 (0.0-1.0)
@@ -421,14 +420,6 @@
            use_4bit_quantization=True
        )
 
-        # 使用Phi-4推理实现
-        response = chat_completion(
-            messages=[{"role": "user", "content": "解这个数学题:2x + 5 = 15"}],
-            provider="phi4-transformers",
-            model="microsoft/Phi-4-mini-reasoning",
-            device="cuda"
-        )
-
        # 自定义参数
        response = chat_completion(
            messages=[
@@ -466,7 +457,7 @@
 @spaces.GPU(duration=60)
 def reasoning_completion(
    messages: List[Dict[str, str]],
-    provider: str = "phi4-transformers",
+    provider: str = "gemma-transformers",
    temperature: float = 0.3,
    max_tokens: int = 2048,
    top_p: float = 0.9,
@@ -483,7 +474,7 @@
 
    参数:
        messages: 消息列表,每个消息包含role和content字段
-        provider: LLM提供者,默认使用phi4-transformers
+        provider: LLM提供者,默认使用gemma-transformers
        temperature: 温度参数(推理任务建议使用较低值)
        max_tokens: 最大生成token数
        top_p: nucleus采样参数
@@ -502,14 +493,14 @@
        # 数学推理任务
        response = reasoning_completion(
            messages=[{"role": "user", "content": "解这个方程:3x + 7 = 22"}],
-            provider="phi4-transformers",
+            provider="gemma-transformers",
            extract_reasoning_steps=True
        )
 
        # 逻辑推理任务
        response = reasoning_completion(
            messages=[{"role": "user", "content": "如果所有的猫都是动物,而小花是一只猫,那么小花是什么?"}],
-            provider="phi4-transformers",
+            provider="gemma-transformers",
            temperature=0.2
        )
    """
src/podcast_transcribe/summary/speaker_identify.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import List, Dict, Optional
 import json
 import re
@@ -5,22 +6,26 @@ import re
 from ..schemas import EnhancedSegment, PodcastChannel, PodcastEpisode
 from ..llm import llm_router
 
+# 配置日志
+logger = logging.getLogger("speaker_identify")
 
 class SpeakerIdentifier:
     """
     说话人识别器类,用于根据转录分段和播客元数据识别说话人的真实姓名或昵称
     """
 
-    def __init__(self, llm_model_name: str, llm_provider: str):
+    def __init__(self, llm_model_name: str, llm_provider: str, device: Optional[str] = None):
        """
        初始化说话人识别器
 
        参数:
            llm_model_name: LLM模型名称,如果为None则使用默认模型
            llm_provider: LLM提供者,默认为"gemma-mlx"
+            device: 计算设备,例如 "cpu", "cuda", "mps"
        """
        self.llm_model_name = llm_model_name
        self.llm_provider = llm_provider
+        self.device = device
 
    def _clean_html(self, html_string: Optional[str]) -> str:
        """
@@ -280,8 +285,10 @@ Please begin your analysis and provide the JSON result.
            provider=self.llm_provider,
            model=self.llm_model_name,
            temperature=0.1,
-            max_tokens=1024
+            max_tokens=1024,
+            device=self.device
        )
+        logger.info(f"LLM调用日志,请求参数:【{messages}】, 响应: 【{response}】")
        assistant_response_content = response["choices"][0]["message"]["content"]
 
        parsed_llm_output = None
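With the added device parameter, SpeakerIdentifier stores the compute device and forwards it to the router's chat_completion call (which in turn applies the float32 override on MPS). A minimal construction sketch, mirroring examples/simple_speaker_identify.py:

from src.podcast_transcribe.summary.speaker_identify import SpeakerIdentifier

speaker_identifier = SpeakerIdentifier(
    llm_model_name="google/gemma-3-4b-it",
    llm_provider="gemma-transformers",
    device="mps",  # stored on the instance and passed as device= to the LLM call
)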
src/podcast_transcribe/transcriber.py CHANGED
@@ -29,8 +29,8 @@ class CombinedTranscriber:
         asr_provider: str,
         diarization_provider: str,
         diarization_model_name: str,
-        llm_model_name: Optional[str] = None,
-        llm_provider: Optional[str] = None,
+        llm_model_name: str,
+        llm_provider: str,
         device: Optional[str] = None,
         segmentation_batch_size: int = 64,
         parallel: bool = False,
@@ -43,6 +43,8 @@
            asr_provider: ASR提供者名称
            diarization_provider: 说话人分离提供者名称
            diarization_model_name: 说话人分离模型名称
+            llm_model_name: LLM模型名称
+            llm_provider: LLM提供者名称
            device: 推理设备,'cpu'或'cuda'
            segmentation_batch_size: 分割批处理大小,默认为64
            parallel: 是否并行执行ASR和说话人分离,默认为False
@@ -51,23 +53,10 @@
        import torch
        if torch.backends.mps.is_available():
            device = "mps"
-            if not llm_model_name:
-                llm_model_name = "mlx-community/gemma-3-12b-it-4bit-DWQ"
-            if not llm_provider:
-                llm_provider = "gemma-mlx"
-
        elif torch.cuda.is_available():
            device = "cuda"
-            if not llm_model_name:
-                llm_model_name = "google/gemma-3-4b-it"
-            if not llm_provider:
-                llm_provider = "gemma-transformers"
        else:
            device = "cpu"
-            if not llm_model_name:
-                llm_model_name = "google/gemma-3-4b-it"
-            if not llm_provider:
-                llm_provider = "gemma-transformers"
 
        self.asr_model_name = asr_model_name
        self.asr_provider = asr_provider
@@ -79,7 +68,8 @@
 
        self.speaker_identifier = SpeakerIdentifier(
            llm_model_name=llm_model_name,
-            llm_provider=llm_provider
+            llm_provider=llm_provider,
+            device=device
        )
 
        logger.info(f"初始化组合转录器,ASR提供者: {asr_provider},ASR模型: {asr_model_name},分离提供者: {diarization_provider},分离模型: {diarization_model_name},分割批处理大小: {segmentation_batch_size},并行执行: {parallel},推理设备: {device}")
@@ -513,6 +503,8 @@ def transcribe_audio(
        asr_provider=asr_provider,
        diarization_model_name=diarization_model_name,
        diarization_provider=diarization_provider,
+        llm_model_name="",
+        llm_provider="",
        device=device,
        segmentation_batch_size=segmentation_batch_size,
        parallel=parallel
@@ -529,8 +521,8 @@ def transcribe_podcast_audio(
    asr_provider: str = "distil_whisper_transformers",
    diarization_model_name: str = "pyannote/speaker-diarization-3.1",
    diarization_provider: str = "pyannote_transformers",
-    llm_model_name: Optional[str] = None,
-    llm_provider: Optional[str] = None,
+    llm_model_name: str = "google/gemma-3-4b-it",
+    llm_provider: str = "gemma-transformers",
    device: Optional[str] = None,
    segmentation_batch_size: int = 64,
    parallel: bool = False,
@@ -546,8 +538,8 @@
        asr_provider: ASR提供者名称
        diarization_provider: 说话人分离提供者名称
        diarization_model_name: 说话人分离模型名称
-        llm_model_name: LLM模型名称,如果为None则无法识别说话人名称
-        llm_provider: LLM提供者名称,如果为None则无法识别说话人名称
+        llm_model_name: LLM模型名称
+        llm_provider: LLM提供者名称
        device: 推理设备,'cpu'或'cuda'
        segmentation_batch_size: 分割批处理大小,默认为64
        parallel: 是否并行执行ASR和说话人分离,默认为False