""" Piper TTS 服务实现 - 本地高效文字转语音 """ from __future__ import annotations import os from pathlib import Path from typing import Generator, Optional, Tuple import numpy as np from loguru import logger from app.services.tts_service import TTSServiceInterface, TTSAudioChunk from app.config import settings class PiperTTSService(TTSServiceInterface): """Piper TTS 服务实现""" def __init__(self): self._voice = None self._initialized = False self._model_path: Optional[Path] = None self._config_path: Optional[Path] = None async def initialize(self) -> bool: """加载 Piper 模型""" try: from piper import PiperVoice # 查找模型文件 model_dir = Path(settings.TTS_MODEL_DIR) voice_name = settings.TTS_VOICE_NAME # Piper 模型文件命名: zh_CN-huayan-medium.onnx + .json onnx_file = model_dir / f"{voice_name}.onnx" json_file = model_dir / f"{voice_name}.onnx.json" if not onnx_file.exists(): logger.error(f"Piper 模型文件不存在: {onnx_file}") logger.info(f"请下载 Piper 中文模型到 {model_dir}") logger.info(f"下载命令: python -m piper.download_voice zh_CN-huayan-medium") return False self._model_path = onnx_file self._config_path = json_file # 加载模型 logger.info(f"正在加载 Piper 模型: {voice_name}") self._voice = PiperVoice.load(str(onnx_file)) logger.info( f"Piper TTS 初始化成功: {voice_name}, " f"采样率={settings.TTS_SAMPLE_RATE}Hz" ) self._initialized = True return True except ImportError: logger.error("piper-tts 未安装,请运行: pip install piper-tts") return False except Exception as e: logger.error(f"Piper TTS 初始化失败: {e}") return False def synthesize( self, text: str, sample_rate: int = 24000, ) -> Generator[np.ndarray, None, None]: """ 流式合成语音 Args: text: 要合成的文本 sample_rate: 目标采样率 Yields: 音频块 (numpy array, int16) """ if not self._initialized or self._voice is None: raise RuntimeError("TTS 服务未初始化") try: # 勿 list() 化全句:保持生成器迭代以便尽快 yield 首块 for chunk_data in self._voice.synthesize(text): yield chunk_data except Exception as e: logger.error(f"TTS 合成失败: text='{text[:50]}...', 错误={e}") raise def synthesize_complete( self, text: str, sample_rate: int = 24000, ) -> Tuple[np.ndarray, int]: """ 完整合成语音(一次性返回全部音频) Args: text: 要合成的文本 sample_rate: 目标采样率 Returns: (音频数据, 采样率) """ if not self._initialized or self._voice is None: raise RuntimeError("TTS 服务未初始化") try: # 合成完整音频 audio_parts = [] for chunk_data in self._voice.synthesize(text): audio_parts.append(chunk_data) # 拼接所有块 if audio_parts: full_audio = np.concatenate(audio_parts) return full_audio, sample_rate else: return np.array([], dtype=np.int16), sample_rate except Exception as e: logger.error(f"TTS 完整合成失败: text='{text[:50]}...', 错误={e}") raise async def shutdown(self): """关闭服务""" self._voice = None self._initialized = False logger.info("Piper TTS 服务已关闭")