# 2026-04-14 10:08:41 +08:00
#
# 132 lines
# 4.2 KiB
# Python

"""
Piper TTS 服务实现 - 本地高效文字转语音
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Generator, Optional, Tuple
import numpy as np
from loguru import logger
from app.services.tts_service import TTSServiceInterface, TTSAudioChunk
from app.config import settings
class PiperTTSService(TTSServiceInterface):
    """Text-to-speech service backed by a locally loaded Piper voice model.

    Typical lifecycle: construct, ``await initialize()`` to load the voice,
    call ``synthesize`` / ``synthesize_complete``, then ``await shutdown()``.
    """

    def __init__(self):
        # Loaded ``PiperVoice``; stays ``None`` until ``initialize`` succeeds.
        self._voice = None
        self._initialized = False
        # Paths of the voice files resolved by ``initialize``.
        self._model_path: Optional[Path] = None
        self._config_path: Optional[Path] = None

    async def initialize(self) -> bool:
        """Locate and load the Piper voice configured in ``settings``.

        The voice is looked up as ``<TTS_MODEL_DIR>/<TTS_VOICE_NAME>.onnx``
        together with its ``.onnx.json`` config file.

        Note that ``PiperVoice.load`` is called synchronously inside this
        coroutine, so the caller is blocked while the model loads.

        Returns:
            True when the voice was loaded; False when piper-tts is not
            installed, a voice file is missing, or loading failed. Failures
            are logged and never raised.
        """
        try:
            # Imported lazily so this module can be imported without piper-tts.
            from piper import PiperVoice

            model_dir = Path(settings.TTS_MODEL_DIR)
            voice_name = settings.TTS_VOICE_NAME

            # A Piper voice is a pair of files, e.g.
            # zh_CN-huayan-medium.onnx + zh_CN-huayan-medium.onnx.json
            onnx_file = model_dir / f"{voice_name}.onnx"
            json_file = model_dir / f"{voice_name}.onnx.json"

            if not onnx_file.exists():
                logger.error(f"Piper 模型文件不存在: {onnx_file}")
                logger.info(f"请下载 Piper 中文模型到 {model_dir}")
                # Hint uses the configured voice and the real module name
                # (``piper.download_voices``).
                logger.info(
                    f"下载命令: python -m piper.download_voices {voice_name}"
                )
                return False

            # Report a missing config explicitly instead of letting the
            # loader fail with a generic error.
            if not json_file.exists():
                logger.error(f"Piper 模型配置文件不存在: {json_file}")
                return False

            self._model_path = onnx_file
            self._config_path = json_file

            logger.info(f"正在加载 Piper 模型: {voice_name}")
            self._voice = PiperVoice.load(
                str(onnx_file),
                config_path=str(json_file),
            )

            logger.info(
                f"Piper TTS 初始化成功: {voice_name}, "
                f"采样率={settings.TTS_SAMPLE_RATE}Hz"
            )

            # The rate logged above comes from settings, not from the model.
            # This class never resamples, so surface a mismatch once here.
            voice_config = getattr(self._voice, "config", None)
            native_rate = getattr(voice_config, "sample_rate", None)
            if native_rate is not None and native_rate != settings.TTS_SAMPLE_RATE:
                logger.warning(
                    f"Piper 模型原生采样率 {native_rate}Hz 与配置的 "
                    f"{settings.TTS_SAMPLE_RATE}Hz 不一致,本服务不会重采样"
                )

            self._initialized = True
            return True
        except ImportError:
            logger.error("piper-tts 未安装,请运行: pip install piper-tts")
            return False
        except Exception as e:
            # The error is swallowed (False is returned), so keep the
            # traceback in the log.
            logger.exception(f"Piper TTS 初始化失败: {e}")
            return False

    @staticmethod
    def _to_int16_array(chunk) -> np.ndarray:
        """Return the samples of one Piper output chunk as a NumPy array."""
        if isinstance(chunk, np.ndarray):
            # Already an array: hand it back untouched.
            return chunk
        # Chunk objects from recent piper-tts releases expose their PCM data
        # through ``audio_int16_array``.
        pcm = getattr(chunk, "audio_int16_array", None)
        if pcm is not None:
            return pcm
        # Unknown chunk type: same conversion ``np.concatenate`` would apply.
        return np.asarray(chunk)

    def synthesize(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Generator[np.ndarray, None, None]:
        """Stream synthesized audio for ``text`` chunk by chunk.

        Because this is a generator, nothing (including the initialization
        check) runs until the first item is requested.

        NOTE(review): chunks are yielded exactly as Piper produces them.
        Recent piper-tts releases document ``PiperVoice.synthesize`` as
        yielding ``AudioChunk`` objects rather than bare arrays; confirm
        what callers expect before relying on the annotated ndarray type.

        Args:
            text: Text to synthesize.
            sample_rate: Requested sample rate. Currently unused: no
                resampling is performed here.

        Yields:
            Audio chunks in the order Piper produces them.

        Raises:
            RuntimeError: If the service has not been initialized.
            Exception: Any synthesis error is logged and re-raised.
        """
        if not self._initialized or self._voice is None:
            raise RuntimeError("TTS 服务未初始化")

        try:
            # Do not materialize the whole utterance: delegating directly to
            # Piper's iterator lets the first chunk go out as soon as it is
            # ready.
            yield from self._voice.synthesize(text)
        except Exception as e:
            logger.error(f"TTS 合成失败: text='{text[:50]}...', 错误={e}")
            raise

    def synthesize_complete(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Tuple[np.ndarray, int]:
        """Synthesize ``text`` and return the whole utterance at once.

        Args:
            text: Text to synthesize.
            sample_rate: Requested sample rate. No resampling is performed;
                the value is returned only when Piper's chunks do not
                report their own rate (or no audio was produced).

        Returns:
            ``(audio, rate)``: all chunks concatenated, and the sample rate
            reported by the first chunk when available, otherwise
            ``sample_rate``. An empty int16 array is returned when Piper
            produced no audio.

        Raises:
            RuntimeError: If the service has not been initialized.
            Exception: Any synthesis error is logged and re-raised.
        """
        if not self._initialized or self._voice is None:
            raise RuntimeError("TTS 服务未初始化")

        try:
            reported_rate: Optional[int] = None
            audio_parts = []
            for chunk_data in self._voice.synthesize(text):
                if reported_rate is None:
                    # Plain arrays have no ``sample_rate``; this stays None.
                    reported_rate = getattr(chunk_data, "sample_rate", None)
                # np.concatenate needs real arrays, not chunk objects.
                audio_parts.append(self._to_int16_array(chunk_data))

            if not audio_parts:
                return np.array([], dtype=np.int16), sample_rate

            # Label the audio with the rate it actually has when known.
            return np.concatenate(audio_parts), (reported_rate or sample_rate)
        except Exception as e:
            logger.error(f"TTS 完整合成失败: text='{text[:50]}...', 错误={e}")
            raise

    async def shutdown(self):
        """Drop the loaded voice and mark the service as uninitialized."""
        self._voice = None
        self._initialized = False
        logger.info("Piper TTS 服务已关闭")