"""
Piper TTS service implementation - efficient local text-to-speech.
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Generator, Optional, Tuple
|
|
import numpy as np
|
|
from loguru import logger
|
|
|
|
from app.services.tts_service import TTSServiceInterface, TTSAudioChunk
|
|
from app.config import settings
|
|
|
|
|
|
class PiperTTSService(TTSServiceInterface):
    """Piper-backed implementation of the TTS service interface.

    The voice is loaded by :meth:`initialize` from ``settings.TTS_MODEL_DIR``
    using ``settings.TTS_VOICE_NAME``. Both synthesis methods raise
    ``RuntimeError`` until initialization has succeeded.
    """

    def __init__(self):
        # Loaded PiperVoice instance; None until initialize() succeeds.
        self._voice = None
        self._initialized = False
        # Paths of the ONNX model and its JSON config, recorded by initialize().
        self._model_path: Optional[Path] = None
        self._config_path: Optional[Path] = None

    async def initialize(self) -> bool:
        """Load the configured Piper voice model.

        Returns:
            True when the model was loaded. False when piper-tts is not
            installed, a model file is missing, or loading raised; every
            failure is logged and none is propagated to the caller.
        """
        try:
            # Imported lazily so that a missing piper-tts install is reported
            # as a failed initialization instead of breaking module import.
            from piper import PiperVoice

            model_dir = Path(settings.TTS_MODEL_DIR)
            voice_name = settings.TTS_VOICE_NAME

            # A Piper voice is a pair of files: <voice>.onnx and
            # <voice>.onnx.json (e.g. zh_CN-huayan-medium.onnx + .json).
            onnx_file = model_dir / f"{voice_name}.onnx"
            json_file = model_dir / f"{voice_name}.onnx.json"

            if not onnx_file.exists():
                logger.error(f"Piper 模型文件不存在: {onnx_file}")
                logger.info(f"请下载 Piper 中文模型到 {model_dir}")
                # Point at the configured voice (not a hard-coded one) and at
                # the downloader module that piper-tts actually ships.
                logger.info(
                    f"下载命令: python -m piper.download_voices {voice_name}"
                )
                return False

            if not json_file.exists():
                # PiperVoice.load() reads "<model>.json" next to the model;
                # report its absence explicitly rather than as a generic
                # load failure.
                logger.error(f"Piper 模型配置文件不存在: {json_file}")
                return False

            self._model_path = onnx_file
            self._config_path = json_file

            logger.info(f"正在加载 Piper 模型: {voice_name}")
            self._voice = PiperVoice.load(str(onnx_file))

            logger.info(
                f"Piper TTS 初始化成功: {voice_name}, "
                f"采样率={settings.TTS_SAMPLE_RATE}Hz"
            )
            self._initialized = True
            return True

        except ImportError:
            logger.error("piper-tts 未安装,请运行: pip install piper-tts")
            return False
        except Exception as e:
            # The exception is swallowed here, so keep its traceback in the log.
            logger.exception(f"Piper TTS 初始化失败: {e}")
            return False

    @staticmethod
    def _chunk_to_int16(chunk) -> np.ndarray:
        """Return the PCM samples carried by one chunk from the engine.

        Accepts the chunk shapes Piper is known to produce: a ready numpy
        array, an object exposing ``audio_int16_array`` (the ``AudioChunk`` of
        piper-tts 1.3+, per Piper's Python API docs), or raw int16 bytes.
        Anything else is handed to ``np.asarray`` unchanged.
        """
        if isinstance(chunk, np.ndarray):
            return chunk
        samples = getattr(chunk, "audio_int16_array", None)
        if samples is not None:
            return np.asarray(samples)
        if isinstance(chunk, (bytes, bytearray, memoryview)):
            return np.frombuffer(chunk, dtype=np.int16)
        return np.asarray(chunk)

    def synthesize(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Generator[np.ndarray, None, None]:
        """Stream synthesized audio for ``text`` chunk by chunk.

        Args:
            text: Text to synthesize.
            sample_rate: Target sample rate. Accepted for interface
                compatibility only; this method does not resample.

        Yields:
            Chunks exactly as produced by ``PiperVoice.synthesize``.
            NOTE(review): the annotation promises int16 numpy arrays, but the
            chunks are forwarded untouched; confirm what callers expect
            before normalizing them here.

        Raises:
            RuntimeError: If the service has not been initialized. Because
                this is a generator, the check runs on first iteration.
        """
        if not self._initialized or self._voice is None:
            raise RuntimeError("TTS 服务未初始化")

        try:
            # Iterate the engine's generator directly (no list()) so the
            # first chunk is yielded as soon as it is available.
            for chunk_data in self._voice.synthesize(text):
                yield chunk_data
        except Exception as e:
            logger.error(f"TTS 合成失败: text='{text[:50]}...', 错误={e}")
            raise

    def synthesize_complete(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Tuple[np.ndarray, int]:
        """Synthesize ``text`` and return all audio at once.

        Args:
            text: Text to synthesize.
            sample_rate: Target sample rate. No resampling is performed; this
                value is only returned when the engine chunks do not carry
                their own rate or when no audio was produced.

        Returns:
            ``(audio, rate)``: the concatenated samples and the sample rate
            reported by the engine chunks (falling back to ``sample_rate``).
            An empty int16 array is returned when nothing was synthesized.

        Raises:
            RuntimeError: If the service has not been initialized.
        """
        if not self._initialized or self._voice is None:
            raise RuntimeError("TTS 服务未初始化")

        try:
            actual_rate = sample_rate
            audio_parts = []
            for chunk_data in self._voice.synthesize(text):
                # Engine chunks are not guaranteed to be arrays; concatenating
                # them raw makes np.concatenate fail, so normalize first.
                audio_parts.append(self._chunk_to_int16(chunk_data))
                # Report the rate the audio really has, not the requested one.
                actual_rate = getattr(chunk_data, "sample_rate", actual_rate)

            if not audio_parts:
                return np.array([], dtype=np.int16), sample_rate
            return np.concatenate(audio_parts), actual_rate

        except Exception as e:
            logger.error(f"TTS 完整合成失败: text='{text[:50]}...', 错误={e}")
            raise

    async def shutdown(self):
        """Drop the loaded voice and mark the service as uninitialized."""
        self._voice = None
        self._initialized = False
        logger.info("Piper TTS 服务已关闭")
|