79 lines
1.8 KiB
Python
79 lines
1.8 KiB
Python
"""
|
|
服务层接口定义 - TTS 服务
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Generator, Optional, Tuple
|
|
import numpy as np
|
|
from loguru import logger
|
|
|
|
|
|
class TTSServiceInterface(ABC):
    """Abstract TTS service interface that every TTS provider must implement.

    Concrete providers must override ``synthesize``, ``initialize`` and
    ``shutdown``; the class cannot be instantiated until all three are
    implemented.
    """

    @abstractmethod
    def synthesize(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Generator[np.ndarray, None, None]:
        """Synthesize speech for ``text`` as a stream of audio chunks.

        Args:
            text: The text to synthesize.
            sample_rate: Target sample rate (defaults to 24000).

        Yields:
            Audio chunks (numpy array, int16).
        """

    @abstractmethod
    async def initialize(self) -> bool:
        """Initialize the service (load models, etc.).

        Returns:
            Whether initialization succeeded.
        """

    @abstractmethod
    async def shutdown(self) -> None:
        """Shut down the service and release its resources."""
|
|
|
|
|
|
class TTSAudioChunk:
    """One chunk of TTS audio together with the metadata that describes it.

    Attributes are plain instance attributes set directly from the
    constructor arguments; no validation or conversion is performed.
    """

    def __init__(
        self,
        data: bytes,
        turn_id: str = "",
        codec: str = "pcm_s16le",
        sample_rate: int = 24000,
        seq: int = 0,
        is_final: bool = False,
    ) -> None:
        """Store the audio payload and its descriptive fields.

        Args:
            data: Audio payload bytes for this chunk.
            turn_id: Identifier emitted as ``turn_id`` in the metadata.
            codec: Codec label emitted as ``codec`` (default ``pcm_s16le``).
            sample_rate: Sample rate, emitted as ``sample_rate_hz``.
            seq: Value emitted as ``seq`` (presumably the chunk's sequence
                number within the turn -- confirm against the protocol).
            is_final: Flag emitted as ``is_final`` (presumably marks the
                last chunk of a turn -- confirm against the protocol).
        """
        self.data = data
        self.turn_id = turn_id
        self.codec = codec
        self.sample_rate = sample_rate
        self.seq = seq
        self.is_final = is_final

    def __repr__(self) -> str:
        """Return a debug representation that omits the raw audio bytes."""
        # Only the payload length is shown; dumping raw audio would flood logs.
        data_len = None if self.data is None else len(self.data)
        return (
            f"{type(self).__name__}("
            f"turn_id={self.turn_id!r}, seq={self.seq!r}, "
            f"codec={self.codec!r}, sample_rate={self.sample_rate!r}, "
            f"is_final={self.is_final!r}, data_len={data_len!r})"
        )

    def to_metadata_dict(self) -> dict:
        """Convert the chunk's descriptive fields to a protocol metadata dict.

        The audio payload (``data``) is not included; only the metadata
        fields plus three fixed protocol constants are returned.

        Returns:
            A new dict with the keys ``type``, ``proto_version``,
            ``transport_profile``, ``turn_id``, ``seq``, ``codec``,
            ``sample_rate_hz`` and ``is_final``.
        """
        return {
            # Fixed protocol constants.
            "type": "tts_audio_chunk",
            "proto_version": "1.0",
            "transport_profile": "text_uplink",
            # Per-chunk fields.
            "turn_id": self.turn_id,
            "seq": self.seq,
            "codec": self.codec,
            "sample_rate_hz": self.sample_rate,
            "is_final": self.is_final,
        }
|