2026-04-14 10:08:41 +08:00

79 lines
1.8 KiB
Python

"""
服务层接口定义 - TTS 服务
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Generator, Optional, Tuple
import numpy as np
from loguru import logger
class TTSServiceInterface(ABC):
    """Abstract TTS service interface that every TTS provider must implement.

    ``synthesize`` is declared as a regular (synchronous) method that returns
    a generator, whereas ``initialize`` and ``shutdown`` are coroutines and
    must be awaited by the caller.
    """

    @abstractmethod
    def synthesize(
        self,
        text: str,
        sample_rate: int = 24000,
    ) -> Generator[np.ndarray, None, None]:
        """Synthesize speech for *text* as a stream of audio chunks.

        Args:
            text: The text to synthesize.
            sample_rate: Target sample rate (presumably in Hz; confirm with
                the concrete provider).

        Yields:
            Audio chunks as NumPy arrays of int16 samples.
        """

    @abstractmethod
    async def initialize(self) -> bool:
        """Initialize the service (for example, load models).

        Returns:
            Whether initialization succeeded.
        """

    @abstractmethod
    async def shutdown(self) -> None:
        """Shut the service down and release its resources."""
class TTSAudioChunk:
    """One chunk of synthesized TTS audio together with its protocol fields.

    Attributes:
        data: Raw audio payload for this chunk.
        turn_id: Identifier of the turn this chunk belongs to.
        codec: Codec name reported in the metadata (default ``"pcm_s16le"``).
        sample_rate: Sample rate reported as ``sample_rate_hz`` in the metadata.
        seq: Sequence number of the chunk.
        is_final: Flag reported as ``is_final`` in the metadata.
    """

    def __init__(
        self,
        data: bytes,
        turn_id: str = "",
        codec: str = "pcm_s16le",
        sample_rate: int = 24000,
        seq: int = 0,
        is_final: bool = False,
    ) -> None:
        """Store the audio payload and the fields describing it."""
        self.data = data
        self.turn_id = turn_id
        self.codec = codec
        self.sample_rate = sample_rate
        self.seq = seq
        self.is_final = is_final

    def __repr__(self) -> str:
        """Return a debug representation that omits the raw audio bytes."""
        # Report only the payload size: dumping audio bytes would flood logs.
        try:
            size = f"{len(self.data)} bytes"
        except TypeError:
            # ``data`` is not sized (e.g. None); keep repr() from raising.
            size = "unsized"
        return (
            f"{type(self).__name__}("
            f"turn_id={self.turn_id!r}, seq={self.seq!r}, "
            f"codec={self.codec!r}, sample_rate={self.sample_rate!r}, "
            f"is_final={self.is_final!r}, data=<{size}>)"
        )

    def to_metadata_dict(self) -> dict:
        """Build the protocol metadata dictionary for this chunk.

        The audio payload itself (``data``) is not included.

        Returns:
            A dict with the fixed ``type``, ``proto_version`` and
            ``transport_profile`` entries plus this chunk's ``turn_id``,
            ``seq``, ``codec``, ``sample_rate_hz`` and ``is_final``.
        """
        return {
            "type": "tts_audio_chunk",
            "proto_version": "1.0",
            "transport_profile": "text_uplink",
            "turn_id": self.turn_id,
            "seq": self.seq,
            "codec": self.codec,
            "sample_rate_hz": self.sample_rate,
            "is_final": self.is_final,
        }