""" 服务层接口定义 - TTS 服务 """ from __future__ import annotations from abc import ABC, abstractmethod from typing import Generator, Optional, Tuple import numpy as np from loguru import logger class TTSServiceInterface(ABC): """TTS 服务接口 - 所有 TTS 提供者需实现此接口""" @abstractmethod def synthesize( self, text: str, sample_rate: int = 24000, ) -> Generator[np.ndarray, None, None]: """ 流式合成语音 Args: text: 要合成的文本 sample_rate: 目标采样率 Yields: 音频块 (numpy array, int16) """ pass @abstractmethod async def initialize(self) -> bool: """ 初始化服务(加载模型等) Returns: 是否成功 """ pass @abstractmethod async def shutdown(self): """关闭服务,释放资源""" pass class TTSAudioChunk: """TTS 音频块""" def __init__( self, data: bytes, turn_id: str = "", codec: str = "pcm_s16le", sample_rate: int = 24000, seq: int = 0, is_final: bool = False, ): self.data = data self.turn_id = turn_id self.codec = codec self.sample_rate = sample_rate self.seq = seq self.is_final = is_final def to_metadata_dict(self) -> dict: """转换为协议元数据字典""" return { "type": "tts_audio_chunk", "proto_version": "1.0", "transport_profile": "text_uplink", "turn_id": self.turn_id, "seq": self.seq, "codec": self.codec, "sample_rate_hz": self.sample_rate, "is_final": self.is_final, }