"""
Protocol model definitions - Cloud Voice Protocol v1.0 (text_uplink).

All messages follow section F of the protocol specification.
"""
from __future__ import annotations

import uuid
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any

from pydantic import BaseModel, Field, ConfigDict, model_validator

from app.config import settings

# ==================== Protocol constants ====================

PROTO_VERSION = "1.0"
TRANSPORT_PROFILE = "text_uplink"
PCM_ASR_TRANSPORT_PROFILE = "pcm_asr_uplink"
DIALOG_RESULT_PROTOCOL_V1 = "cloud_voice_dialog_v1"


# ==================== Enum definitions ====================


class RoutingType(str, Enum):
    """Routing decision attached to a dialog_result."""

    FLIGHT_INTENT = "flight_intent"
    CHITCHAT = "chitchat"
    ERROR = "error"


class FlightActionType(str, Enum):
    """Flight-control action types (per FLIGHT_INTENT_SCHEMA v1 §3)."""

    TAKEOFF = "takeoff"
    LAND = "land"
    RETURN_HOME = "return_home"
    HOVER = "hover"
    HOLD = "hold"
    GOTO = "goto"
    WAIT = "wait"


class SourceType(str, Enum):
    """Origin of the user text."""

    DEVICE_STT = "device_stt"
    DEBUG_KEYBOARD = "debug_keyboard"
    TEXT_ONLY = "text_only"
    CLOUD_FUN_ASR = "cloud_fun_asr"


class ErrorCode(str, Enum):
    """Error codes carried by ``error`` messages."""

    UNAUTHORIZED = "UNAUTHORIZED"
    INVALID_MESSAGE = "INVALID_MESSAGE"
    LLM_FAILED = "LLM_FAILED"
    LLM_TIMEOUT = "LLM_TIMEOUT"
    TTS_FAILED = "TTS_FAILED"
    ASR_FAILED = "ASR_FAILED"
    RATE_LIMIT = "RATE_LIMIT"
    INTERNAL = "INTERNAL"


# ==================== Client -> server messages ====================


class ClientCapabilities(BaseModel):
    """Client capabilities."""

    playback_sample_rate_hz: int = 24000
    prefer_tts_codec: str = "pcm_s16le"


class ClientProtocolDecl(BaseModel):
    """session.start.client.protocol - see CLOUD_VOICE_DIALOG_v1.md §8."""

    model_config = ConfigDict(extra="forbid")

    dialog_result: Optional[str] = None


class Px4VehicleProfile(BaseModel):
    """
    Onboard PX4 context reported by the client in session.start so the cloud
    LLM can disambiguate by the actual vehicle type.

    Every field is optional; when absent the model treats the vehicle as
    unknown and states its assumptions conservatively in the summary.
    """

    # Coarse classification aligned with PX4 airframe / MAV_TYPE
    # (the client may supply either one or both).
    vehicle_class: str = Field(
        default="unknown",
        description=(
            "multicopter | fixed_wing | vtol_standard | vtol_tailsitter | "
            "rover | boat | submarine | other | unknown"
        ),
    )
    mav_type: Optional[int] = Field(
        default=None,
        description="MAVLink HEARTBEAT.mav_type(0–255),与 ArduPilot 共用枚举值",
    )
    px4_version: Optional[str] = Field(
        default=None, description="PX4 固件版本,如 1.14.3"
    )
    airframe_id: Optional[str] = Field(
        default=None, description="PX4 机型 id / QGC 显示的 airframe 名,便于日志"
    )
    # Control and mission capabilities (recommended when the vehicle knows them).
    default_setpoint_frame: str = Field(
        default="local_ned",
        description="默认相对位移 frame:local_ned | body_ned 等",
    )
    offboard_capable: bool = Field(
        default=False, description="Companion 能否稳定进入并维持 Offboard"
    )
    mission_capable: bool = Field(default=True, description="是否可执行 Mission / 航点任务")
    rtl_available: bool = Field(default=True, description="RTL / 返航是否可用(含 Home 参数已配置)")
    home_position_valid: bool = Field(
        default=False, description="Home 是否已记入飞控(影响 RTL 语义)"
    )
    # Current NAV_STATE / human-readable mode name
    # (a string that MAVROS / px4_msgs can be mapped to).
    current_nav_state: Optional[str] = Field(
        default=None, description="如 POSITION_MODE、OFFBOARD、MISSION、AUTO.LAND"
    )
    # Operating envelope (a spoken "a bit higher" can be scaled against the
    # default altitude; unit: metres).
    cruise_alt_m_agl: Optional[float] = Field(
        default=None, description="典型巡航相对高度_m,缺省时仅靠用户口述"
    )
    extras: Dict[str, Any] = Field(
        default_factory=dict,
        description="扩展键值(如 estimator、GPS 状态),原样进入 LLM 上下文",
    )


class ClientInfo(BaseModel):
    """Client information."""

    device_id: str
    locale: str = "zh-CN"
    capabilities: ClientCapabilities = Field(default_factory=ClientCapabilities)
    px4: Optional[Px4VehicleProfile] = Field(
        default=None, description="PX4 载具与能力上下文;强烈建议填写 vehicle_class"
    )
    protocol: Optional[ClientProtocolDecl] = Field(
        default=None,
        description='声明 dialog_result 形状;dialog_result=="cloud_voice_dialog_v1" 启用 §3',
    )


class SessionStartMessage(BaseModel):
    """session.start - the client opens a session."""

    type: str = "session.start"
    proto_version: str = PROTO_VERSION
    transport_profile: str  # text_uplink | pcm_asr_uplink
    session_id: str
    auth_token: Optional[str] = None
    client: ClientInfo


class TurnTextMessage(BaseModel):
    """turn.text - the client sends text."""

    type: str = "turn.text"
    proto_version: str = PROTO_VERSION
    # Kept for legacy clients; if it disagrees with the session, the session wins.
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    text: str
    is_final: bool = True
    source: SourceType = SourceType.DEVICE_STT


class TurnAudioStartMessage(BaseModel):
    """turn.audio.start - begin one turn of cloud ASR uplink (pcm_asr_uplink)."""

    type: str = "turn.audio.start"
    proto_version: str = PROTO_VERSION
    transport_profile: str = PCM_ASR_TRANSPORT_PROFILE
    turn_id: str
    sample_rate_hz: int = Field(default=16000, description="须与 Fun-ASR 约定一致,默认 16000")
    format: str = Field(default="pcm_s16le", description="小端 mono int16")


class TurnAudioChunkMessage(BaseModel):
    """turn.audio.chunk - one PCM fragment (base64)."""

    type: str = "turn.audio.chunk"
    proto_version: str = PROTO_VERSION
    transport_profile: str = PCM_ASR_TRANSPORT_PROFILE
    turn_id: str
    pcm_base64: str = Field(..., description="raw PCM s16le 字节序列的 base64")


class TurnAudioEndMessage(BaseModel):
    """turn.audio.end - microphone streaming for this turn is over; finish
    recognition and enqueue the result for the LLM."""

    type: str = "turn.audio.end"
    proto_version: str = PROTO_VERSION
    transport_profile: str = PCM_ASR_TRANSPORT_PROFILE
    turn_id: str


class TtsSynthesizeMessage(BaseModel):
    """tts.synthesize - TTS playback only; no LLM / dialog_result / history."""

    type: str = "tts.synthesize"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    text: str


class SessionEndMessage(BaseModel):
    """session.end - the client closes the session."""

    type: str = "session.end"
    proto_version: str = PROTO_VERSION
    session_id: str


# ==================== Server -> client messages ====================


class ServerCapabilities(BaseModel):
    """Server capabilities."""

    accepts_audio_uplink: bool = False
    llm: bool = True
    tts_codecs: List[str] = ["pcm_s16le"]
    llm_context_turns: int = 4
    accepts_px4_vehicle_profile: bool = True


class SessionReadyMessage(BaseModel):
    """session.ready - the server acknowledges the session."""

    type: str = "session.ready"
    proto_version: str = PROTO_VERSION
    transport_profile: str  # same value as in session.start
    session_id: str
    server_caps: ServerCapabilities = Field(default_factory=ServerCapabilities)


class FlightIntentAction(BaseModel):
    """Flight action - only ``type`` + ``args`` are allowed (per Schema v1)."""

    model_config = ConfigDict(extra="forbid")

    type: str
    args: Dict[str, Any] = Field(default_factory=dict)


class FlightIntentPayload(BaseModel):
    """Flight intent payload - aligned with FLIGHT_INTENT_SCHEMA_v1.md."""

    model_config = ConfigDict(extra="forbid")

    is_flight_intent: bool = True
    version: int = 1
    actions: List[FlightIntentAction]
    summary: str
    trace_id: Optional[str] = Field(default=None, max_length=128)


class UserInput(BaseModel):
    """User input information (nested object used by the legacy dialog_result)."""

    text: str
    language: str = "zh"
    is_final: bool = True
    source: str = "device_stt"


class TTSHint(BaseModel):
    """TTS hint."""

    speak_summary_or_reply: bool = True
    voice_id: str = "default"


class FlightConfirmSpec(BaseModel):
    """Required when routing=flight_intent - CLOUD_VOICE_DIALOG_v1.md §3.4."""

    model_config = ConfigDict(extra="forbid")

    required: bool = Field(description="true:首轮禁止执行,需确认窗")
    timeout_sec: float = Field(ge=1, le=600)
    confirm_phrases: List[str] = Field(min_length=1)
    cancel_phrases: List[str] = Field(min_length=1)
    pending_id: str = Field(min_length=1)
    summary_for_user: Optional[str] = None


class DialogResultCloudV1(BaseModel):
    """dialog_result (cloud_voice_dialog_v1) - CLOUD_VOICE_DIALOG_v1.md."""

    model_config = ConfigDict(extra="forbid")

    type: str = "dialog_result"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    protocol: str = DIALOG_RESULT_PROTOCOL_V1
    user_input: str
    routing: RoutingType
    flight_intent: Optional[FlightIntentPayload] = None
    confirm: Optional[FlightConfirmSpec] = None
    chat_reply: Optional[str] = None

    @model_validator(mode="after")
    def _routing_shape(self) -> DialogResultCloudV1:
        """Enforce the payload shape that each routing value permits.

        chitchat must carry neither ``flight_intent`` nor ``confirm``;
        flight_intent must carry both; any other routing is rejected.
        """
        if self.routing == RoutingType.CHITCHAT:
            if self.flight_intent is not None or self.confirm is not None:
                raise ValueError("chitchat 不得携带 flight_intent / confirm")
        elif self.routing == RoutingType.FLIGHT_INTENT:
            if self.flight_intent is None or self.confirm is None:
                raise ValueError("flight_intent 路由必须同时携带 flight_intent 与 confirm")
        else:
            raise ValueError("DialogResultCloudV1 仅支持 chitchat / flight_intent")
        return self


class DialogResultMessage(BaseModel):
    """dialog_result (legacy clients: no protocol / confirm; user_input is an object)."""

    type: str = "dialog_result"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    user_input: UserInput
    routing: RoutingType
    flight_intent: Optional[FlightIntentPayload] = None
    chat_reply: Optional[str] = None
    tts_hint: TTSHint = Field(default_factory=TTSHint)


class TTSAudioChunkMessage(BaseModel):
    """tts_audio_chunk - metadata for one TTS audio chunk."""

    type: str = "tts_audio_chunk"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    seq: int = 0
    codec: str = "pcm_s16le"
    sample_rate_hz: int = 24000
    is_final: bool = False


class Metrics(BaseModel):
    """Performance metrics."""

    llm_ms: Optional[int] = None
    tts_first_byte_ms: Optional[int] = None


class TurnCompleteMessage(BaseModel):
    """turn.complete - the turn has finished."""

    type: str = "turn.complete"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: str
    metrics: Metrics = Field(default_factory=Metrics)


class ErrorMessage(BaseModel):
    """error - error message."""

    type: str = "error"
    proto_version: str = PROTO_VERSION
    transport_profile: str = TRANSPORT_PROFILE
    turn_id: Optional[str] = None
    code: ErrorCode
    message: str
    retryable: bool = False


# ==================== Helper functions ====================


def _resolve_transport_profile(transport_profile: Optional[str]) -> str:
    """Return the caller-supplied profile, or TRANSPORT_PROFILE when it is None."""
    return TRANSPORT_PROFILE if transport_profile is None else transport_profile


def create_error_message(
    code: ErrorCode,
    message: str,
    turn_id: Optional[str] = None,
    retryable: bool = False,
    *,
    transport_profile: Optional[str] = None,
) -> dict:
    """Build an ``error`` message dict.

    Args:
        code: Error code to report.
        message: Error text.
        turn_id: Turn the error belongs to; omitted from the dict when None.
        retryable: Value of the ``retryable`` flag.
        transport_profile: Profile to stamp on the message (for example the
            session's ``pcm_asr_uplink``). None keeps the previous behaviour
            of using TRANSPORT_PROFILE.

    Returns:
        ``ErrorMessage.model_dump(exclude_none=True)``.
    """
    msg = ErrorMessage(
        transport_profile=_resolve_transport_profile(transport_profile),
        turn_id=turn_id,
        code=code,
        message=message,
        retryable=retryable,
    )
    return msg.model_dump(exclude_none=True)


def create_asr_partial(
    *,
    turn_id: str,
    text: str,
    is_final: bool,
    transport_profile: str,
) -> dict:
    """Build an ``asr.partial`` dict: an intermediate / per-sentence cloud
    Fun-ASR result that the device can use to show dictation or status.

    Args:
        turn_id: Turn the recognition result belongs to.
        text: Recognised text.
        is_final: Value of the ``is_final`` flag.
        transport_profile: Profile to stamp on the message.

    Returns:
        A plain dict; no model validation is performed.
    """
    return {
        "type": "asr.partial",
        "proto_version": PROTO_VERSION,
        "transport_profile": transport_profile,
        "turn_id": turn_id,
        "text": text,
        "is_final": is_final,
    }


def create_llm_text_delta(
    turn_id: str,
    delta: str,
    *,
    done: bool = False,
    transport_profile: Optional[str] = None,
) -> dict:
    """Build an ``llm.text_delta`` dict: streamed incremental LLM text (may be
    sent before dialog_result so the device can render a typing effect).

    Args:
        turn_id: Turn the delta belongs to.
        delta: Incremental text.
        done: Value of the ``done`` flag.
        transport_profile: Profile to stamp on the message. None keeps the
            previous behaviour of using TRANSPORT_PROFILE.

    Returns:
        A plain dict; no model validation is performed.
    """
    return {
        "type": "llm.text_delta",
        "proto_version": PROTO_VERSION,
        "transport_profile": _resolve_transport_profile(transport_profile),
        "turn_id": turn_id,
        "delta": delta,
        "done": done,
    }


def create_dialog_result(
    turn_id: str,
    user_text: str,
    routing: RoutingType,
    flight_intent: Optional[dict] = None,
    chat_reply: Optional[str] = None,
    *,
    transport_profile: Optional[str] = None,
) -> dict:
    """Build the legacy dialog_result (no protocol / confirm; ``user_input``
    is a nested object).

    Args:
        turn_id: Turn the result belongs to.
        user_text: Text placed in ``user_input.text``.
        routing: Routing decision.
        flight_intent: Keyword arguments for FlightIntentPayload; a falsy
            value (None or an empty dict) leaves ``flight_intent`` unset.
        chat_reply: Optional chat reply text.
        transport_profile: Profile to stamp on the message. None keeps the
            previous behaviour of using TRANSPORT_PROFILE.

    Returns:
        ``DialogResultMessage.model_dump(exclude_none=True)``.

    Raises:
        pydantic validation errors propagate if ``flight_intent`` does not
        match FlightIntentPayload.
    """
    intent_payload = FlightIntentPayload(**flight_intent) if flight_intent else None
    msg = DialogResultMessage(
        transport_profile=_resolve_transport_profile(transport_profile),
        turn_id=turn_id,
        user_input=UserInput(text=user_text),
        routing=routing,
        flight_intent=intent_payload,
        chat_reply=chat_reply,
    )
    return msg.model_dump(exclude_none=True)


def create_dialog_result_cloud_v1(
    turn_id: str,
    user_text: str,
    routing: RoutingType,
    flight_intent: Optional[dict] = None,
    chat_reply: Optional[str] = None,
    *,
    confirm_required: Optional[bool] = None,
    confirm_timeout_sec: Optional[float] = None,
    transport_profile: Optional[str] = None,
) -> dict:
    """
    Build a cloud_voice_dialog_v1 dialog_result - CLOUD_VOICE_DIALOG_v1.md.

    For the flight branch, FlightIntentPayload and FlightConfirmSpec are
    filled in automatically.

    Args:
        turn_id: Turn the result belongs to.
        user_text: User text; None / empty becomes ``""``.
        routing: Routing decision. Values other than chitchat / flight_intent
            are rejected by the DialogResultCloudV1 validator.
        flight_intent: Keyword arguments for FlightIntentPayload. Required
            (non-empty) for flight_intent routing; ignored otherwise.
        chat_reply: Optional chat reply text.
        confirm_required: Overrides ``settings.FLIGHT_CONFIRM_REQUIRED``
            when not None.
        confirm_timeout_sec: Overrides ``settings.FLIGHT_CONFIRM_TIMEOUT_SEC``
            when not None.
        transport_profile: Profile to stamp on the message. None keeps the
            previous behaviour of using TRANSPORT_PROFILE.

    Returns:
        ``DialogResultCloudV1.model_dump(exclude_none=True)``.

    Raises:
        ValueError: routing is flight_intent but ``flight_intent`` is falsy.
        pydantic validation errors propagate for payloads that do not match
        the models (for example a timeout outside 1..600).
    """
    req = settings.FLIGHT_CONFIRM_REQUIRED if confirm_required is None else confirm_required
    timeout = (
        settings.FLIGHT_CONFIRM_TIMEOUT_SEC
        if confirm_timeout_sec is None
        else confirm_timeout_sec
    )

    intent_payload = None
    confirm_payload = None
    if routing == RoutingType.FLIGHT_INTENT:
        if not flight_intent:
            raise ValueError("flight_intent 路由缺少 flight_intent dict")
        intent_payload = FlightIntentPayload(**flight_intent)
        # Fall back to a generic label when the intent summary is blank.
        summary = (intent_payload.summary or "").strip() or "飞控指令"
        confirm_payload = FlightConfirmSpec(
            required=req,
            timeout_sec=float(timeout),
            confirm_phrases=["确认"],
            cancel_phrases=["取消"],
            # Fresh identifier for each pending confirmation.
            pending_id=str(uuid.uuid4()),
            summary_for_user=summary,
        )

    msg = DialogResultCloudV1(
        transport_profile=_resolve_transport_profile(transport_profile),
        turn_id=turn_id,
        user_input=user_text or "",
        routing=routing,
        flight_intent=intent_payload,
        confirm=confirm_payload,
        chat_reply=chat_reply,
    )
    return msg.model_dump(exclude_none=True)