# 实时检测语音:用「无人机」唤醒 → TTS「你好,我在呢」→ 收音一句指令(关麦)→ 大模型 Kokoro 播报答句 → 再仅听唤醒词。 # 可选:assistant.local_keyword_takeoff_enabled 或 ROCKET_LOCAL_KEYWORD_TAKEOFF=1 时,「无人机 + keywords.yaml 里 takeoff 词」走本地 offboard + WAV(默认关闭)。 # 其它指令走云端/本地 LLM → flight_intent 等(设 ROCKET_CLOUD_EXECUTE_FLIGHT=1 才执行机端序列)。 # 环境变量:ROCKET_LLM_GGUF、ROCKET_LLM_MAX_TOKENS(默认 256)、ROCKET_LLM_CTX(默认 4096,可试 2048 省显存/略提速)、 # ROCKET_LLM_N_THREADS(llama.cpp 线程数,如 RK3588 可试 6~8)、ROCKET_LLM_N_GPU_LAYERS(有 CUDA/Vulkan 时>0)、ROCKET_LLM_N_BATCH、 # ROCKET_TTS_ORT_INTRA_OP_THREADS / ROCKET_TTS_ORT_INTER_OP_THREADS(Kokoro ONNXRuntime 线程), # ROCKET_CHAT_IDLE_SEC(历史占位,每轮重置上下文)、ROCKET_TTS_DEVICE(同 qwen15b_chat --tts-device)、 # ROCKET_INPUT_HW=2,0 对应 arecord -l 的 card,device;ROCKET_INPUT_DEVICE_INDEX、ROCKET_INPUT_DEVICE_NAME; # 录音:默认交互列出 arecord -l + PyAudio 并选择;--input-index / ROCKET_INPUT_DEVICE_INDEX 跳过交互;--non-interactive 用 yaml 的 input_device_index(可为 null 自动探测)。 # ROCKET_LLM_DISABLE=1 关闭对话。 # ROCKET_LLM_STREAM=0 关闭流式输出(整段推理后再单次 TTS,便于对照调试)。 # ROCKET_STREAM_TTS_CHUNK_CHARS 流式闲聊时、无句末标点则按此长度强制切段(默认 64,过小会听感碎)。 # 云端语音(见 voice_drone_assistant/clientguide.md):ROCKET_CLOUD_VOICE=1 或 cloud_voice.enabled; # ROCKET_CLOUD_WS_URL、ROCKET_CLOUD_AUTH_TOKEN、ROCKET_CLOUD_DEVICE_ID;ROCKET_CLOUD_FALLBACK_LOCAL=0 禁用本地回退。 # 云端会话固定 pcm_asr_uplink(VAD 截句→turn.audio.*→Fun-ASR);同句快路径仍可用 turn.text。 # 闲聊「无语音」超时:listen_silence_timeout_sec(默认 5):滴声后仅当 RMS Path: raw = os.environ.get("ROCKET_WAKE_GREETING_WAV", "").strip() return Path(raw).expanduser() if raw else _WAKE_GREETING_WAV _CORE_DIR = _PROJECT_ROOT / "voice_drone" / "core" _TAKEOFF_ACK_WAV = _CORE_DIR / "好的收到,开始起飞.wav" _TAKEOFF_DONE_WAV = _CORE_DIR / "任务执行完成,开始返航降落.wav" _OFFBOARD_SCRIPT = _PROJECT_ROOT / "scripts" / "run_px4_offboard_one_terminal.sh" def _play_wav_blocking(path: Path) -> None: """与 src/play_wav.py 相同:16-bit PCM 单文件 blocking 播放。""" import pyaudio with wave.open(str(path), "rb") as wf: ch = wf.getnchannels() sw = wf.getsampwidth() sr = wf.getframerate() nframes = 
wf.getnframes() if sw != 2: raise ValueError(f"仅支持 16-bit PCM: {path}") pcm = wf.readframes(nframes) p = pyaudio.PyAudio() try: fmt = p.get_format_from_width(sw) chunk = 1024 stream = p.open( format=fmt, channels=ch, rate=sr, output=True, frames_per_buffer=chunk, ) stream.start_stream() try: step = chunk * sw * ch for i in range(0, len(pcm), step): stream.write(pcm[i : i + step]) finally: stream.stop_stream() stream.close() finally: p.terminate() def _synthesize_ready_beep( sample_rate: int = 24000, *, duration_sec: float = 0.11, frequency_hz: float = 988.0, amplitude: float = 0.22, ) -> np.ndarray: """正弦短鸣 + 淡入淡出,作唤醒后「可以说话」提示。""" n = max(8, int(sample_rate * duration_sec)) x = np.arange(n, dtype=np.float32) w = np.sin(2.0 * np.pi * frequency_hz * x / float(sample_rate)).astype(np.float32) fade = max(2, min(n // 3, int(0.006 * sample_rate))) ramp = np.linspace(0.0, 1.0, fade, dtype=np.float32) w[:fade] *= ramp w[-fade:] *= ramp[::-1] return np.clip(w * np.float32(amplitude), -1.0, 1.0) def _terminate_process_group(proc: subprocess.Popen) -> None: if proc.poll() is not None: return try: os.killpg(proc.pid, signal.SIGTERM) except ProcessLookupError: return except Exception as e: # noqa: BLE001 logger.warning("SIGTERM offboard 进程组失败: %s", e) try: proc.wait(timeout=10) except subprocess.TimeoutExpired: try: os.killpg(proc.pid, signal.SIGKILL) except Exception as e: # noqa: BLE001 logger.warning("SIGKILL offboard 进程组失败: %s", e) class _WakeFlowPhase(enum.IntEnum): IDLE = 0 GREETING_WAIT = 1 ONE_SHOT_LISTEN = 2 LLM_BUSY = 3 FLIGHT_CONFIRM_LISTEN = 4 class TakeoffPrintRecognizer(VoiceCommandRecognizer): """待机(IDLE)仅识别含唤醒词的句子;唤醒后多轮对话在 ONE_SHOT_LISTEN 等阶段不要求句内唤醒词。 云端会话为 pcm_asr_uplink:滴声后整句 PCM 上云 Fun-ASR;结束一轮回到 IDLE 再要唤醒词。""" def __init__(self, *, skip_model_preload: bool = False) -> None: super().__init__(auto_connect_socket=False) self.ack_tts_enabled = False self._audio_play_lock = threading.Lock() self._offboard_proc_lock = threading.Lock() self._active_offboard_proc: 
subprocess.Popen | None = None self._takeoff_side_task_busy = threading.Lock() self._model_warm_lock = threading.Lock() # 流式闲聊会按句/块多次入队,队列过小易丢段 self._llm_playback_queue: queue.Queue[str] = queue.Queue(maxsize=64) self._chat_session_lock = threading.Lock() self._chat_session_until: float = 0.0 self._llm_messages: list = [] self._llm = None self._llm_tts_engine = None self._llm_model_path = Path( os.environ.get( "ROCKET_LLM_GGUF", str(default_qwen_gguf_path(_PROJECT_ROOT)), ) ) self._chat_idle_sec = float(os.environ.get("ROCKET_CHAT_IDLE_SEC", "120")) self._llm_max_tokens = int(os.environ.get("ROCKET_LLM_MAX_TOKENS", "256")) self._llm_ctx = int(os.environ.get("ROCKET_LLM_CTX", "4096")) self._llm_tts_max_chars = int(os.environ.get("ROCKET_LLM_TTS_MAX_CHARS", "800")) self._llm_stream_enabled = os.environ.get( "ROCKET_LLM_STREAM", "1" ).lower() not in ("0", "false", "no") self._stream_tts_chunk_chars = max( 16, int(os.environ.get("ROCKET_STREAM_TTS_CHUNK_CHARS", "64")), ) self._llm_disabled = os.environ.get("ROCKET_LLM_DISABLE", "").lower() in ( "1", "true", "yes", ) _kw_raw = os.environ.get("ROCKET_LOCAL_KEYWORD_TAKEOFF", "").strip() if _kw_raw: self._local_keyword_takeoff_enabled = _kw_raw.lower() in ( "1", "true", "yes", ) else: _ac = ( SYSTEM_ASSISTANT_CONFIG if isinstance(SYSTEM_ASSISTANT_CONFIG, dict) else {} ) self._local_keyword_takeoff_enabled = bool( _ac.get("local_keyword_takeoff_enabled", False) ) self._skip_model_preload = skip_model_preload or os.environ.get( "ROCKET_SKIP_MODEL_PRELOAD", "" ).lower() in ("1", "true", "yes") cv = SYSTEM_CLOUD_VOICE_CONFIG if isinstance(SYSTEM_CLOUD_VOICE_CONFIG, dict) else {} env_cloud = os.environ.get("ROCKET_CLOUD_VOICE", "").lower() in ( "1", "true", "yes", ) self._cloud_voice_enabled = bool(env_cloud or cv.get("enabled")) self._cloud_fallback_local = os.environ.get( "ROCKET_CLOUD_FALLBACK_LOCAL", "" ).lower() not in ("0", "false", "no") and bool( cv.get("fallback_to_local", True) ) # 唤醒词仅在 IDLE 由命令线程强制;ONE_SHOT_LISTEN 
整句直接上行或处理,不要求句内唤醒词。 try: self._listen_silence_timeout_sec = max( 0.5, float( os.environ.get("ROCKET_PROMPT_LISTEN_TIMEOUT_SEC") or cv.get("listen_silence_timeout_sec") or 5.0 ), ) except ValueError: self._listen_silence_timeout_sec = 5.0 try: self._post_cue_mic_mute_ms = float( os.environ.get("ROCKET_POST_CUE_MIC_MUTE_MS") or cv.get("post_cue_mic_mute_ms") or 200.0 ) except ValueError: self._post_cue_mic_mute_ms = 200.0 self._post_cue_mic_mute_ms = max(0.0, min(2000.0, self._post_cue_mic_mute_ms)) try: self._segment_cue_duration_ms = float( os.environ.get("ROCKET_SEGMENT_CUE_DURATION_MS") or cv.get("segment_cue_duration_ms") or 120.0 ) except ValueError: self._segment_cue_duration_ms = 120.0 self._segment_cue_duration_ms = max(20.0, min(500.0, self._segment_cue_duration_ms)) ws_url = (os.environ.get("ROCKET_CLOUD_WS_URL") or cv.get("server_url") or "").strip() auth_tok = ( os.environ.get("ROCKET_CLOUD_AUTH_TOKEN") or cv.get("auth_token") or "" ).strip() dev_id = ( os.environ.get("ROCKET_CLOUD_DEVICE_ID") or cv.get("device_id") or "drone-001" ).strip() self._cloud_client = None self._cloud_remote_tts_for_local = False if self._cloud_voice_enabled: if ws_url and auth_tok: from voice_drone.core.cloud_voice_client import CloudVoiceClient self._cloud_client = CloudVoiceClient( server_url=ws_url, auth_token=auth_tok, device_id=dev_id, recv_timeout=float(cv.get("timeout") or 120), session_client_extensions=dict(SYSTEM_CLOUD_VOICE_PX4_CONTEXT) if SYSTEM_CLOUD_VOICE_PX4_CONTEXT else None, ) _env_rt = os.environ.get("ROCKET_CLOUD_REMOTE_TTS", "").strip().lower() if _env_rt in ("0", "false", "no"): self._cloud_remote_tts_for_local = False elif _env_rt in ("1", "true", "yes"): self._cloud_remote_tts_for_local = True else: self._cloud_remote_tts_for_local = bool( cv.get("remote_tts_for_local", True) ) print( f"[云端] 已启用 WebSocket 对话: {ws_url} device_id={dev_id}", flush=True, ) if self._cloud_remote_tts_for_local: print( "[云端] 本地文案播报将走 tts.synthesize(失败回退 Kokoro)。", flush=True, ) 
print( f"[云端] Fun-ASR 上行 turn.audio.*;仅待机时说唤醒词;" f"滴声后累计静默 {self._listen_silence_timeout_sec:.1f}s(低于 yaml energy_vad_rms_low 才计);" f"断句提示 {self._segment_cue_duration_ms:.0f}ms、消抖 {self._post_cue_mic_mute_ms:.0f}ms。", flush=True, ) else: logger.warning("cloud_voice 已启用但缺少 server_url/auth_token,将使用本地 LLM") self._cloud_voice_enabled = False self._wake_flow_lock = threading.Lock() self._wake_phase: int = int(_WakeFlowPhase.IDLE) self._greeting_done = threading.Event() self._playback_batch_is_greeting = False self._pending_finish_wake_cycle_after_tts = False self._pending_flight_confirm_after_tts = False self._pending_flight_confirm: dict | None = None self._flight_confirm_timer: threading.Timer | None = None self._flight_confirm_timer_lock = threading.Lock() self._staged_one_shot_after_greeting: str | None = None self._mic_op_queue: queue.Queue[str] = queue.Queue(maxsize=8) # 默认仅 1 段在 STT 队列等待;可设 ROCKET_STT_QUEUE_MAX=2~8 允许少量排队 _raw_sq = os.environ.get("ROCKET_STT_QUEUE_MAX", "1").strip() try: _stn = max(1, min(16, int(_raw_sq))) except ValueError: _stn = 1 self.stt_queue = queue.Queue(maxsize=_stn) # PROMPT_LISTEN:v1 §4 为「RMS 低于阈值持续累计」,不是滴声后固定墙上时钟 5s self._prompt_listen_watch_armed: bool = False self._prompt_silence_accum_sec: float = 0.0 self._segment_cue_done = threading.Event() self._pending_chitchat_reprompt_after_tts = False if self._cloud_client is not None: self._vad_speech_start_hook = self._on_vad_speech_start_prompt_listen self._after_processed_audio_chunk = self._tick_prompt_listen_silence_accum def _cancel_prompt_listen_timer(self) -> None: """停止「滴声后静默监听」累计(飞控/结束唤醒/起 PCM 上行前等)。""" self._prompt_listen_watch_armed = False self._prompt_silence_accum_sec = 0.0 def _arm_prompt_listen_timeout(self) -> None: """滴声后进 PROMPT_LISTEN:仅在麦克持续低于 energy_vad_rms_low 时累加,超时再播 MSG。""" if self._cloud_client is None: return with self._wake_flow_lock: if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN): return self._prompt_silence_accum_sec = 0.0 
self._prompt_listen_watch_armed = True logger.debug( "PROMPT_LISTEN: 已启用 RMS 累计静默 %.1fs(低于 rms_low 才计时;说话或 rms≥low 清零)", self._listen_silence_timeout_sec, ) def _on_prompt_listen_timeout(self) -> None: with self._wake_flow_lock: if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN): return self._prompt_listen_watch_armed = False self._prompt_silence_accum_sec = 0.0 logger.info( "[会话] 滴声后持续静默 ≥%.1fs(未截句),播超时提示并回待机", self._listen_silence_timeout_sec, ) self._enqueue_llm_speak(MSG_PROMPT_LISTEN_TIMEOUT) self._pending_finish_wake_cycle_after_tts = True def _tick_prompt_listen_silence_accum(self, processed_chunk: np.ndarray) -> None: if not self._prompt_listen_watch_armed or self._cloud_client is None: return with self._wake_flow_lock: if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN): return rms = self._int16_chunk_rms(processed_chunk) dt = float(len(processed_chunk)) / float(self.audio_capture.sample_rate) speaking = ( self._ev_speaking if self._use_energy_vad else self.vad.is_speaking ) if speaking or rms >= self._energy_rms_low: self._prompt_silence_accum_sec = 0.0 return self._prompt_silence_accum_sec += dt if self._prompt_silence_accum_sec >= self._listen_silence_timeout_sec: try: self._on_prompt_listen_timeout() except Exception as e: # noqa: BLE001 logger.error("PROMPT_LISTEN 静默超时处理异常: %s", e, exc_info=True) def _on_vad_speech_start_prompt_listen(self) -> None: """VAD 判「开始说话」时清零静默累计(v1 §4,与 RMS≥rms_low 并行)。""" if self._cloud_client is None: return with self._wake_flow_lock: if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN): return self._prompt_silence_accum_sec = 0.0 def _submit_concatenated_speech_to_stt(self) -> None: """在唤醒/一问一答流程中节流 VAD:避免问候或云端推理时继续向 STT 积压整句。""" allow_greeting_stt = os.environ.get( "ROCKET_VAD_STT_DURING_GREETING", "" ).lower() in ("1", "true", "yes") with self._wake_flow_lock: phase = self._wake_phase if phase == int(_WakeFlowPhase.GREETING_WAIT) and not allow_greeting_stt: with self.speech_buffer_lock: 
self.speech_buffer.clear() if os.environ.get("ROCKET_PRINT_VAD", "").lower() in ( "1", "true", "yes", ): print( "[VAD] 问候播放中,本段不送 STT(说完问候后再说指令;" "若需在问候同时识别请设 ROCKET_VAD_STT_DURING_GREETING=1", flush=True, ) return if phase == int(_WakeFlowPhase.LLM_BUSY): with self.speech_buffer_lock: self.speech_buffer.clear() if os.environ.get("ROCKET_PRINT_VAD", "").lower() in ( "1", "true", "yes", ): print( "[VAD] 大模型/云端处理中,本段不送 STT(请等本轮播报结束后再说)", flush=True, ) return if ( self._cloud_client is not None and phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN) ): if len(self.speech_buffer) == 0: return speech_audio = np.concatenate(self.speech_buffer) self.speech_buffer.clear() min_samples = int(self.audio_capture.sample_rate * 0.5) if len(speech_audio) >= min_samples: try: self.command_queue.put( ( _PCM_TURN_MARKER, speech_audio.copy(), int(self.audio_capture.sample_rate), ), block=False, ) if os.environ.get("ROCKET_PRINT_VAD", "").lower() in ( "1", "true", "yes", ): print( f"[VAD] turn.audio 已排队,{len(speech_audio)} 采样点" f"(≈{len(speech_audio) / float(self.audio_capture.sample_rate):.2f}s)", flush=True, ) except queue.Full: logger.warning("命令队列已满,跳过 PCM 上行") elif os.environ.get("ROCKET_PRINT_VAD", "").lower() in ( "1", "true", "yes", ): print( f"[VAD] 语音段太短已丢弃({len(speech_audio)} < {min_samples} 采样)", flush=True, ) return super()._submit_concatenated_speech_to_stt() def _llm_tts_output_device(self) -> str | int | None: raw = os.environ.get("ROCKET_TTS_DEVICE", "").strip() if raw.isdigit(): return int(raw) if raw: return raw return None def _before_audio_iteration(self) -> None: self._drain_mic_ops() super()._before_audio_iteration() self._drain_llm_playback_queue() def _drain_mic_ops(self) -> None: """主线程:执行命令线程请求的麦克风流 stop/start。""" while True: try: op = self._mic_op_queue.get_nowait() except queue.Empty: break try: if op == "stop": if self.audio_capture.stream is not None: self.audio_capture.stop_stream() elif op == "start" and self.running: if self.audio_capture.stream is None: 
self.audio_capture.start_stream() self.vad.reset() with self.speech_buffer_lock: self.speech_buffer.clear() self.pre_speech_buffer.clear() except Exception as e: # noqa: BLE001 logger.warning("麦克风流控制失败 (%r): %s", op, e) def _finish_wake_cycle(self) -> None: self._cancel_prompt_listen_timer() self._cancel_flight_confirm_timer() with self._flight_confirm_timer_lock: self._pending_flight_confirm = None self._pending_flight_confirm_after_tts = False self._pending_finish_wake_cycle_after_tts = False with self._wake_flow_lock: self._wake_phase = int(_WakeFlowPhase.IDLE) self._reset_llm_history() print("[唤醒] 本轮结束。请说「无人机」再次唤醒。", flush=True) def _reset_llm_history(self) -> None: with self._chat_session_lock: self._llm_messages.clear() self._chat_session_until = 0.0 def _flush_llm_playback_queue_silent(self) -> None: """丢弃 LLM 播报队列(无日志);新一轮唤醒前清空,避免与问候语或上一轮残段叠播。""" while True: try: self._llm_playback_queue.get_nowait() except queue.Empty: break def _prepare_wake_session_resources(self) -> None: """新一轮唤醒:清空对话状态、播报队列与待 STT 段(问候/快路径共用)。""" self._reset_llm_history() self._flush_llm_playback_queue_silent() self.discard_pending_stt_segments() def _recover_from_cloud_failure( self, user_msg: str, *, finish_wake_after_tts: bool, idle_speak: str, ) -> None: """云端 run_turn 失败后:按需回退本地 LLM 或播一句占位。""" if self._cloud_fallback_local: print("[云端] 回退本地 LLM…", flush=True) self._handle_llm_turn_local(user_msg, finish_wake_after_tts=finish_wake_after_tts) return self._enqueue_llm_speak(idle_speak) if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True def _begin_wake_cycle(self, staged_followup: str | None) -> None: """命中唤醒后:排队问候语,并在主线程播完后由 _after_greeting_pipeline 继续。""" with self._wake_flow_lock: if self._wake_phase != int(_WakeFlowPhase.IDLE): logger.info( "唤醒忽略:当前非 IDLE(phase=%s),不重复排队问候", _WakeFlowPhase(self._wake_phase).name, ) return self._wake_phase = int(_WakeFlowPhase.GREETING_WAIT) self._prepare_wake_session_resources() s = (staged_followup or "").strip() 
def _after_greeting_pipeline(self) -> None:
    """Continue the wake flow once the greeting playback has signalled completion.

    Waits up to 120 s on ``_greeting_done``. On timeout the wake cycle is
    finished. Otherwise a staged follow-up command, if any, is processed with
    the phase set to LLM_BUSY; with no staged command the phase becomes
    ONE_SHOT_LISTEN, the user is prompted and the silence watch is armed.
    """
    greeted = self._greeting_done.wait(timeout=120)
    if not greeted:
        logger.error("问候语播放超时,回到 IDLE")
        self._finish_wake_cycle()
        return
    self._greeting_done.clear()

    # Take the staged command and clear the slot in one locked step.
    with self._wake_flow_lock:
        follow_up = self._staged_one_shot_after_greeting
        self._staged_one_shot_after_greeting = None

    if follow_up is None:
        with self._wake_flow_lock:
            self._wake_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN)
        print("[唤醒] 请说您的指令(一句)。", flush=True)
        self._arm_prompt_listen_timeout()
        return

    with self._wake_flow_lock:
        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)
    self._process_one_shot_command(follow_up)
self._mic_op_queue.put_nowait("stop") except queue.Full: pass time.sleep(0.12) _, params = self.text_preprocessor.preprocess_fast(user_msg) if ( self._local_keyword_takeoff_enabled and params.command_keyword == "takeoff" ): threading.Thread( target=self._run_takeoff_offboard_and_wavs, daemon=True, ).start() self._finish_wake_cycle() try: self._mic_op_queue.put_nowait("start") except queue.Full: pass return if self._llm_disabled and not self._cloud_voice_enabled: print("[LLM] 已禁用(ROCKET_LLM_DISABLE)。", flush=True) self._finish_wake_cycle() try: self._mic_op_queue.put_nowait("start") except queue.Full: pass return self._handle_llm_turn( user_msg, finish_wake_after_tts=(self._cloud_client is None) ) @staticmethod def _flight_payload_requests_takeoff(payload: dict) -> bool: for a in payload.get("actions") or []: if isinstance(a, dict) and a.get("type") == "takeoff": return True return False def _enqueue_llm_speak(self, line: str) -> None: t = (line or "").strip() if not t: return try: self._llm_playback_queue.put(t, block=False) except queue.Full: logger.warning("LLM 播报队列已满,跳过: %s…", t[:40]) def _ensure_llm(self): if self._llm is not None: return self._llm with self._model_warm_lock: if self._llm is not None: return self._llm if not self._llm_model_path.is_file(): logger.error("未找到 GGUF: %s", self._llm_model_path) return None logger.info("正在加载 LLM: %s", self._llm_model_path) print("[LLM] 正在加载 Qwen(GGUF)…", flush=True) self._llm = load_llama_qwen(self._llm_model_path, n_ctx=self._llm_ctx) if self._llm is None: logger.error("llama-cpp-python 未安装或加载失败") else: print("[LLM] Qwen 已载入。", flush=True) return self._llm def _ensure_llm_tts(self): if self._llm_tts_engine is not None: return self._llm_tts_engine with self._model_warm_lock: if self._llm_tts_engine is not None: return self._llm_tts_engine from voice_drone.core.tts import KokoroOnnxTTS print("[LLM] 正在加载 Kokoro TTS(ONNX)…", flush=True) self._llm_tts_engine = KokoroOnnxTTS() print("[LLM] Kokoro 已载入。", flush=True) return 
def _preload_llm_and_tts_if_enabled(self) -> None:
    """Preload models after startup so the first dialogue/playback does not stall.

    Cloud voice enabled:
        * the local Qwen model is never preloaded;
        * if the greeting WAV is missing it is generated, but only when the LLM
          is not disabled and local lines are not routed to cloud TTS (any
          failure here is logged at debug level and ignored);
        * when local lines are routed to cloud TTS, Kokoro preloading is skipped;
        * otherwise Kokoro is loaded now unless preloading was skipped, and a
          load failure is logged and printed without raising.

    Cloud voice disabled:
        * returns early when the LLM is disabled, preloading is skipped, or the
          GGUF file does not exist;
        * otherwise loads the LLM, Kokoro and the greeting WAV cache, printing
          the elapsed time; failures are logged and printed without raising.
    """
    if self._cloud_voice_enabled:
        print(
            "[云端] 跳过本地 Qwen 预加载;对话 TTS 以云端 PCM 为主。",
            flush=True,
        )
        try:
            p = _resolve_wake_greeting_wav()
            if not p.is_file():
                if (
                    not self._llm_disabled
                    and not self._cloud_remote_tts_for_local
                ):
                    self._ensure_wake_greeting_wav_on_disk()
        except Exception as e:  # noqa: BLE001
            logger.debug("云端模式下预热问候 WAV 跳过: %s", e)
        if self._cloud_remote_tts_for_local:
            print(
                "[云端] 本地字符串播报由 tts.synthesize 提供,跳过 Kokoro 预加载"
                "(失败时会临场加载 Kokoro)。",
                flush=True,
            )
            return
        # Flight-confirm timeout/cancel prompts and the cloud fallback still use local Kokoro;
        # load it once at startup so a timeout prompt does not cold-start the model (seconds of stall).
        if self._skip_model_preload:
            print(
                "[云端] 已跳过 Kokoro 预加载(--no-preload / ROCKET_SKIP_MODEL_PRELOAD);"
                "首次本地提示时再加载。",
                flush=True,
            )
        else:
            t0 = time.monotonic()
            try:
                print(
                    "[LLM] 云端模式:预加载 Kokoro(确认超时/取消等本地语音)…",
                    flush=True,
                )
                self._ensure_llm_tts()
            except Exception as e:  # noqa: BLE001
                logger.warning(
                    "云端模式 Kokoro 预加载失败(将在首次本地播报时重试): %s",
                    e,
                    exc_info=True,
                )
                print(f"[LLM] Kokoro 预加载失败: {e}", flush=True)
            else:
                dt = time.monotonic() - t0
                print(f"[LLM] Kokoro 预加载完成(约 {dt:.1f}s)。", flush=True)
        # Cloud mode never falls through to the local Qwen preload below.
        return
    if self._llm_disabled or self._skip_model_preload:
        # Only announce the skip when the LLM itself is still enabled.
        if self._skip_model_preload and not self._llm_disabled:
            print(
                "[LLM] 已跳过预加载(--no-preload 或 ROCKET_SKIP_MODEL_PRELOAD),将在首次使用时加载。",
                flush=True,
            )
        return
    if not self._llm_model_path.is_file():
        print(
            f"[LLM] 未找到 GGUF,跳过预加载: {self._llm_model_path}",
            flush=True,
        )
        return
    print(
        "[LLM] 预加载 Qwen + Kokoro(数十秒属正常,完成后的首轮对话会快很多)…",
        flush=True,
    )
    t0 = time.monotonic()
    try:
        # _ensure_llm() returning None means the model could not be loaded; stop without loading TTS.
        if self._ensure_llm() is None:
            return
        self._ensure_llm_tts()
        self._ensure_wake_greeting_wav_on_disk()
    except Exception as e:  # noqa: BLE001
        logger.warning("预加载模型失败(将在首次使用时重试): %s", e, exc_info=True)
        print(f"[LLM] 预加载失败: {e}", flush=True)
        return
    dt = time.monotonic() - t0
    print(f"[LLM] 预加载完成(耗时约 {dt:.1f}s)。", flush=True)
def _play_wake_ready_beep(self, output_device: object | None) -> None:
    """Play the short "you may speak now" beep after the greeting.

    Skipped entirely when ROCKET_WAKE_PROMPT_BEEP is "0", "false" or "no".
    Duration, pitch and gain come from ROCKET_WAKE_BEEP_SEC / _HZ / _GAIN;
    unparsable values fall back to the defaults, and duration and gain are
    clamped. Playback errors are logged at debug level and ignored.
    """
    from voice_drone.core.tts import play_tts_audio

    def _env_float(name: str, default_text: str, fallback: float) -> float:
        # Parse an environment float, using `fallback` when the text is not a number.
        try:
            return float(os.environ.get(name, default_text))
        except ValueError:
            return fallback

    switch = os.environ.get("ROCKET_WAKE_PROMPT_BEEP", "1").lower()
    if switch in ("0", "false", "no"):
        return

    rate = 24000
    seconds = _env_float("ROCKET_WAKE_BEEP_SEC", "0.11", 0.11)
    seconds = max(0.04, min(0.25, seconds))
    pitch = _env_float("ROCKET_WAKE_BEEP_HZ", "988", 988.0)
    gain = _env_float("ROCKET_WAKE_BEEP_GAIN", "0.22", 0.22)
    gain = max(0.05, min(0.45, gain))

    beep = _synthesize_ready_beep(
        rate, duration_sec=seconds, frequency_hz=pitch, amplitude=gain
    )
    try:
        play_tts_audio(beep, rate, output_device=output_device)
        print("[唤醒] 提示音已播,请说指令。", flush=True)
    except Exception as e:  # noqa: BLE001
        logger.debug("唤醒提示音播放跳过: %s", e)
float(os.environ.get("ROCKET_WAKE_BEEP_SEC", "0.11")) * 0.72 except ValueError: dur = 0.08 dur = max(0.04, min(0.25, dur)) try: raw_h = os.environ.get("ROCKET_WAKE_ACK_BEEP_HZ", "").strip() hz = float(raw_h) if raw_h else float(os.environ.get("ROCKET_WAKE_BEEP_HZ", "988")) except ValueError: hz = 1100.0 try: raw_g = os.environ.get("ROCKET_WAKE_ACK_BEEP_GAIN", "").strip() amp = float(raw_g) if raw_g else float(os.environ.get("ROCKET_WAKE_BEEP_GAIN", "0.22")) except ValueError: amp = 0.22 amp = max(0.05, min(0.45, amp)) audio = _synthesize_ready_beep( sr, duration_sec=dur, frequency_hz=hz, amplitude=amp ) try: play_tts_audio(audio, sr, output_device=output_device) except Exception as e: # noqa: BLE001 logger.debug("唤醒确认短音播放失败: %s", e) return print("[唤醒] 确认短音已播。", flush=True) def _try_play_line_via_cloud_tts(self, s: str, dev: object | None) -> bool: """docs/API.md §3.3 tts.synthesize:成功播放返回 True,否则 False(调用方回退 Kokoro)。""" if not self._cloud_remote_tts_for_local or self._cloud_client is None: return False txt = (s or "").strip() if not txt: return False from voice_drone.core.cloud_voice_client import CloudVoiceError from voice_drone.core.tts import play_tts_audio t0 = time.monotonic() try: out = self._cloud_client.run_tts_synthesize(txt) except CloudVoiceError as e: logger.warning("云端 tts.synthesize 失败: %s", e) return False except Exception as e: # noqa: BLE001 logger.warning("云端 tts.synthesize 异常: %s", e, exc_info=True) return False pcm = out.get("pcm") try: sr = int(out.get("sample_rate_hz") or 24000) except (TypeError, ValueError): sr = 24000 if pcm is None or np.asarray(pcm).size == 0: logger.warning("云端 tts.synthesize 返回空 PCM") return False pcm_i16 = np.asarray(pcm, dtype=np.int16).reshape(-1) logger.info( "云端 tts.synthesize: samples=%s int16_max_abs=%s elapsed=%.3fs", pcm_i16.size, int(np.max(np.abs(pcm_i16))), time.monotonic() - t0, ) audio_f32 = pcm_i16.astype(np.float32) / 32768.0 try: play_tts_audio(audio_f32, sr, output_device=dev) except Exception as e: # 
def _handle_pcm_uplink_turn(self, pcm: np.ndarray, sample_rate_hz: int) -> None:
    """Handle one VAD-segmented utterance: segment-end cue, settle delay, then cloud uplink.

    Ignored unless the wake phase is ONE_SHOT_LISTEN. The silence watch is
    cancelled, the microphone is asked to stop, and a segment-end cue is
    queued for playback. Once the cue has played and the post-cue mute has
    elapsed, the phase becomes LLM_BUSY and the PCM is handed to the cloud
    turn handler.

    If the cue cannot be queued or is not played within 15 s, the turn is
    abandoned: the microphone is asked to restart and the silence watch is
    re-armed, so the still-listening session can time out back to standby
    instead of waiting indefinitely.

    Args:
        pcm: Captured utterance samples.
        sample_rate_hz: Sample rate of ``pcm``.
    """

    def _request_mic(op: str) -> None:
        # Best effort: a full queue drops the request, but no longer silently.
        try:
            self._mic_op_queue.put_nowait(op)
        except queue.Full:
            logger.warning("麦克风操作队列已满,丢弃请求: %s", op)

    def _abort_and_resume_listening() -> None:
        # The phase is still ONE_SHOT_LISTEN here, so restore what was
        # switched off above: the microphone and the silence watch.
        _request_mic("start")
        self._arm_prompt_listen_timeout()

    with self._wake_flow_lock:
        if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN):
            logger.debug("PCM 上行忽略:当前非 PROMPT_LISTEN")
            return

    self._cancel_prompt_listen_timer()
    _request_mic("stop")

    self._segment_cue_done.clear()
    try:
        self._llm_playback_queue.put_nowait(_SEGMENT_END_CUE_TAG)
    except queue.Full:
        logger.error("播报队列满,无法播断句提示")
        _abort_and_resume_listening()
        return

    if not self._segment_cue_done.wait(timeout=15.0):
        logger.error("断句提示音同步超时")
        _abort_and_resume_listening()
        return

    # Let the cue's tail die out before the uplink turn starts.
    time.sleep(self._post_cue_mic_mute_ms / 1000.0)
    with self._wake_flow_lock:
        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)
    self._handle_llm_turn_cloud_pcm(
        pcm, sample_rate_hz, finish_wake_after_tts=False
    )
# 注意:飞控确认窗须在「播完含本轮云端 TTS 的一批队列」之后在 finally 里进入, # 不可在此处用 _pending_flight_confirm_after_tts,否则主线程可能在 PCM 入队前 # 空跑 drain,抢先 begin_confirm 并清掉标志,命令线程末尾又会设 _pending_finish_wake_cycle。 if self._pending_finish_wake_cycle_after_tts: self._pending_finish_wake_cycle_after_tts = False self._finish_wake_cycle() return greeting_batch = self._playback_batch_is_greeting self._playback_batch_is_greeting = False mic_stopped = False if self.ack_pause_mic_for_playback: # 关麦前再丢一次队列:唤醒到 drain 之间 VAD 可能又提交了片段 self.discard_pending_stt_segments() try: self.audio_capture.stop_stream() mic_stopped = True except Exception as e: # noqa: BLE001 logger.warning("暂停麦克风失败: %s", e) try: tts = None dev = self._llm_tts_output_device() for line in lines: if line == _WAKE_HIT_BEEP_TAG: self._play_wake_word_hit_beep(dev) continue if line == _SEGMENT_END_CUE_TAG: self._play_segment_end_cue(dev) self._segment_cue_done.set() continue if line == _CHITCHAT_REPROMPT_BEEP_TAG: self._play_chitchat_reprompt_beep(dev) self._arm_prompt_listen_timeout() continue if ( isinstance(line, tuple) and len(line) == 3 and line[0] == _CLOUD_PCM_TAG ): _, pcm_i16, sr_cloud = line try: pcm_i16 = np.asarray(pcm_i16, dtype=np.int16).reshape(-1) if pcm_i16.size == 0: continue dbg_max = int(np.max(np.abs(pcm_i16))) logger.info( "云端 PCM 解码: samples=%s int16_max_abs=%s (若 max_abs=0 则为全零或" "协议/端序与云端不一致;请在服务端导出同段 WAV 对比)", pcm_i16.size, dbg_max, ) audio_f32 = pcm_i16.astype(np.float32) / 32768.0 t_play0 = time.monotonic() play_tts_audio( audio_f32, int(sr_cloud), output_device=dev ) print( f"[计时] 云端 TTS 播放 {time.monotonic() - t_play0:.3f}s " f"({pcm_i16.size / int(sr_cloud):.2f}s 音频)", flush=True, ) print("[LLM] 已播报。", flush=True) except Exception as e: # noqa: BLE001 logger.warning("云端 PCM 播放失败: %s", e, exc_info=True) continue s = (line or "").strip() if not s: continue try: if s == _WAKE_GREETING: t_w0 = time.monotonic() cloud_ok = self._try_play_line_via_cloud_tts(s, dev) if not cloud_ok: greet_wav = 
self._ensure_wake_greeting_wav_on_disk() if greet_wav.is_file(): play_wav_path(greet_wav, output_device=dev) print( f"[计时] TTS 预生成问候 WAV 播完,耗时 " f"{time.monotonic() - t_w0:.3f}s", flush=True, ) else: if tts is None: tts = self._ensure_llm_tts() logger.info("TTS: 开始合成并播放: %r", s) t_syn0 = time.monotonic() audio, sr = tts.synthesize(s) t_syn1 = time.monotonic() play_tts_audio(audio, sr, output_device=dev) t_play1 = time.monotonic() print( f"[计时] TTS 合成 {t_syn1 - t_syn0:.3f}s," f"播放 {t_play1 - t_syn1:.3f}s" f"(本段合计 {t_play1 - t_syn0:.3f}s)", flush=True, ) logger.info("TTS: 播放完成") else: print( f"[计时] 云端 tts.synthesize 问候,耗时 " f"{time.monotonic() - t_w0:.3f}s", flush=True, ) if greeting_batch: self._play_wake_ready_beep(dev) else: t_line0 = time.monotonic() cloud_ok = self._try_play_line_via_cloud_tts(s, dev) if not cloud_ok: if tts is None: tts = self._ensure_llm_tts() logger.info("TTS: 开始合成并播放: %r", s) t_syn0 = time.monotonic() audio, sr = tts.synthesize(s) t_syn1 = time.monotonic() play_tts_audio(audio, sr, output_device=dev) t_play1 = time.monotonic() print( f"[计时] TTS 合成 {t_syn1 - t_syn0:.3f}s," f"播放 {t_play1 - t_syn1:.3f}s" f"(本段合计 {t_play1 - t_syn0:.3f}s)", flush=True, ) logger.info("TTS: 播放完成") else: print( f"[计时] 云端 tts.synthesize 本段合计 " f"{time.monotonic() - t_line0:.3f}s", flush=True, ) print("[LLM] 已播报。", flush=True) except Exception as e: # noqa: BLE001 logger.warning("LLM 播报失败: %s", e, exc_info=True) finally: if mic_stopped and recover_mic: try: self.audio_capture.start_stream() try: settle_ms = float( os.environ.get("ROCKET_MIC_RESTART_SETTLE_MS", "150") ) except ValueError: settle_ms = 150.0 settle_ms = max(0.0, min(2000.0, settle_ms)) if settle_ms > 0: time.sleep(settle_ms / 1000.0) try: self.audio_preprocessor.reset() except Exception as e: # noqa: BLE001 logger.debug("audio_preprocessor.reset: %s", e) self.vad.reset() with self.speech_buffer_lock: self.speech_buffer.clear() self.pre_speech_buffer.clear() except Exception as e: # noqa: BLE001 
logger.error("麦克风恢复失败: %s", e) if greeting_batch: self._greeting_done.set() if self._pending_flight_confirm_after_tts: self._pending_flight_confirm_after_tts = False self._begin_flight_confirm_listen() elif self._pending_chitchat_reprompt_after_tts: self._pending_chitchat_reprompt_after_tts = False with self._wake_flow_lock: self._wake_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN) try: self._llm_playback_queue.put_nowait(_CHITCHAT_REPROMPT_BEEP_TAG) except queue.Full: logger.warning("播报队列已满,跳过闲聊再滴声") elif self._pending_finish_wake_cycle_after_tts: self._pending_finish_wake_cycle_after_tts = False self._finish_wake_cycle() def _discard_llm_playback_queue(self) -> None: """退出时丢弃未播完的大模型 TTS,避免 stop() 里 speak_text/sounddevice 长时间阻塞导致 Ctrl+C 无法结束进程。""" dropped = 0 while True: try: self._llm_playback_queue.get_nowait() dropped += 1 except queue.Empty: break if dropped: logger.info("退出:已丢弃 %s 条待播 LLM 语音", dropped) @staticmethod def _chunk_delta_text(chunk: object) -> str: if not isinstance(chunk, dict): return "" choices = chunk.get("choices") or [] if not choices: return "" c0 = choices[0] d = c0.get("delta") if isinstance(c0, dict) else None if not isinstance(d, dict): d = c0.get("message") if isinstance(c0, dict) else None if not isinstance(d, dict): return "" raw = d.get("content") return raw if isinstance(raw, str) else "" def _enqueue_segment_capped(self, seg: str, budget: int) -> int: seg = (seg or "").strip() if not seg or budget <= 0: return budget if len(seg) <= budget: self._enqueue_llm_speak(seg) return budget - len(seg) self._enqueue_llm_speak(seg[: max(0, budget - 1)] + "…") return 0 def _finalize_llm_turn( self, reply: str, finish_wake_after_tts: bool, *, streamed_chat: bool, ) -> None: if not reply: self._enqueue_llm_speak("我没听清,请再说一遍。") if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return mode, payload = parse_flight_intent_reply(reply) with self._chat_session_lock: self._llm_messages.append({"role": "assistant", "content": 
reply}) print(f"[LLM] 判定={mode}", flush=True) print(f"[LLM] 原文: {reply[:500]}{'…' if len(reply) > 500 else ''}", flush=True) if streamed_chat: if payload is not None and self._flight_payload_requests_takeoff(payload): threading.Thread( target=self._run_takeoff_offboard_and_wavs, daemon=True, ).start() if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return if payload is not None: to_say = str(payload.get("summary") or "好的。").strip() if self._flight_payload_requests_takeoff(payload): threading.Thread( target=self._run_takeoff_offboard_and_wavs, daemon=True, ).start() else: to_say = reply.strip() if len(to_say) > self._llm_tts_max_chars: to_say = to_say[: self._llm_tts_max_chars] + "…" self._enqueue_llm_speak(to_say) if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True def _enqueue_cloud_pcm_playback( self, pcm_int16: np.ndarray, sample_rate_hz: int ) -> None: if pcm_int16 is None or np.asarray(pcm_int16).size == 0: return try: self._llm_playback_queue.put( (_CLOUD_PCM_TAG, np.asarray(pcm_int16, dtype=np.int16), int(sample_rate_hz)), block=False, ) except queue.Full: logger.warning("LLM 播报队列已满,跳过云端 PCM") def _send_socket_command(self, cmd: Command) -> bool: cmd.fill_defaults() if self.socket_client.send_command_with_retry(cmd): logger.info("✅ Socket 已发送: %s", cmd.command) return True logger.warning("Socket 未送达(已达 max_retries): %s", cmd.command) return False def _publish_flight_intent_to_ros_bridge(self, flight: dict) -> None: """校验 flight_intent 后由子进程发布到 ROS std_msgs/String(伴飞桥 ~input)。""" _parsed, errors = parse_flight_intent_dict(flight) if errors or _parsed is None: logger.warning("[飞控-ROS桥] flight_intent 校验失败,未发布: %s", errors) return setup = os.environ.get( "ROCKET_FLIGHT_BRIDGE_SETUP", "source /opt/ros/noetic/setup.bash" ).strip() topic = os.environ.get("ROCKET_FLIGHT_BRIDGE_TOPIC", "/input").strip() or "/input" wait_raw = os.environ.get("ROCKET_FLIGHT_BRIDGE_WAIT_SUB", "2").strip() try: wait_sub = float(wait_raw) 
except ValueError: wait_sub = 2.0 root = str(_PROJECT_ROOT) body = json.dumps(flight, ensure_ascii=False) fd, tmp_path = tempfile.mkstemp(prefix="flight_intent_", suffix=".json", text=True) try: with os.fdopen(fd, "w", encoding="utf-8") as f: f.write(body) except OSError: try: os.close(fd) except OSError: pass try: os.unlink(tmp_path) except OSError: pass logger.warning("[飞控-ROS桥] 无法写入临时 JSON") return # 须追加 PYTHONPATH:若写成 PYTHONPATH=仅工程根,会覆盖 ROS setup 注入的 /opt/ros/.../dist-packages,导致找不到 rospy。 cmd = ( f"{setup} && cd {shlex.quote(root)} && " f"export PYTHONPATH={shlex.quote(root)}:$PYTHONPATH && " "python3 -m voice_drone.tools.publish_flight_intent_ros_once " f"--topic {shlex.quote(topic)} --wait-subscribers {wait_sub} " f"{shlex.quote(tmp_path)}" ) try: r = subprocess.run( ["bash", "-lc", cmd], capture_output=True, text=True, timeout=60, ) except subprocess.TimeoutExpired: logger.warning("[飞控-ROS桥] 子进程超时(>60s)") return except OSError as e: logger.warning("[飞控-ROS桥] 无法启动 bash: %s", e) return finally: try: os.unlink(tmp_path) except OSError: pass if r.returncode != 0: logger.warning( "[飞控-ROS桥] 发布失败 code=%s stderr=%s", r.returncode, (r.stderr or "").strip()[:800], ) else: logger.info("[飞控-ROS桥] 已发布至 %s", topic) def _run_cloud_flight_intent_sequence(self, flight: dict) -> None: """ 在后台线程中顺序执行云端 flight_intent(校验 v1 + takeoff 走 offboard + 其余 Socket)。 含 takeoff 时:先跑完 offboard 流程,再继续 hover/wait/land 等(修复此前仅触发起飞、后续动作丢失)。 """ parsed, errors = parse_flight_intent_dict(flight) if errors: logger.warning("[飞控] flight_intent 校验失败: %s", errors) return tid = (parsed.trace_id or "").strip() or "-" logger.info("[飞控] 开始执行序列 trace_id=%s steps=%d", tid, len(parsed.actions)) for step, action in enumerate(parsed.actions): if isinstance(action, ActionTakeoff): alt = action.args.relative_altitude_m if alt is not None: logger.info( "[飞控] takeoff 请求相对高度 %.2fm(当前 offboard 脚本是否使用该参数请自行扩展)", alt, ) self._run_takeoff_offboard_and_wavs() elif isinstance(action, ActionLand): cmd = 
Command.create("land", self._get_next_sequence_id()) self._send_socket_command(cmd) elif isinstance(action, ActionReturnHome): cmd = Command.create("return_home", self._get_next_sequence_id()) self._send_socket_command(cmd) elif isinstance(action, (ActionHover, ActionHold)): cmd = Command.create("hover", self._get_next_sequence_id()) self._send_socket_command(cmd) elif isinstance(action, ActionGoto): cmd, err = goto_action_to_command(action, self._get_next_sequence_id()) if err: logger.warning("[飞控] step %d goto: %s", step, err) continue if cmd is not None: self._send_socket_command(cmd) elif isinstance(action, ActionWait): sec = float(action.args.seconds) logger.info("[飞控] step %d wait %.2fs", step, sec) time.sleep(sec) else: logger.warning("[飞控] step %d 未处理的动作类型: %r", step, action) def _cancel_flight_confirm_timer(self) -> None: with self._flight_confirm_timer_lock: t = self._flight_confirm_timer self._flight_confirm_timer = None if t is not None: try: t.cancel() except Exception: # noqa: BLE001 pass def _begin_flight_confirm_listen(self) -> None: """云端 TTS 播完后进入口头确认窗(cloud_voice_dialog_v1)。""" self._cancel_prompt_listen_timer() with self._flight_confirm_timer_lock: if self._pending_flight_confirm is None: logger.warning("[飞控] 无待确认意图,跳过确认窗") self._finish_wake_cycle() return cd = self._pending_flight_confirm["confirm"] timeout_sec = float(cd["timeout_sec"]) phrases_repr = (cd["confirm_phrases"], cd["cancel_phrases"]) self._cancel_flight_confirm_timer() with self._wake_flow_lock: self._wake_phase = int(_WakeFlowPhase.FLIGHT_CONFIRM_LISTEN) print( f"[飞控] 请口头确认 {phrases_repr[0]!r} 或取消 {phrases_repr[1]!r}," f"超时 {timeout_sec:.0f}s。", flush=True, ) def _fire() -> None: try: self._on_flight_confirm_timeout() except Exception as e: # noqa: BLE001 logger.error("确认窗超时处理异常: %s", e, exc_info=True) with self._flight_confirm_timer_lock: self._flight_confirm_timer = threading.Timer(timeout_sec, _fire) self._flight_confirm_timer.daemon = True self._flight_confirm_timer.start() 
def _on_flight_confirm_timeout(self) -> None: with self._flight_confirm_timer_lock: if self._pending_flight_confirm is None: return self._pending_flight_confirm = None self._flight_confirm_timer = None logger.info("[飞控] 确认窗超时") self._enqueue_llm_speak(MSG_CONFIRM_TIMEOUT) self._pending_finish_wake_cycle_after_tts = True def _handle_flight_confirm_text(self, raw: str) -> None: utter = (raw or "").strip() if not utter: return norm = normalize_phrase_text(utter) print(f"[飞控-确认窗] {utter!r}", flush=True) action: str = "noop" fi_ok: dict | None = None t: threading.Timer | None = None with self._flight_confirm_timer_lock: pend = self._pending_flight_confirm if pend is None: return cd = pend["confirm"] cancel_hit = match_phrase_list(norm, cd["cancel_phrases"]) confirm_hit = match_phrase_list(norm, cd["confirm_phrases"]) if cancel_hit: action = "cancel" self._pending_flight_confirm = None t = self._flight_confirm_timer self._flight_confirm_timer = None elif confirm_hit: action = "confirm" fi_ok = pend["flight"] self._pending_flight_confirm = None t = self._flight_confirm_timer self._flight_confirm_timer = None else: logger.info("[飞控] 确认窗未命中短语,忽略: %s", utter[:80]) return if t is not None: try: t.cancel() except Exception: # noqa: BLE001 pass if action == "cancel": logger.info("[飞控] 用户取消待执行意图") self._enqueue_llm_speak(MSG_CANCELLED) self._pending_finish_wake_cycle_after_tts = True return if action == "confirm" and fi_ok is not None: logger.info("[飞控] 用户已确认,开始执行 flight_intent") self._start_cloud_flight_execution(fi_ok) self._enqueue_llm_speak(MSG_CONFIRM_EXECUTING) self._pending_finish_wake_cycle_after_tts = True def _start_cloud_flight_execution(self, fi: dict) -> None: """ROCKET_CLOUD_EXECUTE_FLIGHT 已通过校验后,起线程执行。""" if os.environ.get("ROCKET_CLOUD_EXECUTE_FLIGHT", "").lower() not in ( "1", "true", "yes", ): return if os.environ.get("ROCKET_FLIGHT_INTENT_ROS_BRIDGE", "").lower() in ( "1", "true", "yes", ): threading.Thread( target=self._publish_flight_intent_to_ros_bridge, 
args=(fi,), daemon=True, ).start() else: threading.Thread( target=self._run_cloud_flight_intent_sequence, args=(fi,), daemon=True, ).start() def _handle_llm_turn( self, user_msg: str, *, finish_wake_after_tts: bool = False ) -> None: if self._cloud_voice_enabled and self._cloud_client is not None: self._handle_llm_turn_cloud(user_msg, finish_wake_after_tts=finish_wake_after_tts) return self._handle_llm_turn_local(user_msg, finish_wake_after_tts=finish_wake_after_tts) def _apply_cloud_dialog_result( self, result: dict, *, finish_wake_after_tts: bool, ) -> None: proto = result.get("protocol") routing = result.get("routing") fi = result.get("flight_intent") confirm_raw = result.get("confirm") scheduled_flight_confirm = False if routing == "flight_intent" and isinstance(fi, dict) and fi.get("is_flight_intent"): summary = str(fi.get("summary") or "好的。").strip() actions = fi.get("actions") or [] print(f"[LLM] 判定=飞控意图(云端) summary={summary!r}", flush=True) print(f"[LLM] actions={actions!r}", flush=True) if proto != CLOUD_VOICE_DIALOG_V1: logger.error( "[云端] flight_intent 须 protocol=%r,收到 %r;按 v1 拒执行飞控", CLOUD_VOICE_DIALOG_V1, proto, ) cd = parse_confirm_dict(confirm_raw) if cd is None: logger.error("[云端] flight_intent 须带合法 confirm 对象(v1),拒执行飞控") exec_enabled = os.environ.get("ROCKET_CLOUD_EXECUTE_FLIGHT", "").lower() in ( "1", "true", "yes", ) if ( exec_enabled and proto == CLOUD_VOICE_DIALOG_V1 and cd is not None ): if cd["required"]: scheduled_flight_confirm = True with self._flight_confirm_timer_lock: self._pending_flight_confirm = {"flight": fi, "confirm": cd} self._pending_flight_confirm_after_tts = True logger.info( "[云端] flight_intent 待口头确认(pending_id=%s);" "播完 TTS 后听确认/超时", cd.get("pending_id"), ) else: logger.info( "[云端] flight_intent confirm.required=false,将直接执行(若已开执行开关)" ) self._start_cloud_flight_execution(fi) elif exec_enabled and ( proto != CLOUD_VOICE_DIALOG_V1 or cd is None ): logger.warning( "[云端] 协议或 confirm 不完整,本轮不执行飞控(仍播 TTS)" ) else: logger.info( "[云端] 
flight_intent 已下发(未设 ROCKET_CLOUD_EXECUTE_FLIGHT,仅播报)" ) elif routing == "chitchat": if proto != CLOUD_VOICE_DIALOG_V1: logger.warning( "[云端] chitchat 期望 protocol=%r,实际=%r", CLOUD_VOICE_DIALOG_V1, proto, ) cr = (result.get("chat_reply") or "").strip() print(f"[LLM] 判定=闲聊(云端) reply={cr[:200]!r}", flush=True) else: logger.warning("未知 routing: %s", routing) pcm = result.get("pcm") sr = int(result.get("sample_rate_hz") or 24000) if pcm is not None and np.asarray(pcm).size > 0: self._enqueue_cloud_pcm_playback(np.asarray(pcm, dtype=np.int16), sr) elif self._cloud_fallback_local: if routing == "flight_intent" and isinstance(fi, dict): fallback_txt = str(fi.get("summary") or "好的。").strip() else: fallback_txt = (result.get("chat_reply") or "好的。").strip() if fallback_txt: self._enqueue_llm_speak(fallback_txt) else: self._enqueue_llm_speak("未收到云端语音。") if routing == "chitchat": self._pending_chitchat_reprompt_after_tts = True elif scheduled_flight_confirm: pass elif finish_wake_after_tts and not scheduled_flight_confirm: self._pending_finish_wake_cycle_after_tts = True elif routing == "flight_intent" and not scheduled_flight_confirm: self._pending_finish_wake_cycle_after_tts = True elif routing not in ("chitchat", "flight_intent"): self._pending_finish_wake_cycle_after_tts = True def _handle_llm_turn_cloud( self, user_msg: str, *, finish_wake_after_tts: bool = False ) -> None: from voice_drone.core.cloud_voice_client import CloudVoiceError assert self._cloud_client is not None t0 = time.monotonic() try: result = self._cloud_client.run_turn(user_msg) except CloudVoiceError as e: print(f"[云端] 失败: {e} (code={e.code!r})", flush=True) logger.error("云端对话失败: %s", e, exc_info=True) self._recover_from_cloud_failure( user_msg, finish_wake_after_tts=finish_wake_after_tts, idle_speak="云端服务不可用,请稍后再试。", ) return except Exception as e: # noqa: BLE001 print(f"[云端] 异常: {e}", flush=True) logger.error("云端对话异常: %s", e, exc_info=True) self._recover_from_cloud_failure( user_msg, 
finish_wake_after_tts=finish_wake_after_tts, idle_speak="网络异常,请稍后再试。", ) return dt = time.monotonic() - t0 metrics = result.get("metrics") or {} print( f"[计时] 云端一轮(turn.text) {dt:.3f}s " f"(llm_ms={metrics.get('llm_ms')!r}, " f"tts_first_byte_ms={metrics.get('tts_first_byte_ms')!r})", flush=True, ) self._apply_cloud_dialog_result(result, finish_wake_after_tts=finish_wake_after_tts) def _handle_llm_turn_cloud_pcm( self, pcm_i16: np.ndarray, sample_rate_hz: int, *, finish_wake_after_tts: bool = False, ) -> None: from voice_drone.core.cloud_voice_client import CloudVoiceError assert self._cloud_client is not None t0 = time.monotonic() try: result = self._cloud_client.run_turn_audio(pcm_i16, int(sample_rate_hz)) except CloudVoiceError as e: print(f"[云端] turn.audio 失败: {e} (code={e.code!r})", flush=True) logger.error("云端 turn.audio 失败: %s", e, exc_info=True) self._recover_from_cloud_failure( "", finish_wake_after_tts=True, idle_speak="云端语音识别失败,请稍后再试。", ) return except Exception as e: # noqa: BLE001 print(f"[云端] turn.audio 异常: {e}", flush=True) logger.error("云端 turn.audio 异常: %s", e, exc_info=True) self._recover_from_cloud_failure( "", finish_wake_after_tts=True, idle_speak="网络异常,请稍后再试。", ) return dt = time.monotonic() - t0 metrics = result.get("metrics") or {} print( f"[计时] 云端一轮(turn.audio) {dt:.3f}s " f"(llm_ms={metrics.get('llm_ms')!r}, " f"tts_first_byte_ms={metrics.get('tts_first_byte_ms')!r})", flush=True, ) self._apply_cloud_dialog_result(result, finish_wake_after_tts=finish_wake_after_tts) def _handle_llm_turn_local( self, user_msg: str, *, finish_wake_after_tts: bool = False ) -> None: llm = self._ensure_llm() if llm is None: self._enqueue_llm_speak( "大模型未就绪。请确认已下载 GGUF,或设置环境变量 ROCKET_LLM_GGUF 指向模型文件。" ) if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return with self._chat_session_lock: self._llm_messages = [ {"role": "system", "content": FLIGHT_INTENT_CHAT_SYSTEM}, {"role": "user", "content": user_msg}, ] messages_snapshot = 
list(self._llm_messages) if not self._llm_stream_enabled: t_llm0 = time.monotonic() try: out = llm.create_chat_completion( messages=messages_snapshot, max_tokens=self._llm_max_tokens, ) except Exception as e: # noqa: BLE001 dt_llm = time.monotonic() - t_llm0 print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True) logger.error("LLM 推理失败: %s", e, exc_info=True) with self._chat_session_lock: if self._llm_messages and self._llm_messages[-1].get("role") == "user": self._llm_messages.pop() self._enqueue_llm_speak("推理出错,请稍后再说。") if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return dt_llm = time.monotonic() - t_llm0 print(f"[计时] LLM 推理 {dt_llm:.3f}s", flush=True) reply = ( (out.get("choices") or [{}])[0].get("message") or {} ).get("content", "").strip() self._finalize_llm_turn( reply, finish_wake_after_tts, streamed_chat=False ) return t_llm0 = time.monotonic() try: stream = llm.create_chat_completion( messages=messages_snapshot, max_tokens=self._llm_max_tokens, stream=True, ) except Exception as e: # noqa: BLE001 dt_llm = time.monotonic() - t_llm0 print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True) logger.error("LLM 推理失败: %s", e, exc_info=True) with self._chat_session_lock: if self._llm_messages and self._llm_messages[-1].get("role") == "user": self._llm_messages.pop() self._enqueue_llm_speak("推理出错,请稍后再说。") if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return full_reply = "" pending = "" tts_budget = self._llm_tts_max_chars route: str | None = None try: for chunk in stream: content = self._chunk_delta_text(chunk) if not content: continue full_reply += content if route is None: lead = full_reply.lstrip() if lead: route = "json" if lead[0] == "{" else "chat" if route != "chat" or tts_budget <= 0: continue pending += content while tts_budget > 0 and pending: segs, pending = take_completed_sentences(pending) if segs: for seg in segs: tts_budget = self._enqueue_segment_capped(seg, tts_budget) if tts_budget <= 0: break 
continue forced, pending = force_soft_split( pending, self._stream_tts_chunk_chars ) if not forced: break for seg in forced: tts_budget = self._enqueue_segment_capped(seg, tts_budget) if tts_budget <= 0: break except Exception as e: # noqa: BLE001 dt_llm = time.monotonic() - t_llm0 print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True) logger.error("LLM 流式推理失败: %s", e, exc_info=True) with self._chat_session_lock: if self._llm_messages and self._llm_messages[-1].get("role") == "user": self._llm_messages.pop() self._enqueue_llm_speak("推理出错,请稍后再说。") if finish_wake_after_tts: self._pending_finish_wake_cycle_after_tts = True return dt_llm = time.monotonic() - t_llm0 print(f"[计时] LLM 推理 {dt_llm:.3f}s", flush=True) reply = full_reply.strip() if route == "chat" and tts_budget > 0: tail = pending.strip() if tail: self._enqueue_segment_capped(tail, tts_budget) self._finalize_llm_turn( reply, finish_wake_after_tts, streamed_chat=(route == "chat") ) def start(self) -> None: if self.running: logger.warning("识别器已在运营") return self.running = True self.stt_thread = threading.Thread(target=self._stt_worker_thread, daemon=True) self.stt_thread.start() self.command_thread = threading.Thread( target=self._takeoff_only_command_worker, daemon=True ) self.command_thread.start() # 先预加载再开麦:否则 PortAudio 回调会一直往 audio_queue 塞数据,而主线程还没进入 # process_audio_stream,默认仅 10 块的队列会迅速满并触发「音频队列已满,丢弃数据块」。 logger.info("voice_drone_assistant: 准备预加载模型(若启用)…") self._preload_llm_and_tts_if_enabled() try: self.audio_capture.start_stream() except BaseException: self.running = False try: self.stt_queue.put(None, timeout=0.5) except Exception: # noqa: BLE001 pass try: self.command_queue.put(None, timeout=0.5) except Exception: # noqa: BLE001 pass if self.stt_thread is not None: self.stt_thread.join(timeout=2.0) if self.command_thread is not None: self.command_thread.join(timeout=2.0) raise if self._cloud_voice_enabled: logger.info( "voice_drone_assistant: 已启动(对话走云端 WebSocket;TTS 为云端 PCM;飞控见 Socket/offboard)" ) else: 
logger.info( "voice_drone_assistant: 已启动(无试飞控 Socket;大模型答复走 Kokoro TTS)" ) ld = os.environ.get("LD_PRELOAD", "") sys_asound = "libasound.so" in ld and "/usr/" in ld if not sys_asound: print( "\n⚠ 建议用系统 ALSA 启动(conda 下否则常无声或 VAD 不触发):\n" " bash with_system_alsa.sh python main.py\n", flush=True, ) if self._llm_disabled and not self._cloud_voice_enabled: if self._local_keyword_takeoff_enabled: llm_hint = "已 ROCKET_LLM_DISABLE=1:除 keywords.yaml 中 takeoff 关键词外,其它指令仅打印,不调大模型。\n" else: llm_hint = ( "已 ROCKET_LLM_DISABLE=1 且未启用本地口令起飞(assistant.local_keyword_takeoff_enabled / " "ROCKET_LOCAL_KEYWORD_TAKEOFF):指令仅打印,不调大模型。\n" ) elif self._cloud_voice_enabled: if self._local_keyword_takeoff_enabled: llm_hint = "已启用云端对话:非 takeoff 关键词指令经 WebSocket 上云,播报为云端 TTS 流。\n" else: llm_hint = "已启用云端对话:指令经 WebSocket 上云,播报为云端 TTS 流(本地口令起飞已关闭)。\n" else: llm_hint = ( "说「无人机」唤醒后会先播报问候,再听您说一句(不必再带唤醒词);说完后关麦推理,答句播完后再说「" f"{self.wake_word_detector.primary}」开始下一轮。非起飞指令走大模型(" "飞控相关→JSON,否则闲聊)。\n" ) if self._local_keyword_takeoff_enabled: takeoff_banner = ( "\n本地口令起飞已开启:说「无人机」+ keywords.yaml 里 takeoff 词(如「起飞演示」)→ 播提示音、" "启动 scripts/run_px4_offboard_one_terminal.sh(串口真机)、再播返航提示并结束脚本。\n" ) else: takeoff_banner = ( "\n本地口令起飞已关闭(飞控请用云端 flight_intent / ROS 桥等);" "若需恢复 keywords.yaml takeoff → offboard,设 assistant.local_keyword_takeoff_enabled: true 或 " "ROCKET_LOCAL_KEYWORD_TAKEOFF=1。\n" ) print( f"{takeoff_banner}" f"{llm_hint}" "标记说明:[VAD] 已截段送 STT;[STT] 识别文字;[唤醒] 是否含唤醒词;[LLM] 对话与播报。\n" "录音已在启动时选好;扬声器可设 ROCKET_TTS_DEVICE。建议:bash with_system_alsa.sh python …\n" "Ctrl+C 退出。\n", flush=True, ) def _play_wav_serialized(self, path: Path) -> None: if not path.is_file(): logger.warning("WAV 文件不存在,跳过播放: %s", path) return with self._audio_play_lock: try: _play_wav_blocking(path) except Exception as e: # noqa: BLE001 logger.warning("播放 WAV 失败 %s: %s", path, e, exc_info=True) def _run_takeoff_offboard_and_wavs(self) -> None: """独立线程:起 offboard 脚本;播第一段;第一段结束后等 10s;再播第二段;第二段结束后杀掉脚本进程组。""" if not 
_OFFBOARD_SCRIPT.is_file(): logger.error("未找到 offboard 脚本: %s", _OFFBOARD_SCRIPT) return acquired = self._takeoff_side_task_busy.acquire(blocking=False) if not acquired: logger.warning("起飞联动已在执行,忽略重复触发") return proc: subprocess.Popen | None = None try: log_path = Path( os.environ.get("ROCKET_OFFBOARD_LOG", "/tmp/rocket_drone_offboard_script.log") ).expanduser() log_f = open(log_path, "ab", buffering=0) try: proc = subprocess.Popen( [ "bash", str(_OFFBOARD_SCRIPT), "/dev/ttyACM0", "921600", "20", ], cwd=str(_PROJECT_ROOT), stdout=log_f, stderr=subprocess.STDOUT, start_new_session=True, ) except Exception as e: # noqa: BLE001 logger.error("启动 run_px4_offboard_one_terminal.sh 失败: %s", e, exc_info=True) return finally: log_f.close() with self._offboard_proc_lock: self._active_offboard_proc = proc time.sleep(0.5) early_rc = proc.poll() if early_rc is not None: logger.error( "offboard 一键脚本已立即结束 (exit=%s),未持续运行。日志: %s (常见原因:找不到 " "px4_ctrl_offboard_demo.py、ROS 环境、或串口未连)", early_rc, log_path, ) logger.info( "已启动 offboard 一键脚本 (pid=%s),并播放起飞提示音;脚本输出见 %s", proc.pid, log_path, ) self._play_wav_serialized(_TAKEOFF_ACK_WAV) time.sleep(10.0) self._play_wav_serialized(_TAKEOFF_DONE_WAV) finally: if proc is not None: logger.info("第二段 WAV 已播完,终止 offboard 脚本进程组 (pid=%s)", proc.pid) _terminate_process_group(proc) with self._offboard_proc_lock: if self._active_offboard_proc is proc: self._active_offboard_proc = None self._takeoff_side_task_busy.release() def _takeoff_only_command_worker(self) -> None: """唤醒;同句带指令则直转 LLM/起飞;否则问候+滴声→再问一句→关麦播报。""" logger.info("唤醒流程命令线程已启动") while self.running: try: text = self.command_queue.get(timeout=0.1) except queue.Empty: continue except Exception as e: # noqa: BLE001 logger.error(f"命令处理线程错误: {e}", exc_info=True) continue try: if text is None: break try: if ( isinstance(text, tuple) and len(text) == 3 and text[0] == _PCM_TURN_MARKER ): self._handle_pcm_uplink_turn(text[1], int(text[2])) continue with self._wake_flow_lock: phase = self._wake_phase if 
phase == int(_WakeFlowPhase.LLM_BUSY): continue if phase == int(_WakeFlowPhase.GREETING_WAIT): continue if phase == int(_WakeFlowPhase.FLIGHT_CONFIRM_LISTEN): self._handle_flight_confirm_text(text) continue if phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN): with self._wake_flow_lock: self._wake_phase = int(_WakeFlowPhase.LLM_BUSY) self._process_one_shot_command(text) continue is_wake, matched = self.wake_word_detector.detect(text) if not is_wake: logger.debug("未检测到唤醒词,忽略: %s", text) if os.environ.get("ROCKET_PRINT_STT", "").lower() in ( "1", "true", "yes", ): print( f"[唤醒] 未命中「{self.wake_word_detector.primary}」,原文: {text!r}", flush=True, ) continue logger.info("唤醒词命中: %s", matched) command_text = self.wake_word_detector.extract_command_text(text) follow = (command_text or "").strip() if follow: if not self._wake_fast_path_process_follow(follow): continue continue self._begin_wake_cycle(None) except Exception as e: # noqa: BLE001 logger.error("命令处理失败: %s", e, exc_info=True) finally: self.command_queue.task_done() logger.info("唤醒流程命令线程已停止") def stop(self) -> None: """停止识别;不重连 Socket(从未连接)。""" if not self.running: return self.running = False self._cancel_prompt_listen_timer() self._cancel_flight_confirm_timer() with self._flight_confirm_timer_lock: self._pending_flight_confirm = None self._pending_flight_confirm_after_tts = False if self.stt_thread is not None: self.stt_queue.put(None) if self.command_thread is not None: self.command_queue.put(None) if self.stt_thread is not None: self.stt_thread.join(timeout=2.0) if self.command_thread is not None: self.command_thread.join(timeout=2.0) # 不在此线程做 speak_text:会阻塞数秒至数十秒,用户多次 Ctrl+C 仍杀不掉进程 self._discard_llm_playback_queue() with self._offboard_proc_lock: op = self._active_offboard_proc self._active_offboard_proc = None if op is not None and op.poll() is None: logger.info("主程序退出:终止仍在运行的 offboard 脚本") _terminate_process_group(op) try: self.audio_capture.stop_stream() except KeyboardInterrupt: logger.info("关闭麦克风流时中断,跳过") except 
Exception as e: # noqa: BLE001 logger.warning("关闭麦克风流失败: %s", e) if self._cloud_client is not None: try: self._cloud_client.close() except Exception as e: # noqa: BLE001 logger.debug("关闭云端 WebSocket: %s", e) if self.socket_client.connected: self.socket_client.disconnect() logger.info("voice_drone_assistant 已停止") print("\n已退出。", flush=True) def main() -> None: ap = argparse.ArgumentParser( description="无人机语音:唤醒 → 问候 → 一句指令 → 起飞或 LLM 播报 → 再唤醒" ) ap.add_argument( "--input-index", "-I", type=int, default=None, help="跳过交互菜单,直接指定 PyAudio 录音设备索引(与启动时「PyAudio_index=」一致)。", ) ap.add_argument( "--non-interactive", action="store_true", help="不选设备:用 system.yaml 的 audio.input_device_index(为 null 时自动枚举默认可录音设备)。", ) ap.add_argument( "--no-preload", action="store_true", help="不预加载 Qwen/Kokoro,缩短启动时间(首轮对话与首次播报会变慢)。", ) args = ap.parse_args() non_inter = args.non_interactive or os.environ.get( "ROCKET_NON_INTERACTIVE", "" ).lower() in ("1", "true", "yes") idx = args.input_index if idx is None: raw_ix = os.environ.get("ROCKET_INPUT_DEVICE_INDEX", "").strip() if raw_ix.isdigit() or (raw_ix.startswith("-") and raw_ix[1:].isdigit()): idx = int(raw_ix) if idx is not None: from voice_drone.core.mic_device_select import apply_input_device_index_only apply_input_device_index_only(idx) logger.info("录音设备: PyAudio 索引 %s(CLI/环境变量)", idx) elif not non_inter: from voice_drone.core.mic_device_select import ( apply_input_device_index_only, prompt_for_input_device_index, ) chosen = prompt_for_input_device_index() apply_input_device_index_only(chosen) else: logger.info( "非交互模式:使用 system.yaml 的 audio.input_device_index(null=自动探测)" ) app = TakeoffPrintRecognizer(skip_model_preload=args.no_preload) try: app.run() except KeyboardInterrupt: logger.info("用户中断") finally: if app.running: app.stop() if __name__ == "__main__": main()