2272 lines
96 KiB
Python
2272 lines
96 KiB
Python
# 实时检测语音:用「无人机」唤醒 → TTS「你好,我在呢」→ 收音一句指令(关麦)→ 大模型 Kokoro 播报答句 → 再仅听唤醒词。
|
||
# 可选:assistant.local_keyword_takeoff_enabled 或 ROCKET_LOCAL_KEYWORD_TAKEOFF=1 时,「无人机 + keywords.yaml 里 takeoff 词」走本地 offboard + WAV(默认关闭)。
|
||
# 其它指令走云端/本地 LLM → flight_intent 等(设 ROCKET_CLOUD_EXECUTE_FLIGHT=1 才执行机端序列)。
|
||
# 环境变量:ROCKET_LLM_GGUF、ROCKET_LLM_MAX_TOKENS(默认 256)、ROCKET_LLM_CTX(默认 4096,可试 2048 省显存/略提速)、
|
||
# ROCKET_LLM_N_THREADS(llama.cpp 线程数,如 RK3588 可试 6~8)、ROCKET_LLM_N_GPU_LAYERS(有 CUDA/Vulkan 时>0)、ROCKET_LLM_N_BATCH、
|
||
# ROCKET_TTS_ORT_INTRA_OP_THREADS / ROCKET_TTS_ORT_INTER_OP_THREADS(Kokoro ONNXRuntime 线程),
|
||
# ROCKET_CHAT_IDLE_SEC(历史占位,每轮重置上下文)、ROCKET_TTS_DEVICE(同 qwen15b_chat --tts-device)、
|
||
# ROCKET_INPUT_HW=2,0 对应 arecord -l 的 card,device;ROCKET_INPUT_DEVICE_INDEX、ROCKET_INPUT_DEVICE_NAME;
|
||
# 录音:默认交互列出 arecord -l + PyAudio 并选择;--input-index / ROCKET_INPUT_DEVICE_INDEX 跳过交互;--non-interactive 用 yaml 的 input_device_index(可为 null 自动探测)。
|
||
# ROCKET_LLM_DISABLE=1 关闭对话。
|
||
# ROCKET_LLM_STREAM=0 关闭流式输出(整段推理后再单次 TTS,便于对照调试)。
|
||
# ROCKET_STREAM_TTS_CHUNK_CHARS 流式闲聊时、无句末标点则按此长度强制切段(默认 64,过小会听感碎)。
|
||
# 云端语音(见 voice_drone_assistant/clientguide.md):ROCKET_CLOUD_VOICE=1 或 cloud_voice.enabled;
|
||
# ROCKET_CLOUD_WS_URL、ROCKET_CLOUD_AUTH_TOKEN、ROCKET_CLOUD_DEVICE_ID;ROCKET_CLOUD_FALLBACK_LOCAL=0 禁用本地回退。
|
||
# 云端会话固定 pcm_asr_uplink(VAD 截句→turn.audio.*→Fun-ASR);同句快路径仍可用 turn.text。
|
||
# 闲聊「无语音」超时:listen_silence_timeout_sec(默认 5):滴声后仅当 RMS<energy_vad_rms_low 且未在说话时累计,满则播 MSG;非墙上固定 5s。
|
||
# Segment-end cue length: cloud_voice.segment_cue_duration_ms or env ROCKET_SEGMENT_CUE_DURATION_MS (clamped to 20-500 ms, default 120).
|
||
# 本地字符串走服务端 TTS:cloud_voice.remote_tts_for_local(默认 true)或 ROCKET_CLOUD_REMOTE_TTS=1;设 0 则用 Kokoro。
|
||
# PX4 语境 YAML:cloud_voice.px4_context_file 或 ROCKET_CLOUD_PX4_CONTEXT_FILE,合并进 session.start.client。
|
||
# STT 串行:ROCKET_STT_QUEUE_MAX(默认 1)限制 VAD 排队段数;上一段仍在识别时新段可能因队列满被丢弃,见日志。
|
||
# 问候中仍送 STT:ROCKET_VAD_STT_DURING_GREETING=1(默认不向 STT 排队,减少 TTS 问候期间的无效识别与积压)。
|
||
# 识别到唤醒词后立刻短鸣「滴」:ROCKET_WAKE_ACK_BEEP=0 关闭;ROCKET_WAKE_ACK_BEEP_SEC/HZ/GAIN 未设时参照 ROCKET_WAKE_BEEP_*(时长默认同 0.72 倍略短)。
|
||
# 唤醒问候播完后短鸣「滴」:ROCKET_WAKE_PROMPT_BEEP=0 关闭;ROCKET_WAKE_BEEP_SEC / ROCKET_WAKE_BEEP_HZ / ROCKET_WAKE_BEEP_GAIN 可调。
|
||
# 播完 TTS 后恢复 PyAudio 麦流:ROCKET_MIC_RESTART_SETTLE_MS(默认 150)给 ES8388 等编解码器一点时间稳定,仍为静音可试 yaml recognizer.ack_pause_mic_for_playback=false。
|
||
# 云端飞控 JSON:默认仅日志 + 播放服务端 TTS(见 docs/llmcon.md);要执行机端飞控设 ROCKET_CLOUD_EXECUTE_FLIGHT=1。
|
||
# 方案一(云 → 本程序 → ROS 伴飞桥):同时设 ROCKET_FLIGHT_INTENT_ROS_BRIDGE=1,仅向 ROS 话题发布 JSON(不走 Socket/offboard)。
|
||
# ROCKET_FLIGHT_BRIDGE_TOPIC(默认 /input)、ROCKET_FLIGHT_BRIDGE_SETUP(默认 source /opt/ros/noetic/setup.bash)、
|
||
# ROCKET_FLIGHT_BRIDGE_WAIT_SUB(默认 2,秒,等待订阅者;0 则尽量即发)。
|
||
# 本地口令起飞(keywords.yaml takeoff → offboard):默认关(system.yaml assistant.local_keyword_takeoff_enabled);ROCKET_LOCAL_KEYWORD_TAKEOFF=1/true/yes 优先开启。
|
||
# 唤醒问候「你好,我在呢」:优先播 WAV(ROCKET_WAKE_GREETING_WAV 或 assets/tts_cache/wake_greeting.wav)。
|
||
# 若无文件则在预加载/首次播报时自动生成(须 scipy 写盘);亦可手动 python scripts/generate_wake_greeting_wav.py。
|
||
# 默认启动后预加载 Qwen GGUF + Kokoro ONNX;设 ROCKET_SKIP_MODEL_PRELOAD=1 或传 --no-preload 可改为首次对话时再加载。
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import enum
|
||
import json
|
||
import os
|
||
import queue
|
||
import shlex
|
||
import signal
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
import threading
|
||
import time
|
||
import wave
|
||
from pathlib import Path
|
||
|
||
import numpy as np
|
||
|
||
# Project root: the parent of the directory that contains this file.
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Put the project root first on sys.path so the voice_drone package imports below resolve.
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))
# Best effort: switch the working directory to the project root; a failure is ignored.
try:
    os.chdir(_PROJECT_ROOT)
except OSError:
    pass
|
||
|
||
from voice_drone.core.portaudio_env import fix_ld_path_for_portaudio
|
||
|
||
fix_ld_path_for_portaudio()
|
||
|
||
# Always shown on the console: recognition results / submitted speech segments /
# wake-word misses (independent of the logging level).
# On the on-board mic Silero often fails to cut segments, so energy (RMS) VAD is the
# default; a full system can use ROCKET_ENERGY_VAD=0 + vad_backend: silero.
# setdefault() keeps any value the user already exported.
os.environ.setdefault("ROCKET_ENERGY_VAD", "1")
os.environ.setdefault("ROCKET_PRINT_STT", "1")
os.environ.setdefault("ROCKET_PRINT_VAD", "1")
|
||
|
||
from voice_drone.core.command import Command
|
||
from voice_drone.core.cloud_dialog_v1 import (
|
||
CLOUD_VOICE_DIALOG_V1,
|
||
MSG_CANCELLED,
|
||
MSG_CONFIRM_EXECUTING,
|
||
MSG_CONFIRM_TIMEOUT,
|
||
MSG_PROMPT_LISTEN_TIMEOUT,
|
||
match_phrase_list,
|
||
normalize_phrase_text,
|
||
parse_confirm_dict,
|
||
)
|
||
from voice_drone.core.flight_intent import (
|
||
ActionGoto,
|
||
ActionHold,
|
||
ActionHover,
|
||
ActionLand,
|
||
ActionReturnHome,
|
||
ActionTakeoff,
|
||
ActionWait,
|
||
goto_action_to_command,
|
||
parse_flight_intent_dict,
|
||
)
|
||
from voice_drone.core.configuration import (
|
||
SYSTEM_ASSISTANT_CONFIG,
|
||
SYSTEM_CLOUD_VOICE_CONFIG,
|
||
SYSTEM_CLOUD_VOICE_PX4_CONTEXT,
|
||
)
|
||
from voice_drone.core.qwen_intent_chat import (
|
||
FLIGHT_INTENT_CHAT_SYSTEM,
|
||
default_qwen_gguf_path,
|
||
load_llama_qwen,
|
||
parse_flight_intent_reply,
|
||
)
|
||
from voice_drone.core.recognizer import VoiceCommandRecognizer
|
||
from voice_drone.core.streaming_llm_tts import force_soft_split, take_completed_sentences
|
||
from voice_drone.logging_ import get_logger
|
||
|
||
logger = get_logger("voice_drone_assistant")

# NOTE(review): presumably tags cloud PCM entries in a queue; its use is not visible in this chunk.
_CLOUD_PCM_TAG = "__cloud_pcm__"
# Short beep played right after a wake-word hit (queued; played by the main thread
# in _drain_llm_playback_queue).
_WAKE_HIT_BEEP_TAG = "__wake_hit_beep__"
# Cloud capture: VAD-cut utterance -> segment-end cue -> turn.audio.*; command-queue
# entries for this path are tuples tagged with this marker.
_PCM_TURN_MARKER = "__pcm_turn__"
_SEGMENT_END_CUE_TAG = "__segment_end_cue__"
_CHITCHAT_REPROMPT_BEEP_TAG = "__chitchat_reprompt_beep__"

# Greeting spoken after the wake word is recognised.
_WAKE_GREETING = "你好,我在呢"

# Default location of the cached greeting WAV.
_TTS_CACHE_DIR = _PROJECT_ROOT / "assets" / "tts_cache"
_WAKE_GREETING_WAV = _TTS_CACHE_DIR / "wake_greeting.wav"
|
||
|
||
|
||
def _resolve_wake_greeting_wav() -> Path:
|
||
raw = os.environ.get("ROCKET_WAKE_GREETING_WAV", "").strip()
|
||
return Path(raw).expanduser() if raw else _WAKE_GREETING_WAV
|
||
|
||
|
||
# Directory holding the bundled prompt WAV files referenced below.
_CORE_DIR = _PROJECT_ROOT / "voice_drone" / "core"
# Prompt WAVs; the file names are part of the on-disk layout and must not be changed.
_TAKEOFF_ACK_WAV = _CORE_DIR / "好的收到,开始起飞.wav"
_TAKEOFF_DONE_WAV = _CORE_DIR / "任务执行完成,开始返航降落.wav"
# Shell script path for the PX4 offboard run (how it is launched is not visible in this chunk).
_OFFBOARD_SCRIPT = _PROJECT_ROOT / "scripts" / "run_px4_offboard_one_terminal.sh"
|
||
|
||
|
||
def _play_wav_blocking(path: Path) -> None:
    """Play one 16-bit PCM WAV file and block until playback has finished.

    Args:
        path: WAV file to play.

    Raises:
        ValueError: If the file's sample width is not 2 bytes (16-bit PCM).

    The whole file is read into memory first, then written to a PyAudio output
    stream in 1024-frame chunks.
    """
    import pyaudio

    with wave.open(str(path), "rb") as wf:
        channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        rate = wf.getframerate()
        frame_count = wf.getnframes()
        if sample_width != 2:
            raise ValueError(f"仅支持 16-bit PCM: {path}")
        pcm = wf.readframes(frame_count)

    frames_per_chunk = 1024
    pa = pyaudio.PyAudio()
    try:
        stream = pa.open(
            format=pa.get_format_from_width(sample_width),
            channels=channels,
            rate=rate,
            output=True,
            frames_per_buffer=frames_per_chunk,
        )
        try:
            # start_stream() sits inside the try so a failure here still closes the stream.
            stream.start_stream()
            step = frames_per_chunk * sample_width * channels
            for offset in range(0, len(pcm), step):
                stream.write(pcm[offset : offset + step])
        finally:
            try:
                stream.stop_stream()
            finally:
                # Close even when stop_stream() raises, so the device handle is released.
                stream.close()
    finally:
        pa.terminate()
|
||
|
||
|
||
def _synthesize_ready_beep(
|
||
sample_rate: int = 24000,
|
||
*,
|
||
duration_sec: float = 0.11,
|
||
frequency_hz: float = 988.0,
|
||
amplitude: float = 0.22,
|
||
) -> np.ndarray:
|
||
"""正弦短鸣 + 淡入淡出,作唤醒后「可以说话」提示。"""
|
||
n = max(8, int(sample_rate * duration_sec))
|
||
x = np.arange(n, dtype=np.float32)
|
||
w = np.sin(2.0 * np.pi * frequency_hz * x / float(sample_rate)).astype(np.float32)
|
||
fade = max(2, min(n // 3, int(0.006 * sample_rate)))
|
||
ramp = np.linspace(0.0, 1.0, fade, dtype=np.float32)
|
||
w[:fade] *= ramp
|
||
w[-fade:] *= ramp[::-1]
|
||
return np.clip(w * np.float32(amplitude), -1.0, 1.0)
|
||
|
||
|
||
def _terminate_process_group(proc: subprocess.Popen) -> None:
|
||
if proc.poll() is not None:
|
||
return
|
||
try:
|
||
os.killpg(proc.pid, signal.SIGTERM)
|
||
except ProcessLookupError:
|
||
return
|
||
except Exception as e: # noqa: BLE001
|
||
logger.warning("SIGTERM offboard 进程组失败: %s", e)
|
||
try:
|
||
proc.wait(timeout=10)
|
||
except subprocess.TimeoutExpired:
|
||
try:
|
||
os.killpg(proc.pid, signal.SIGKILL)
|
||
except Exception as e: # noqa: BLE001
|
||
logger.warning("SIGKILL offboard 进程组失败: %s", e)
|
||
|
||
|
||
class _WakeFlowPhase(enum.IntEnum):
|
||
IDLE = 0
|
||
GREETING_WAIT = 1
|
||
ONE_SHOT_LISTEN = 2
|
||
LLM_BUSY = 3
|
||
FLIGHT_CONFIRM_LISTEN = 4
|
||
|
||
|
||
class TakeoffPrintRecognizer(VoiceCommandRecognizer):
|
||
"""待机(IDLE)仅识别含唤醒词的句子;唤醒后多轮对话在 ONE_SHOT_LISTEN 等阶段不要求句内唤醒词。
|
||
云端会话为 pcm_asr_uplink:滴声后整句 PCM 上云 Fun-ASR;结束一轮回到 IDLE 再要唤醒词。"""
|
||
|
||
def __init__(self, *, skip_model_preload: bool = False) -> None:
    """Set up recognizer state, LLM/TTS settings and the optional cloud client.

    Args:
        skip_model_preload: When true, or when ``ROCKET_SKIP_MODEL_PRELOAD`` is
            1/true/yes, ``_skip_model_preload`` is set so preloading can be skipped.

    Settings come from ``ROCKET_*`` environment variables first, then from the
    ``SYSTEM_*`` configuration dicts, then from built-in defaults. Malformed
    numeric values fall back to the default instead of aborting start-up, which
    matches the parses in this method that were already guarded.
    """
    truthy = ("1", "true", "yes")
    falsy = ("0", "false", "no")

    def _env_number(name, default, cast):
        # Parse one numeric environment variable; blank or invalid -> default.
        raw = os.environ.get(name, "").strip()
        if not raw:
            return default
        try:
            return cast(raw)
        except ValueError:
            logger.warning("ignoring invalid %s=%r, using %r", name, raw, default)
            return default

    def _first_float(default, *candidates):
        # float() of the first truthy candidate; default when none is set or parsing fails.
        for value in candidates:
            if value:
                try:
                    return float(value)
                except (TypeError, ValueError):
                    return default
        return default

    super().__init__(auto_connect_socket=False)
    self.ack_tts_enabled = False
    self._audio_play_lock = threading.Lock()
    self._offboard_proc_lock = threading.Lock()
    self._active_offboard_proc: subprocess.Popen | None = None
    self._takeoff_side_task_busy = threading.Lock()
    self._model_warm_lock = threading.Lock()

    # Streaming chat enqueues many sentence/chunk items; a small queue would drop segments.
    self._llm_playback_queue: queue.Queue[str] = queue.Queue(maxsize=64)
    self._chat_session_lock = threading.Lock()
    self._chat_session_until: float = 0.0
    self._llm_messages: list = []
    self._llm = None
    self._llm_tts_engine = None
    self._llm_model_path = Path(
        os.environ.get(
            "ROCKET_LLM_GGUF",
            str(default_qwen_gguf_path(_PROJECT_ROOT)),
        )
    )
    self._chat_idle_sec = _env_number("ROCKET_CHAT_IDLE_SEC", 120.0, float)
    self._llm_max_tokens = _env_number("ROCKET_LLM_MAX_TOKENS", 256, int)
    self._llm_ctx = _env_number("ROCKET_LLM_CTX", 4096, int)
    self._llm_tts_max_chars = _env_number("ROCKET_LLM_TTS_MAX_CHARS", 800, int)
    self._llm_stream_enabled = (
        os.environ.get("ROCKET_LLM_STREAM", "1").lower() not in falsy
    )
    self._stream_tts_chunk_chars = max(
        16,
        _env_number("ROCKET_STREAM_TTS_CHUNK_CHARS", 64, int),
    )
    self._llm_disabled = os.environ.get("ROCKET_LLM_DISABLE", "").lower() in truthy

    # A non-blank environment value wins over the YAML setting.
    _kw_raw = os.environ.get("ROCKET_LOCAL_KEYWORD_TAKEOFF", "").strip()
    if _kw_raw:
        self._local_keyword_takeoff_enabled = _kw_raw.lower() in truthy
    else:
        _ac = SYSTEM_ASSISTANT_CONFIG if isinstance(SYSTEM_ASSISTANT_CONFIG, dict) else {}
        self._local_keyword_takeoff_enabled = bool(
            _ac.get("local_keyword_takeoff_enabled", False)
        )
    self._skip_model_preload = skip_model_preload or os.environ.get(
        "ROCKET_SKIP_MODEL_PRELOAD", ""
    ).lower() in truthy

    cv = SYSTEM_CLOUD_VOICE_CONFIG if isinstance(SYSTEM_CLOUD_VOICE_CONFIG, dict) else {}
    env_cloud = os.environ.get("ROCKET_CLOUD_VOICE", "").lower() in truthy
    self._cloud_voice_enabled = bool(env_cloud or cv.get("enabled"))
    self._cloud_fallback_local = os.environ.get(
        "ROCKET_CLOUD_FALLBACK_LOCAL", ""
    ).lower() not in falsy and bool(cv.get("fallback_to_local", True))

    # The wake word is enforced only in IDLE; in ONE_SHOT_LISTEN the whole utterance
    # is uploaded or processed without requiring a wake word inside it.
    self._listen_silence_timeout_sec = max(
        0.5,
        _first_float(
            5.0,
            os.environ.get("ROCKET_PROMPT_LISTEN_TIMEOUT_SEC"),
            cv.get("listen_silence_timeout_sec"),
        ),
    )
    self._post_cue_mic_mute_ms = max(
        0.0,
        min(
            2000.0,
            _first_float(
                200.0,
                os.environ.get("ROCKET_POST_CUE_MIC_MUTE_MS"),
                cv.get("post_cue_mic_mute_ms"),
            ),
        ),
    )
    self._segment_cue_duration_ms = max(
        20.0,
        min(
            500.0,
            _first_float(
                120.0,
                os.environ.get("ROCKET_SEGMENT_CUE_DURATION_MS"),
                cv.get("segment_cue_duration_ms"),
            ),
        ),
    )

    # str() tolerates non-string YAML scalars (e.g. a numeric device_id).
    ws_url = str(os.environ.get("ROCKET_CLOUD_WS_URL") or cv.get("server_url") or "").strip()
    auth_tok = str(
        os.environ.get("ROCKET_CLOUD_AUTH_TOKEN") or cv.get("auth_token") or ""
    ).strip()
    dev_id = str(
        os.environ.get("ROCKET_CLOUD_DEVICE_ID") or cv.get("device_id") or "drone-001"
    ).strip()
    self._cloud_client = None
    self._cloud_remote_tts_for_local = False
    if self._cloud_voice_enabled:
        if ws_url and auth_tok:
            from voice_drone.core.cloud_voice_client import CloudVoiceClient

            self._cloud_client = CloudVoiceClient(
                server_url=ws_url,
                auth_token=auth_tok,
                device_id=dev_id,
                recv_timeout=_first_float(120.0, cv.get("timeout")),
                session_client_extensions=dict(SYSTEM_CLOUD_VOICE_PX4_CONTEXT)
                if SYSTEM_CLOUD_VOICE_PX4_CONTEXT
                else None,
            )
            # Explicit env 0/1 overrides the YAML remote_tts_for_local (default true).
            _env_rt = os.environ.get("ROCKET_CLOUD_REMOTE_TTS", "").strip().lower()
            if _env_rt in falsy:
                self._cloud_remote_tts_for_local = False
            elif _env_rt in truthy:
                self._cloud_remote_tts_for_local = True
            else:
                self._cloud_remote_tts_for_local = bool(
                    cv.get("remote_tts_for_local", True)
                )
            print(
                f"[云端] 已启用 WebSocket 对话: {ws_url} device_id={dev_id}",
                flush=True,
            )
            if self._cloud_remote_tts_for_local:
                print(
                    "[云端] 本地文案播报将走 tts.synthesize(失败回退 Kokoro)。",
                    flush=True,
                )
            print(
                f"[云端] Fun-ASR 上行 turn.audio.*;仅待机时说唤醒词;"
                f"滴声后累计静默 {self._listen_silence_timeout_sec:.1f}s(低于 yaml energy_vad_rms_low 才计);"
                f"断句提示 {self._segment_cue_duration_ms:.0f}ms、消抖 {self._post_cue_mic_mute_ms:.0f}ms。",
                flush=True,
            )
        else:
            logger.warning("cloud_voice 已启用但缺少 server_url/auth_token,将使用本地 LLM")
            self._cloud_voice_enabled = False

    self._wake_flow_lock = threading.Lock()
    self._wake_phase: int = int(_WakeFlowPhase.IDLE)
    self._greeting_done = threading.Event()
    self._playback_batch_is_greeting = False
    self._pending_finish_wake_cycle_after_tts = False
    self._pending_flight_confirm_after_tts = False
    self._pending_flight_confirm: dict | None = None
    self._flight_confirm_timer: threading.Timer | None = None
    self._flight_confirm_timer_lock = threading.Lock()
    self._staged_one_shot_after_greeting: str | None = None
    self._mic_op_queue: queue.Queue[str] = queue.Queue(maxsize=8)

    # By default only one segment may wait in the STT queue; ROCKET_STT_QUEUE_MAX
    # allows a small backlog (clamped to 1..16).
    _raw_sq = os.environ.get("ROCKET_STT_QUEUE_MAX", "1").strip()
    try:
        _stn = max(1, min(16, int(_raw_sq)))
    except ValueError:
        _stn = 1
    self.stt_queue = queue.Queue(maxsize=_stn)

    # PROMPT_LISTEN: silence is accumulated while RMS stays below the threshold,
    # not measured as a fixed wall-clock period after the beep.
    self._prompt_listen_watch_armed: bool = False
    self._prompt_silence_accum_sec: float = 0.0
    self._segment_cue_done = threading.Event()
    self._pending_chitchat_reprompt_after_tts = False
    if self._cloud_client is not None:
        self._vad_speech_start_hook = self._on_vad_speech_start_prompt_listen
        self._after_processed_audio_chunk = self._tick_prompt_listen_silence_accum
|
||
|
||
def _cancel_prompt_listen_timer(self) -> None:
|
||
"""停止「滴声后静默监听」累计(飞控/结束唤醒/起 PCM 上行前等)。"""
|
||
self._prompt_listen_watch_armed = False
|
||
self._prompt_silence_accum_sec = 0.0
|
||
|
||
def _arm_prompt_listen_timeout(self) -> None:
    """Arm the post-beep silence watch for the ONE_SHOT_LISTEN phase.

    Does nothing without a cloud client or when the wake flow is in another phase.
    Once armed, ``_tick_prompt_listen_silence_accum`` accumulates silence.
    """
    if self._cloud_client is None:
        return
    listen_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN)
    with self._wake_flow_lock:
        if self._wake_phase != listen_phase:
            return
    self._prompt_silence_accum_sec = 0.0
    self._prompt_listen_watch_armed = True
    logger.debug(
        "PROMPT_LISTEN: 已启用 RMS 累计静默 %.1fs(低于 rms_low 才计时;说话或 rms≥low 清零)",
        self._listen_silence_timeout_sec,
    )
|
||
|
||
def _on_prompt_listen_timeout(self) -> None:
    """Handle a silence timeout in ONE_SHOT_LISTEN.

    Disarms the watch, queues the timeout prompt and flags the wake cycle to be
    finished after that prompt. Ignored when the phase is no longer ONE_SHOT_LISTEN.
    """
    with self._wake_flow_lock:
        still_listening = self._wake_phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN)
    if not still_listening:
        return
    self._prompt_listen_watch_armed = False
    self._prompt_silence_accum_sec = 0.0
    logger.info(
        "[会话] 滴声后持续静默 ≥%.1fs(未截句),播超时提示并回待机",
        self._listen_silence_timeout_sec,
    )
    self._enqueue_llm_speak(MSG_PROMPT_LISTEN_TIMEOUT)
    self._pending_finish_wake_cycle_after_tts = True
|
||
|
||
def _tick_prompt_listen_silence_accum(self, processed_chunk: np.ndarray) -> None:
    """Accumulate silence for one processed audio chunk while the watch is armed.

    Args:
        processed_chunk: Audio chunk; its length divided by the capture sample
            rate is the time added when the chunk counts as silence.

    The accumulator is zeroed when the VAD reports speech or the chunk RMS reaches
    ``_energy_rms_low``. When it reaches ``_listen_silence_timeout_sec`` the
    timeout handler runs; any exception from the handler is logged.
    """
    if self._cloud_client is None or not self._prompt_listen_watch_armed:
        return
    with self._wake_flow_lock:
        if self._wake_phase != int(_WakeFlowPhase.ONE_SHOT_LISTEN):
            return

    rms = self._int16_chunk_rms(processed_chunk)
    chunk_seconds = float(len(processed_chunk)) / float(self.audio_capture.sample_rate)
    if self._use_energy_vad:
        speaking = self._ev_speaking
    else:
        speaking = self.vad.is_speaking

    if speaking or rms >= self._energy_rms_low:
        self._prompt_silence_accum_sec = 0.0
        return

    self._prompt_silence_accum_sec += chunk_seconds
    if self._prompt_silence_accum_sec < self._listen_silence_timeout_sec:
        return
    try:
        self._on_prompt_listen_timeout()
    except Exception as e:  # noqa: BLE001
        logger.error("PROMPT_LISTEN 静默超时处理异常: %s", e, exc_info=True)
|
||
|
||
def _on_vad_speech_start_prompt_listen(self) -> None:
    """Zero the silence accumulator when the VAD reports the start of speech.

    Only acts with a cloud client and while the phase is ONE_SHOT_LISTEN.
    """
    if self._cloud_client is None:
        return
    with self._wake_flow_lock:
        listening = self._wake_phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN)
    if listening:
        self._prompt_silence_accum_sec = 0.0
|
||
|
||
def _submit_concatenated_speech_to_stt(self) -> None:
    """Route a finished VAD segment according to the current wake phase.

    * GREETING_WAIT (unless ``ROCKET_VAD_STT_DURING_GREETING`` is 1/true/yes) and
      LLM_BUSY: the buffered segment is dropped so nothing piles up.
    * ONE_SHOT_LISTEN with a cloud client: the segment is concatenated and, when
      at least 0.5 s long, queued on ``command_queue`` as a
      ``(_PCM_TURN_MARKER, samples, sample_rate)`` tuple; shorter ones are dropped.
    * Anything else: handled by the base-class implementation.

    The cloud branch now reads and clears ``speech_buffer`` under
    ``speech_buffer_lock``, like the other branches of this method.
    """
    truthy = ("1", "true", "yes")
    allow_greeting_stt = (
        os.environ.get("ROCKET_VAD_STT_DURING_GREETING", "").lower() in truthy
    )
    print_vad = os.environ.get("ROCKET_PRINT_VAD", "").lower() in truthy

    with self._wake_flow_lock:
        phase = self._wake_phase

    if phase == int(_WakeFlowPhase.GREETING_WAIT) and not allow_greeting_stt:
        with self.speech_buffer_lock:
            self.speech_buffer.clear()
        if print_vad:
            print(
                "[VAD] 问候播放中,本段不送 STT(说完问候后再说指令;"
                "若需在问候同时识别请设 ROCKET_VAD_STT_DURING_GREETING=1",
                flush=True,
            )
        return

    if phase == int(_WakeFlowPhase.LLM_BUSY):
        with self.speech_buffer_lock:
            self.speech_buffer.clear()
        if print_vad:
            print(
                "[VAD] 大模型/云端处理中,本段不送 STT(请等本轮播报结束后再说)",
                flush=True,
            )
        return

    if self._cloud_client is not None and phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN):
        with self.speech_buffer_lock:
            if len(self.speech_buffer) == 0:
                return
            # np.concatenate returns a new array, so no extra copy is needed before queueing.
            speech_audio = np.concatenate(self.speech_buffer)
            self.speech_buffer.clear()

        sample_rate = self.audio_capture.sample_rate
        min_samples = int(sample_rate * 0.5)
        if len(speech_audio) < min_samples:
            if print_vad:
                print(
                    f"[VAD] 语音段太短已丢弃({len(speech_audio)} < {min_samples} 采样)",
                    flush=True,
                )
            return
        try:
            self.command_queue.put(
                (_PCM_TURN_MARKER, speech_audio, int(sample_rate)),
                block=False,
            )
        except queue.Full:
            logger.warning("命令队列已满,跳过 PCM 上行")
            return
        if print_vad:
            print(
                f"[VAD] turn.audio 已排队,{len(speech_audio)} 采样点"
                f"(≈{len(speech_audio) / float(sample_rate):.2f}s)",
                flush=True,
            )
        return

    super()._submit_concatenated_speech_to_stt()
|
||
|
||
def _llm_tts_output_device(self) -> str | int | None:
|
||
raw = os.environ.get("ROCKET_TTS_DEVICE", "").strip()
|
||
if raw.isdigit():
|
||
return int(raw)
|
||
if raw:
|
||
return raw
|
||
return None
|
||
|
||
def _before_audio_iteration(self) -> None:
    """Per-iteration hook: apply queued mic ops, run the base hook, then drain playback.

    The order matters: mic stop/start requests are applied before the base-class
    work, and queued playback is handled last.
    """
    self._drain_mic_ops()
    super()._before_audio_iteration()
    self._drain_llm_playback_queue()
|
||
|
||
def _drain_mic_ops(self) -> None:
    """Apply every pending "stop"/"start" request from ``_mic_op_queue``.

    "stop" stops the capture stream if one is open. "start" (only while
    ``self.running``) opens the stream if needed, then resets the VAD and clears
    both speech buffers. Failures are logged and the next request is processed.
    """
    pending = self._mic_op_queue
    capture = self.audio_capture
    while True:
        try:
            op = pending.get_nowait()
        except queue.Empty:
            return
        try:
            if op == "stop":
                if capture.stream is not None:
                    capture.stop_stream()
                continue
            if op == "start" and self.running:
                if capture.stream is None:
                    capture.start_stream()
                self.vad.reset()
                with self.speech_buffer_lock:
                    self.speech_buffer.clear()
                    self.pre_speech_buffer.clear()
        except Exception as e:  # noqa: BLE001
            logger.warning("麦克风流控制失败 (%r): %s", op, e)
|
||
|
||
def _finish_wake_cycle(self) -> None:
    """End the current wake interaction and return to IDLE.

    Cancels the silence watch and the flight-confirm timer, clears pending
    confirmation and "after TTS" flags, resets the chat history and prints the
    standby hint.
    """
    self._cancel_prompt_listen_timer()
    self._cancel_flight_confirm_timer()
    with self._flight_confirm_timer_lock:
        self._pending_flight_confirm = None
    self._pending_finish_wake_cycle_after_tts = False
    self._pending_flight_confirm_after_tts = False
    idle = int(_WakeFlowPhase.IDLE)
    with self._wake_flow_lock:
        self._wake_phase = idle
    self._reset_llm_history()
    print("[唤醒] 本轮结束。请说「无人机」再次唤醒。", flush=True)
|
||
|
||
def _reset_llm_history(self) -> None:
|
||
with self._chat_session_lock:
|
||
self._llm_messages.clear()
|
||
self._chat_session_until = 0.0
|
||
|
||
def _flush_llm_playback_queue_silent(self) -> None:
|
||
"""丢弃 LLM 播报队列(无日志);新一轮唤醒前清空,避免与问候语或上一轮残段叠播。"""
|
||
while True:
|
||
try:
|
||
self._llm_playback_queue.get_nowait()
|
||
except queue.Empty:
|
||
break
|
||
|
||
def _prepare_wake_session_resources(self) -> None:
    """Reset per-round state for a new wake round.

    Clears the chat history, empties the playback queue and discards speech
    segments still waiting for STT. Shared by the greeting path and the fast path.
    """
    self._reset_llm_history()
    self._flush_llm_playback_queue_silent()
    self.discard_pending_stt_segments()
|
||
|
||
def _recover_from_cloud_failure(
    self,
    user_msg: str,
    *,
    finish_wake_after_tts: bool,
    idle_speak: str,
) -> None:
    """React to a failed cloud turn.

    Args:
        user_msg: The user's utterance, re-run on the local LLM when fallback is enabled.
        finish_wake_after_tts: Whether the wake cycle should end after the spoken reply.
        idle_speak: Line to speak when local fallback is disabled.
    """
    if not self._cloud_fallback_local:
        self._enqueue_llm_speak(idle_speak)
        if finish_wake_after_tts:
            self._pending_finish_wake_cycle_after_tts = True
        return
    print("[云端] 回退本地 LLM…", flush=True)
    self._handle_llm_turn_local(user_msg, finish_wake_after_tts=finish_wake_after_tts)
|
||
|
||
def _begin_wake_cycle(self, staged_followup: str | None) -> None:
    """Start a wake round: queue the ack beep and greeting, then hand over to a worker.

    Args:
        staged_followup: Optional command text to process once the greeting has
            finished; blank or ``None`` means "listen for a command instead".

    Ignored (with a log line) unless the wake flow is IDLE. A daemon thread runs
    ``_after_greeting_pipeline`` to continue the round.
    """
    with self._wake_flow_lock:
        current = self._wake_phase
        if current != int(_WakeFlowPhase.IDLE):
            logger.info(
                "唤醒忽略:当前非 IDLE(phase=%s),不重复排队问候",
                _WakeFlowPhase(current).name,
            )
            return
        self._wake_phase = int(_WakeFlowPhase.GREETING_WAIT)

    self._prepare_wake_session_resources()
    followup = (staged_followup or "").strip()
    self._staged_one_shot_after_greeting = followup or None
    self._greeting_done.clear()
    self._playback_batch_is_greeting = True
    self._enqueue_wake_word_ack_beep()
    self._enqueue_llm_speak(_WAKE_GREETING)

    worker = threading.Thread(
        target=self._after_greeting_pipeline,
        daemon=True,
        name="wake-after-greeting",
    )
    worker.start()
|
||
|
||
def _wake_fast_path_process_follow(self, follow: str) -> bool:
    """Process a command that arrived in the same utterance as the wake word.

    Args:
        follow: Command text that followed the wake word.

    Returns:
        ``True`` when the command was handed to ``_process_one_shot_command``;
        ``False`` when ``follow`` is blank or the wake flow is not IDLE.

    The greeting is skipped; the wake-ack beep is still queued (unless disabled).
    """
    command = (follow or "").strip()
    if not command:
        return False
    with self._wake_flow_lock:
        current = self._wake_phase
        if current != int(_WakeFlowPhase.IDLE):
            logger.info(
                "唤醒连带指令忽略:当前非 IDLE(phase=%s)",
                _WakeFlowPhase(current).name,
            )
            return False
        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)

    self._prepare_wake_session_resources()
    self._staged_one_shot_after_greeting = None
    self._enqueue_wake_word_ack_beep()
    logger.info("唤醒含指令,跳过问候与提示音,直接处理: %s", command[:120])
    self._process_one_shot_command(command)
    return True
|
||
|
||
def _after_greeting_pipeline(self) -> None:
    """Continue the wake round once the greeting has been played.

    Waits up to 120 s for ``_greeting_done``; on timeout the round is finished.
    With a staged command the phase becomes LLM_BUSY and the command is processed;
    otherwise the phase becomes ONE_SHOT_LISTEN and the silence watch is armed.
    """
    greeted = self._greeting_done.wait(timeout=120)
    if not greeted:
        logger.error("问候语播放超时,回到 IDLE")
        self._finish_wake_cycle()
        return
    self._greeting_done.clear()

    with self._wake_flow_lock:
        staged: str | None = self._staged_one_shot_after_greeting
        self._staged_one_shot_after_greeting = None

    if staged is None:
        with self._wake_flow_lock:
            self._wake_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN)
        print("[唤醒] 请说您的指令(一句)。", flush=True)
        # Called outside the lock: the arm method takes _wake_flow_lock itself.
        self._arm_prompt_listen_timeout()
        return

    with self._wake_flow_lock:
        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)
    self._process_one_shot_command(staged)
|
||
|
||
def _process_one_shot_command(self, raw: str) -> None:
    """Process one spoken command: local keyword takeoff or an LLM/cloud turn.

    Args:
        raw: Recognised text; a leading wake word is stripped when detected.

    Blank input ends the wake cycle. Otherwise the mic is asked to stop, then:
    a local "takeoff" keyword (when enabled) starts the offboard task on a daemon
    thread; a disabled LLM without cloud voice just ends the cycle; everything
    else goes to ``_handle_llm_turn``.

    A full mic-op queue is now logged instead of being silently ignored, because
    a dropped "start" request would leave the microphone off.
    """

    def _request_mic(op: str) -> None:
        # Queue a mic "stop"/"start" request; _drain_mic_ops applies it later.
        try:
            self._mic_op_queue.put_nowait(op)
        except queue.Full:
            logger.warning("mic op queue full; dropped %r request", op)

    user_msg = (raw or "").strip()
    if not user_msg:
        self._finish_wake_cycle()
        return
    iw, _ = self.wake_word_detector.detect(user_msg)
    if iw:
        user_msg = (
            self.wake_word_detector.extract_command_text(user_msg) or user_msg
        ).strip()
    if not user_msg:
        self._finish_wake_cycle()
        return
    print(f"[指令] {user_msg}", flush=True)

    _request_mic("stop")
    # Short pause so the queued stop can take effect before processing continues.
    time.sleep(0.12)

    _, params = self.text_preprocessor.preprocess_fast(user_msg)
    if self._local_keyword_takeoff_enabled and params.command_keyword == "takeoff":
        threading.Thread(
            target=self._run_takeoff_offboard_and_wavs,
            daemon=True,
        ).start()
        self._finish_wake_cycle()
        _request_mic("start")
        return

    if self._llm_disabled and not self._cloud_voice_enabled:
        print("[LLM] 已禁用(ROCKET_LLM_DISABLE)。", flush=True)
        self._finish_wake_cycle()
        _request_mic("start")
        return

    self._handle_llm_turn(
        user_msg, finish_wake_after_tts=(self._cloud_client is None)
    )
|
||
|
||
@staticmethod
|
||
def _flight_payload_requests_takeoff(payload: dict) -> bool:
|
||
for a in payload.get("actions") or []:
|
||
if isinstance(a, dict) and a.get("type") == "takeoff":
|
||
return True
|
||
return False
|
||
|
||
def _enqueue_llm_speak(self, line: str) -> None:
|
||
t = (line or "").strip()
|
||
if not t:
|
||
return
|
||
try:
|
||
self._llm_playback_queue.put(t, block=False)
|
||
except queue.Full:
|
||
logger.warning("LLM 播报队列已满,跳过: %s…", t[:40])
|
||
|
||
def _ensure_llm(self):
    """Return the loaded LLM, loading it on first use.

    Returns:
        The model object, or ``None`` when the GGUF file is missing or loading failed.

    Loading happens under ``_model_warm_lock``; the cached model is re-checked
    after the lock is taken.
    """
    cached = self._llm
    if cached is not None:
        return cached
    with self._model_warm_lock:
        if self._llm is None:
            model_path = self._llm_model_path
            if not model_path.is_file():
                logger.error("未找到 GGUF: %s", model_path)
                return None
            logger.info("正在加载 LLM: %s", model_path)
            print("[LLM] 正在加载 Qwen(GGUF)…", flush=True)
            self._llm = load_llama_qwen(model_path, n_ctx=self._llm_ctx)
            if self._llm is not None:
                print("[LLM] Qwen 已载入。", flush=True)
            else:
                logger.error("llama-cpp-python 未安装或加载失败")
        return self._llm
|
||
|
||
def _ensure_llm_tts(self):
    """Return the Kokoro TTS engine, creating it on first use.

    Creation happens under ``_model_warm_lock``; the cached engine is re-checked
    after the lock is taken.
    """
    engine = self._llm_tts_engine
    if engine is not None:
        return engine
    with self._model_warm_lock:
        if self._llm_tts_engine is None:
            from voice_drone.core.tts import KokoroOnnxTTS

            print("[LLM] 正在加载 Kokoro TTS(ONNX)…", flush=True)
            self._llm_tts_engine = KokoroOnnxTTS()
            print("[LLM] Kokoro 已载入。", flush=True)
        return self._llm_tts_engine
|
||
|
||
def _preload_llm_and_tts_if_enabled(self) -> None:
    """Preload models after start-up so the first turn does not stall.

    Cloud mode: the local LLM is never preloaded. The greeting WAV is generated
    if missing (only when the LLM is enabled and local strings are not spoken by
    the cloud). Kokoro is preloaded unless cloud TTS serves local strings or
    preloading was skipped.

    Local mode: nothing happens when the LLM is disabled, preloading was skipped
    or the GGUF file is missing; otherwise the LLM, Kokoro and the greeting WAV
    are prepared, and failures are logged.
    """
    if self._cloud_voice_enabled:
        print(
            "[云端] 跳过本地 Qwen 预加载;对话 TTS 以云端 PCM 为主。",
            flush=True,
        )
        try:
            greeting_wav = _resolve_wake_greeting_wav()
            needs_local_synthesis = (
                not greeting_wav.is_file()
                and not self._llm_disabled
                and not self._cloud_remote_tts_for_local
            )
            if needs_local_synthesis:
                self._ensure_wake_greeting_wav_on_disk()
        except Exception as e:  # noqa: BLE001
            logger.debug("云端模式下预热问候 WAV 跳过: %s", e)

        if self._cloud_remote_tts_for_local:
            print(
                "[云端] 本地字符串播报由 tts.synthesize 提供,跳过 Kokoro 预加载"
                "(失败时会临场加载 Kokoro)。",
                flush=True,
            )
            return

        # Confirm-timeout / cancel prompts and cloud fallback still use local Kokoro;
        # loading it once here avoids a cold start when such a prompt is spoken.
        if self._skip_model_preload:
            print(
                "[云端] 已跳过 Kokoro 预加载(--no-preload / ROCKET_SKIP_MODEL_PRELOAD);"
                "首次本地提示时再加载。",
                flush=True,
            )
            return

        started = time.monotonic()
        try:
            print(
                "[LLM] 云端模式:预加载 Kokoro(确认超时/取消等本地语音)…",
                flush=True,
            )
            self._ensure_llm_tts()
        except Exception as e:  # noqa: BLE001
            logger.warning(
                "云端模式 Kokoro 预加载失败(将在首次本地播报时重试): %s",
                e,
                exc_info=True,
            )
            print(f"[LLM] Kokoro 预加载失败: {e}", flush=True)
        else:
            elapsed = time.monotonic() - started
            print(f"[LLM] Kokoro 预加载完成(约 {elapsed:.1f}s)。", flush=True)
        return

    if self._llm_disabled:
        return
    if self._skip_model_preload:
        print(
            "[LLM] 已跳过预加载(--no-preload 或 ROCKET_SKIP_MODEL_PRELOAD),将在首次使用时加载。",
            flush=True,
        )
        return
    if not self._llm_model_path.is_file():
        print(
            f"[LLM] 未找到 GGUF,跳过预加载: {self._llm_model_path}",
            flush=True,
        )
        return

    print(
        "[LLM] 预加载 Qwen + Kokoro(数十秒属正常,完成后的首轮对话会快很多)…",
        flush=True,
    )
    started = time.monotonic()
    try:
        if self._ensure_llm() is None:
            return
        self._ensure_llm_tts()
        self._ensure_wake_greeting_wav_on_disk()
    except Exception as e:  # noqa: BLE001
        logger.warning("预加载模型失败(将在首次使用时重试): %s", e, exc_info=True)
        print(f"[LLM] 预加载失败: {e}", flush=True)
        return
    elapsed = time.monotonic() - started
    print(f"[LLM] 预加载完成(耗时约 {elapsed:.1f}s)。", flush=True)
|
||
|
||
def _ensure_wake_greeting_wav_on_disk(self) -> Path:
    """Make sure the cached greeting WAV exists, synthesising it once if missing.

    Returns:
        The greeting WAV path. The file may still be missing when the cache
        directory cannot be created or synthesis fails; both cases are logged.

    The audio is written to a temporary sibling file and moved into place with
    ``os.replace``. A failed or interrupted synthesis therefore cannot leave a
    partial file that later passes the ``is_file()`` cache check.
    """
    p = _resolve_wake_greeting_wav()
    if p.is_file():
        return p
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.warning("无法创建问候缓存目录 %s: %s", p.parent, e)
        return p

    # Same directory (so the rename stays on one filesystem) and same extension as the target.
    partial = p.with_name(f".partial-{p.name}")
    try:
        tts = self._ensure_llm_tts()
        tts.synthesize_to_file(_WAKE_GREETING, str(partial))
        os.replace(partial, p)
        logger.info("已自动生成唤醒问候缓存(此后只播此文件): %s", p)
        print(f"[TTS] 已写入问候缓存,下次起不再合成: {p}", flush=True)
    except Exception as e:  # noqa: BLE001
        logger.warning(
            "自动生成问候 WAV 失败(需 scipy 写盘;将本次仍用实时合成): %s",
            e,
            exc_info=True,
        )
        # Remove a leftover partial file; it is fine if none was written.
        try:
            partial.unlink()
        except FileNotFoundError:
            pass
        except OSError as cleanup_err:
            logger.debug("could not remove partial greeting file %s: %s", partial, cleanup_err)
    return p
|
||
|
||
def _play_wake_ready_beep(self, output_device: object | None) -> None:
    """Play the short "you may speak now" beep after the greeting.

    Args:
        output_device: Passed through to ``play_tts_audio``.

    Disabled by ``ROCKET_WAKE_PROMPT_BEEP`` = 0/false/no. Duration, frequency and
    gain come from ``ROCKET_WAKE_BEEP_SEC`` / ``_HZ`` / ``_GAIN``; duration is
    clamped to 0.04-0.25 s and gain to 0.05-0.45. Playback errors are logged at
    debug level.
    """
    from voice_drone.core.tts import play_tts_audio

    def _env_float(name: str, default_text: str, fallback: float) -> float:
        # float() of the environment value; fallback when it does not parse.
        try:
            return float(os.environ.get(name, default_text))
        except ValueError:
            return fallback

    switch = os.environ.get("ROCKET_WAKE_PROMPT_BEEP", "1").lower()
    if switch in ("0", "false", "no"):
        return

    sample_rate = 24000
    duration = max(0.04, min(0.25, _env_float("ROCKET_WAKE_BEEP_SEC", "0.11", 0.11)))
    frequency = _env_float("ROCKET_WAKE_BEEP_HZ", "988", 988.0)
    gain = max(0.05, min(0.45, _env_float("ROCKET_WAKE_BEEP_GAIN", "0.22", 0.22)))
    beep = _synthesize_ready_beep(
        sample_rate,
        duration_sec=duration,
        frequency_hz=frequency,
        amplitude=gain,
    )
    try:
        play_tts_audio(beep, sample_rate, output_device=output_device)
        print("[唤醒] 提示音已播,请说指令。", flush=True)
    except Exception as e:  # noqa: BLE001
        logger.debug("唤醒提示音播放跳过: %s", e)
|
||
|
||
def _enqueue_wake_word_ack_beep(self) -> None:
    """Queue the wake-word acknowledgement beep on the playback queue.

    Skipped when ``ROCKET_WAKE_ACK_BEEP`` is 0/false/no. Only the tag is queued;
    a full queue drops it with a warning.
    """
    setting = os.environ.get("ROCKET_WAKE_ACK_BEEP", "1").lower()
    if setting in ("0", "false", "no"):
        return
    try:
        self._llm_playback_queue.put(_WAKE_HIT_BEEP_TAG, block=False)
    except queue.Full:
        logger.warning("播报队列已满,跳过唤醒确认短音")
|
||
|
||
def _play_wake_word_hit_beep(self, output_device: object | None) -> None:
    """Play the short "tick" emitted right after the wake word is recognised.

    Each parameter first looks at a beep-specific variable and, when that is
    unset or blank, at the variable shared with the post-greeting beep:

    * duration: ``ROCKET_WAKE_ACK_BEEP_SEC``, else ``ROCKET_WAKE_BEEP_SEC``
      (default 0.11) scaled by 0.72; clamped to [0.04, 0.25]; 0.08 on a
      parse error.
    * frequency: ``ROCKET_WAKE_ACK_BEEP_HZ``, else ``ROCKET_WAKE_BEEP_HZ``
      (default 988); 1100.0 on a parse error.
    * amplitude: ``ROCKET_WAKE_ACK_BEEP_GAIN``, else ``ROCKET_WAKE_BEEP_GAIN``
      (default 0.22); clamped to [0.05, 0.45]; 0.22 on a parse error.

    ``ROCKET_WAKE_ACK_BEEP`` set to ``0`` / ``false`` / ``no`` disables the
    beep. Playback errors are logged at debug level and swallowed.

    Args:
        output_device: Forwarded to ``play_tts_audio`` as ``output_device``.
    """
    from voice_drone.core.tts import play_tts_audio

    def _resolve(
        own_var: str,
        shared_var: str,
        shared_default: str,
        *,
        fallback: float,
        scale: float = 1.0,
    ) -> float:
        # The beep-specific value wins; only the shared value is scaled.
        try:
            own_text = os.environ.get(own_var, "").strip()
            if own_text:
                return float(own_text)
            return float(os.environ.get(shared_var, shared_default)) * scale
        except ValueError:
            return fallback

    if os.environ.get("ROCKET_WAKE_ACK_BEEP", "1").lower() in ("0", "false", "no"):
        return

    sample_rate = 24000
    seconds = _resolve(
        "ROCKET_WAKE_ACK_BEEP_SEC",
        "ROCKET_WAKE_BEEP_SEC",
        "0.11",
        fallback=0.08,
        scale=0.72,
    )
    seconds = max(0.04, min(0.25, seconds))
    tone_hz = _resolve(
        "ROCKET_WAKE_ACK_BEEP_HZ", "ROCKET_WAKE_BEEP_HZ", "988", fallback=1100.0
    )
    gain = _resolve(
        "ROCKET_WAKE_ACK_BEEP_GAIN", "ROCKET_WAKE_BEEP_GAIN", "0.22", fallback=0.22
    )
    gain = max(0.05, min(0.45, gain))
    beep = _synthesize_ready_beep(
        sample_rate, duration_sec=seconds, frequency_hz=tone_hz, amplitude=gain
    )
    try:
        play_tts_audio(beep, sample_rate, output_device=output_device)
    except Exception as exc:  # noqa: BLE001
        logger.debug("唤醒确认短音播放失败: %s", exc)
    else:
        print("[唤醒] 确认短音已播。", flush=True)
|
||
|
||
def _try_play_line_via_cloud_tts(self, s: str, dev: object | None) -> bool:
|
||
"""docs/API.md §3.3 tts.synthesize:成功播放返回 True,否则 False(调用方回退 Kokoro)。"""
|
||
if not self._cloud_remote_tts_for_local or self._cloud_client is None:
|
||
return False
|
||
txt = (s or "").strip()
|
||
if not txt:
|
||
return False
|
||
from voice_drone.core.cloud_voice_client import CloudVoiceError
|
||
from voice_drone.core.tts import play_tts_audio
|
||
|
||
t0 = time.monotonic()
|
||
try:
|
||
out = self._cloud_client.run_tts_synthesize(txt)
|
||
except CloudVoiceError as e:
|
||
logger.warning("云端 tts.synthesize 失败: %s", e)
|
||
return False
|
||
except Exception as e: # noqa: BLE001
|
||
logger.warning("云端 tts.synthesize 异常: %s", e, exc_info=True)
|
||
return False
|
||
pcm = out.get("pcm")
|
||
try:
|
||
sr = int(out.get("sample_rate_hz") or 24000)
|
||
except (TypeError, ValueError):
|
||
sr = 24000
|
||
if pcm is None or np.asarray(pcm).size == 0:
|
||
logger.warning("云端 tts.synthesize 返回空 PCM")
|
||
return False
|
||
pcm_i16 = np.asarray(pcm, dtype=np.int16).reshape(-1)
|
||
logger.info(
|
||
"云端 tts.synthesize: samples=%s int16_max_abs=%s elapsed=%.3fs",
|
||
pcm_i16.size,
|
||
int(np.max(np.abs(pcm_i16))),
|
||
time.monotonic() - t0,
|
||
)
|
||
audio_f32 = pcm_i16.astype(np.float32) / 32768.0
|
||
try:
|
||
play_tts_audio(audio_f32, sr, output_device=dev)
|
||
except Exception as e: # noqa: BLE001
|
||
logger.warning("播放云端 tts.synthesize 结果失败: %s", e, exc_info=True)
|
||
return False
|
||
return True
|
||
|
||
def _play_segment_end_cue(self, dev: object | None) -> None:
    """Play the very short end-of-segment cue (§5).

    This cue is separate from the chit-chat re-prompt beep. Its length comes
    from ``self._segment_cue_duration_ms`` and is clamped to 20-500 ms; the
    tone is fixed at 1420 Hz with amplitude 0.18. Playback errors are logged
    at debug level and ignored.

    Args:
        dev: Forwarded to ``play_tts_audio`` as ``output_device``.
    """
    from voice_drone.core.tts import play_tts_audio

    sample_rate = 24000
    cue_seconds = max(0.02, min(0.5, self._segment_cue_duration_ms / 1000.0))
    cue = _synthesize_ready_beep(
        sample_rate,
        duration_sec=cue_seconds,
        frequency_hz=1420.0,
        amplitude=0.18,
    )
    try:
        play_tts_audio(cue, sample_rate, output_device=dev)
    except Exception as exc:  # noqa: BLE001
        logger.debug("断句提示音: %s", exc)
|
||
|
||
def _play_chitchat_reprompt_beep(self, dev: object | None) -> None:
|
||
"""闲聊 TTS 播完后再滴一声,进入下一轮 PROMPT_LISTEN。"""
|
||
self._play_wake_word_hit_beep(dev)
|
||
|
||
def _handle_pcm_uplink_turn(self, pcm: np.ndarray, sample_rate_hz: int) -> None:
    """Handle SEGMENT_END: play the cue, wait out the mute, upload one audio turn.

    Steps, in order:

    1. Ignore the segment unless the wake phase is ``ONE_SHOT_LISTEN``.
    2. Cancel the prompt-listen timer and request a mic stop.
    3. Queue the end-of-segment cue and wait (up to 15 s) for the playback
       side to signal ``_segment_cue_done``.
    4. Sleep ``_post_cue_mic_mute_ms`` so the cue is not picked up again.
    5. Switch the phase to ``LLM_BUSY`` and run the cloud audio turn.

    If the cue cannot be queued or is never signalled, a mic start is
    requested and the segment is dropped.

    Args:
        pcm: Captured samples for this utterance, passed on unchanged.
        sample_rate_hz: Sample rate of ``pcm``.
    """

    def _request_mic(op: str) -> None:
        # Best effort: when the control queue is full the request is dropped.
        try:
            self._mic_op_queue.put_nowait(op)
        except queue.Full:
            pass

    with self._wake_flow_lock:
        listening = self._wake_phase == int(_WakeFlowPhase.ONE_SHOT_LISTEN)
        if not listening:
            logger.debug("PCM 上行忽略:当前非 PROMPT_LISTEN")
            return

    self._cancel_prompt_listen_timer()
    _request_mic("stop")

    # Clear before queueing so a stale "done" from an earlier cue is not seen.
    self._segment_cue_done.clear()
    try:
        self._llm_playback_queue.put_nowait(_SEGMENT_END_CUE_TAG)
    except queue.Full:
        logger.error("播报队列满,无法播断句提示")
        _request_mic("start")
        return

    cue_finished = self._segment_cue_done.wait(timeout=15.0)
    if not cue_finished:
        logger.error("断句提示音同步超时")
        _request_mic("start")
        return

    time.sleep(self._post_cue_mic_mute_ms / 1000.0)
    with self._wake_flow_lock:
        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)
    self._handle_llm_turn_cloud_pcm(pcm, sample_rate_hz, finish_wake_after_tts=False)
|
||
|
||
def _drain_llm_playback_queue(self, recover_mic: bool = True) -> None:
    """Play every item currently waiting in ``_llm_playback_queue``.

    Queue items are handled by kind:

    * ``_WAKE_HIT_BEEP_TAG``, ``_SEGMENT_END_CUE_TAG`` and
      ``_CHITCHAT_REPROMPT_BEEP_TAG`` play the matching cue.
    * ``(_CLOUD_PCM_TAG, pcm, sample_rate)`` plays PCM supplied by the cloud.
    * Any other string is text to speak: cloud ``tts.synthesize`` first,
      local TTS as fallback. The wake greeting prefers a cached WAV over
      live local synthesis.
    * Anything else is skipped with a warning.

    When ``ack_pause_mic_for_playback`` is set, the capture stream is stopped
    for the duration of the batch. Afterwards, in ``finally``, the mic is
    restored (if ``recover_mic``) and at most one pending follow-up runs:
    the flight confirmation window, the chit-chat re-prompt, or the end of
    the wake cycle.

    Args:
        recover_mic: Restart the capture stream afterwards when this call
            was the one that stopped it.
    """
    from voice_drone.core.tts import play_tts_audio, play_wav_path

    lines: list[object] = []
    while True:
        try:
            lines.append(self._llm_playback_queue.get_nowait())
        except queue.Empty:
            break

    if not lines:
        # With streamed, segment-wise TTS the last drain can empty the queue
        # before _finalize_llm_turn sets _pending_finish_wake_cycle_after_tts;
        # finish the wake cycle here in that case.
        # NOTE: the flight-confirm window must only be entered in the
        # ``finally`` below, after a batch containing this turn's cloud TTS
        # has been played. Do not act on _pending_flight_confirm_after_tts
        # here: the main thread could run an empty drain before the PCM is
        # queued, open the confirm window early and clear the flag, after
        # which the command thread would still set _pending_finish_wake_cycle.
        if self._pending_finish_wake_cycle_after_tts:
            self._pending_finish_wake_cycle_after_tts = False
            self._finish_wake_cycle()
        return

    greeting_batch = self._playback_batch_is_greeting
    self._playback_batch_is_greeting = False
    mic_stopped = False
    if self.ack_pause_mic_for_playback:
        # Drop queued STT segments once more before muting: VAD may have
        # submitted new ones between the wake hit and this drain.
        self.discard_pending_stt_segments()
        try:
            self.audio_capture.stop_stream()
            mic_stopped = True
        except Exception as e:  # noqa: BLE001
            logger.warning("暂停麦克风失败: %s", e)

    try:
        tts = None
        dev = self._llm_tts_output_device()

        def _speak_with_local_tts(text: str) -> None:
            # Local synthesis + playback; the engine is created on first use
            # and then reused for the rest of the batch.
            nonlocal tts
            if tts is None:
                tts = self._ensure_llm_tts()
            logger.info("TTS: 开始合成并播放: %r", text)
            t_syn0 = time.monotonic()
            audio, sr = tts.synthesize(text)
            t_syn1 = time.monotonic()
            play_tts_audio(audio, sr, output_device=dev)
            t_play1 = time.monotonic()
            print(
                f"[计时] TTS 合成 {t_syn1 - t_syn0:.3f}s,"
                f"播放 {t_play1 - t_syn1:.3f}s"
                f"(本段合计 {t_play1 - t_syn0:.3f}s)",
                flush=True,
            )
            logger.info("TTS: 播放完成")

        for line in lines:
            if line == _WAKE_HIT_BEEP_TAG:
                self._play_wake_word_hit_beep(dev)
                continue
            if line == _SEGMENT_END_CUE_TAG:
                self._play_segment_end_cue(dev)
                # Releases the thread blocked in _handle_pcm_uplink_turn.
                self._segment_cue_done.set()
                continue
            if line == _CHITCHAT_REPROMPT_BEEP_TAG:
                self._play_chitchat_reprompt_beep(dev)
                self._arm_prompt_listen_timeout()
                continue
            if (
                isinstance(line, tuple)
                and len(line) == 3
                and line[0] == _CLOUD_PCM_TAG
            ):
                _, pcm_i16, sr_cloud = line
                try:
                    pcm_i16 = np.asarray(pcm_i16, dtype=np.int16).reshape(-1)
                    if pcm_i16.size == 0:
                        continue
                    # Widen before abs(): abs(int16(-32768)) wraps to -32768
                    # and would corrupt this diagnostic peak value.
                    dbg_max = int(np.max(np.abs(pcm_i16.astype(np.int32))))
                    logger.info(
                        "云端 PCM 解码: samples=%s int16_max_abs=%s (若 max_abs=0 则为全零或"
                        "协议/端序与云端不一致;请在服务端导出同段 WAV 对比)",
                        pcm_i16.size,
                        dbg_max,
                    )
                    audio_f32 = pcm_i16.astype(np.float32) / 32768.0
                    t_play0 = time.monotonic()
                    play_tts_audio(audio_f32, int(sr_cloud), output_device=dev)
                    print(
                        f"[计时] 云端 TTS 播放 {time.monotonic() - t_play0:.3f}s "
                        f"({pcm_i16.size / int(sr_cloud):.2f}s 音频)",
                        flush=True,
                    )
                    print("[LLM] 已播报。", flush=True)
                except Exception as e:  # noqa: BLE001
                    logger.warning("云端 PCM 播放失败: %s", e, exc_info=True)
                continue

            if not isinstance(line, str):
                # Only strings can be spoken. Skipping a malformed item keeps
                # the rest of the batch (and the segment-cue handshake) alive
                # instead of failing on ``.strip()``.
                if line is not None:
                    logger.warning(
                        "播报队列出现无法识别的条目,已跳过: %r", type(line)
                    )
                continue
            s = line.strip()
            if not s:
                continue
            try:
                if s == _WAKE_GREETING:
                    t_w0 = time.monotonic()
                    if self._try_play_line_via_cloud_tts(s, dev):
                        print(
                            f"[计时] 云端 tts.synthesize 问候,耗时 "
                            f"{time.monotonic() - t_w0:.3f}s",
                            flush=True,
                        )
                    else:
                        greet_wav = self._ensure_wake_greeting_wav_on_disk()
                        if greet_wav.is_file():
                            play_wav_path(greet_wav, output_device=dev)
                            print(
                                f"[计时] TTS 预生成问候 WAV 播完,耗时 "
                                f"{time.monotonic() - t_w0:.3f}s",
                                flush=True,
                            )
                        else:
                            _speak_with_local_tts(s)
                    if greeting_batch:
                        self._play_wake_ready_beep(dev)
                else:
                    t_line0 = time.monotonic()
                    if self._try_play_line_via_cloud_tts(s, dev):
                        print(
                            f"[计时] 云端 tts.synthesize 本段合计 "
                            f"{time.monotonic() - t_line0:.3f}s",
                            flush=True,
                        )
                    else:
                        _speak_with_local_tts(s)
                    print("[LLM] 已播报。", flush=True)
            except Exception as e:  # noqa: BLE001
                logger.warning("LLM 播报失败: %s", e, exc_info=True)
    finally:
        if mic_stopped and recover_mic:
            try:
                self.audio_capture.start_stream()
                try:
                    settle_ms = float(
                        os.environ.get("ROCKET_MIC_RESTART_SETTLE_MS", "150")
                    )
                except ValueError:
                    settle_ms = 150.0
                settle_ms = max(0.0, min(2000.0, settle_ms))
                if settle_ms > 0:
                    # Give the reopened stream time to settle before resetting
                    # the detectors below.
                    time.sleep(settle_ms / 1000.0)
                try:
                    self.audio_preprocessor.reset()
                except Exception as e:  # noqa: BLE001
                    logger.debug("audio_preprocessor.reset: %s", e)
                self.vad.reset()
                with self.speech_buffer_lock:
                    self.speech_buffer.clear()
                    self.pre_speech_buffer.clear()
            except Exception as e:  # noqa: BLE001
                logger.error("麦克风恢复失败: %s", e)
        if greeting_batch:
            self._greeting_done.set()
        # Exactly one follow-up per drained batch, in priority order.
        if self._pending_flight_confirm_after_tts:
            self._pending_flight_confirm_after_tts = False
            self._begin_flight_confirm_listen()
        elif self._pending_chitchat_reprompt_after_tts:
            self._pending_chitchat_reprompt_after_tts = False
            with self._wake_flow_lock:
                self._wake_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN)
            try:
                self._llm_playback_queue.put_nowait(_CHITCHAT_REPROMPT_BEEP_TAG)
            except queue.Full:
                logger.warning("播报队列已满,跳过闲聊再滴声")
        elif self._pending_finish_wake_cycle_after_tts:
            self._pending_finish_wake_cycle_after_tts = False
            self._finish_wake_cycle()
|
||
|
||
def _discard_llm_playback_queue(self) -> None:
|
||
"""退出时丢弃未播完的大模型 TTS,避免 stop() 里 speak_text/sounddevice 长时间阻塞导致 Ctrl+C 无法结束进程。"""
|
||
dropped = 0
|
||
while True:
|
||
try:
|
||
self._llm_playback_queue.get_nowait()
|
||
dropped += 1
|
||
except queue.Empty:
|
||
break
|
||
if dropped:
|
||
logger.info("退出:已丢弃 %s 条待播 LLM 语音", dropped)
|
||
|
||
@staticmethod
|
||
def _chunk_delta_text(chunk: object) -> str:
|
||
if not isinstance(chunk, dict):
|
||
return ""
|
||
choices = chunk.get("choices") or []
|
||
if not choices:
|
||
return ""
|
||
c0 = choices[0]
|
||
d = c0.get("delta") if isinstance(c0, dict) else None
|
||
if not isinstance(d, dict):
|
||
d = c0.get("message") if isinstance(c0, dict) else None
|
||
if not isinstance(d, dict):
|
||
return ""
|
||
raw = d.get("content")
|
||
return raw if isinstance(raw, str) else ""
|
||
|
||
def _enqueue_segment_capped(self, seg: str, budget: int) -> int:
|
||
seg = (seg or "").strip()
|
||
if not seg or budget <= 0:
|
||
return budget
|
||
if len(seg) <= budget:
|
||
self._enqueue_llm_speak(seg)
|
||
return budget - len(seg)
|
||
self._enqueue_llm_speak(seg[: max(0, budget - 1)] + "…")
|
||
return 0
|
||
|
||
def _finalize_llm_turn(
|
||
self,
|
||
reply: str,
|
||
finish_wake_after_tts: bool,
|
||
*,
|
||
streamed_chat: bool,
|
||
) -> None:
|
||
if not reply:
|
||
self._enqueue_llm_speak("我没听清,请再说一遍。")
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
mode, payload = parse_flight_intent_reply(reply)
|
||
with self._chat_session_lock:
|
||
self._llm_messages.append({"role": "assistant", "content": reply})
|
||
|
||
print(f"[LLM] 判定={mode}", flush=True)
|
||
print(f"[LLM] 原文: {reply[:500]}{'…' if len(reply) > 500 else ''}", flush=True)
|
||
|
||
if streamed_chat:
|
||
if payload is not None and self._flight_payload_requests_takeoff(payload):
|
||
threading.Thread(
|
||
target=self._run_takeoff_offboard_and_wavs,
|
||
daemon=True,
|
||
).start()
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
|
||
if payload is not None:
|
||
to_say = str(payload.get("summary") or "好的。").strip()
|
||
if self._flight_payload_requests_takeoff(payload):
|
||
threading.Thread(
|
||
target=self._run_takeoff_offboard_and_wavs,
|
||
daemon=True,
|
||
).start()
|
||
else:
|
||
to_say = reply.strip()
|
||
|
||
if len(to_say) > self._llm_tts_max_chars:
|
||
to_say = to_say[: self._llm_tts_max_chars] + "…"
|
||
self._enqueue_llm_speak(to_say)
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
|
||
def _enqueue_cloud_pcm_playback(
|
||
self, pcm_int16: np.ndarray, sample_rate_hz: int
|
||
) -> None:
|
||
if pcm_int16 is None or np.asarray(pcm_int16).size == 0:
|
||
return
|
||
try:
|
||
self._llm_playback_queue.put(
|
||
(_CLOUD_PCM_TAG, np.asarray(pcm_int16, dtype=np.int16), int(sample_rate_hz)),
|
||
block=False,
|
||
)
|
||
except queue.Full:
|
||
logger.warning("LLM 播报队列已满,跳过云端 PCM")
|
||
|
||
def _send_socket_command(self, cmd: Command) -> bool:
    """Send one command over the socket client, with the client's retries.

    Args:
        cmd: Command to send; ``fill_defaults()`` is applied to it first.

    Returns:
        True if ``send_command_with_retry`` reported success, else False.
        Both outcomes are logged.
    """
    cmd.fill_defaults()
    delivered = self.socket_client.send_command_with_retry(cmd)
    if not delivered:
        logger.warning("Socket 未送达(已达 max_retries): %s", cmd.command)
        return False
    logger.info("✅ Socket 已发送: %s", cmd.command)
    return True
|
||
|
||
def _publish_flight_intent_to_ros_bridge(self, flight: dict) -> None:
    """Validate a flight_intent and publish it to ROS through a subprocess.

    The intent is written to a temporary JSON file and handed to
    ``voice_drone.tools.publish_flight_intent_ros_once`` inside a login bash
    shell. The temporary file is always removed afterwards.

    Environment:

    * ``ROCKET_FLIGHT_BRIDGE_SETUP`` - shell snippet run first (default
      ``source /opt/ros/noetic/setup.bash``); an empty value skips it.
    * ``ROCKET_FLIGHT_BRIDGE_TOPIC`` - target topic, default ``/input``.
    * ``ROCKET_FLIGHT_BRIDGE_WAIT_SUB`` - value for ``--wait-subscribers``,
      default 2.

    Failures (validation, temp file, timeout, non-zero exit) are logged and
    never raised.

    Args:
        flight: The flight_intent dictionary to publish.
    """
    _parsed, errors = parse_flight_intent_dict(flight)
    if errors or _parsed is None:
        logger.warning("[飞控-ROS桥] flight_intent 校验失败,未发布: %s", errors)
        return

    setup = os.environ.get(
        "ROCKET_FLIGHT_BRIDGE_SETUP", "source /opt/ros/noetic/setup.bash"
    ).strip()
    topic = os.environ.get("ROCKET_FLIGHT_BRIDGE_TOPIC", "/input").strip() or "/input"
    wait_raw = os.environ.get("ROCKET_FLIGHT_BRIDGE_WAIT_SUB", "2").strip()
    try:
        wait_sub = float(wait_raw)
    except ValueError:
        wait_sub = 2.0

    root = str(_PROJECT_ROOT)
    body = json.dumps(flight, ensure_ascii=False)
    fd, tmp_path = tempfile.mkstemp(prefix="flight_intent_", suffix=".json", text=True)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(body)
    except OSError:
        # If fdopen itself failed the descriptor is still open; if the write
        # failed it is already closed and close() raises, which is ignored.
        try:
            os.close(fd)
        except OSError:
            pass
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        logger.warning("[飞控-ROS桥] 无法写入临时 JSON")
        return

    # PYTHONPATH must be appended to, not replaced: overwriting it would drop
    # the /opt/ros/.../dist-packages entry injected by the ROS setup script
    # and rospy could no longer be imported.
    shell_steps = [
        f"cd {shlex.quote(root)}",
        f"export PYTHONPATH={shlex.quote(root)}:$PYTHONPATH",
        "python3 -m voice_drone.tools.publish_flight_intent_ros_once "
        f"--topic {shlex.quote(topic)} --wait-subscribers {wait_sub} "
        f"{shlex.quote(tmp_path)}",
    ]
    if setup:
        # Only prepend a non-empty snippet: an empty one would make the
        # command start with "&&", which bash rejects as a syntax error.
        shell_steps.insert(0, setup)
    cmd = " && ".join(shell_steps)

    try:
        r = subprocess.run(
            ["bash", "-lc", cmd],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        logger.warning("[飞控-ROS桥] 子进程超时(>60s)")
        return
    except OSError as e:
        logger.warning("[飞控-ROS桥] 无法启动 bash: %s", e)
        return
    finally:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    if r.returncode != 0:
        logger.warning(
            "[飞控-ROS桥] 发布失败 code=%s stderr=%s",
            r.returncode,
            (r.stderr or "").strip()[:800],
        )
    else:
        logger.info("[飞控-ROS桥] 已发布至 %s", topic)
|
||
|
||
def _run_cloud_flight_intent_sequence(self, flight: dict) -> None:
    """Execute a cloud flight_intent step by step (meant for a worker thread).

    The intent is validated first. Actions then run strictly in order:
    take-off goes through the offboard routine and is awaited before the
    next step; land / return-home / hover / hold / goto are sent as socket
    commands; wait sleeps for the requested time. A failed or unsupported
    step is logged and the sequence continues with the next one.

    Args:
        flight: The flight_intent dictionary received from the cloud.
    """
    parsed, errors = parse_flight_intent_dict(flight)
    # Also reject a missing parse result, as the ROS-bridge publisher does;
    # otherwise ``parsed.trace_id`` below would raise AttributeError.
    if errors or parsed is None:
        logger.warning("[飞控] flight_intent 校验失败: %s", errors)
        return
    tid = (parsed.trace_id or "").strip() or "-"
    logger.info("[飞控] 开始执行序列 trace_id=%s steps=%d", tid, len(parsed.actions))

    for step, action in enumerate(parsed.actions):
        if isinstance(action, ActionTakeoff):
            alt = action.args.relative_altitude_m
            if alt is not None:
                logger.info(
                    "[飞控] takeoff 请求相对高度 %.2fm(当前 offboard 脚本是否使用该参数请自行扩展)",
                    alt,
                )
            self._run_takeoff_offboard_and_wavs()
        elif isinstance(action, ActionLand):
            cmd = Command.create("land", self._get_next_sequence_id())
            self._send_socket_command(cmd)
        elif isinstance(action, ActionReturnHome):
            cmd = Command.create("return_home", self._get_next_sequence_id())
            self._send_socket_command(cmd)
        elif isinstance(action, (ActionHover, ActionHold)):
            cmd = Command.create("hover", self._get_next_sequence_id())
            self._send_socket_command(cmd)
        elif isinstance(action, ActionGoto):
            cmd, err = goto_action_to_command(action, self._get_next_sequence_id())
            if err:
                logger.warning("[飞控] step %d goto: %s", step, err)
                continue
            if cmd is not None:
                self._send_socket_command(cmd)
        elif isinstance(action, ActionWait):
            sec = float(action.args.seconds)
            logger.info("[飞控] step %d wait %.2fs", step, sec)
            # time.sleep() raises ValueError for negative or NaN durations,
            # which would kill this thread and drop the remaining steps.
            if sec > 0:
                time.sleep(sec)
        else:
            logger.warning("[飞控] step %d 未处理的动作类型: %r", step, action)
|
||
|
||
def _cancel_flight_confirm_timer(self) -> None:
|
||
with self._flight_confirm_timer_lock:
|
||
t = self._flight_confirm_timer
|
||
self._flight_confirm_timer = None
|
||
if t is not None:
|
||
try:
|
||
t.cancel()
|
||
except Exception: # noqa: BLE001
|
||
pass
|
||
|
||
def _begin_flight_confirm_listen(self) -> None:
    """Open the spoken confirmation window after the cloud TTS has played.

    Part of the cloud_voice_dialog_v1 flow. If no intent is waiting for
    confirmation the wake cycle is finished instead. Otherwise the wake
    phase becomes ``FLIGHT_CONFIRM_LISTEN``, the accepted phrases are
    printed, and a daemon timer is armed that calls
    ``_on_flight_confirm_timeout`` after ``confirm["timeout_sec"]`` seconds.
    """
    self._cancel_prompt_listen_timer()
    with self._flight_confirm_timer_lock:
        pending = self._pending_flight_confirm
        if pending is not None:
            cd = pending["confirm"]
            timeout_sec = float(cd["timeout_sec"])
            confirm_phrases = cd["confirm_phrases"]
            cancel_phrases = cd["cancel_phrases"]

    if pending is None:
        logger.warning("[飞控] 无待确认意图,跳过确认窗")
        # Called after releasing the timer lock: _finish_wake_cycle is defined
        # elsewhere and may need that lock itself (TODO confirm). Not holding
        # it here is safe either way.
        self._finish_wake_cycle()
        return

    # Takes the timer lock itself, so it must run outside the block above.
    self._cancel_flight_confirm_timer()
    with self._wake_flow_lock:
        self._wake_phase = int(_WakeFlowPhase.FLIGHT_CONFIRM_LISTEN)
    print(
        f"[飞控] 请口头确认 {confirm_phrases!r} 或取消 {cancel_phrases!r},"
        f"超时 {timeout_sec:.0f}s。",
        flush=True,
    )

    def _fire() -> None:
        # Runs on the timer thread; an exception here must not go unreported.
        try:
            self._on_flight_confirm_timeout()
        except Exception as e:  # noqa: BLE001
            logger.error("确认窗超时处理异常: %s", e, exc_info=True)

    with self._flight_confirm_timer_lock:
        self._flight_confirm_timer = threading.Timer(timeout_sec, _fire)
        self._flight_confirm_timer.daemon = True
        self._flight_confirm_timer.start()
|
||
|
||
def _on_flight_confirm_timeout(self) -> None:
|
||
with self._flight_confirm_timer_lock:
|
||
if self._pending_flight_confirm is None:
|
||
return
|
||
self._pending_flight_confirm = None
|
||
self._flight_confirm_timer = None
|
||
logger.info("[飞控] 确认窗超时")
|
||
self._enqueue_llm_speak(MSG_CONFIRM_TIMEOUT)
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
|
||
def _handle_flight_confirm_text(self, raw: str) -> None:
|
||
utter = (raw or "").strip()
|
||
if not utter:
|
||
return
|
||
norm = normalize_phrase_text(utter)
|
||
print(f"[飞控-确认窗] {utter!r}", flush=True)
|
||
|
||
action: str = "noop"
|
||
fi_ok: dict | None = None
|
||
t: threading.Timer | None = None
|
||
with self._flight_confirm_timer_lock:
|
||
pend = self._pending_flight_confirm
|
||
if pend is None:
|
||
return
|
||
cd = pend["confirm"]
|
||
cancel_hit = match_phrase_list(norm, cd["cancel_phrases"])
|
||
confirm_hit = match_phrase_list(norm, cd["confirm_phrases"])
|
||
if cancel_hit:
|
||
action = "cancel"
|
||
self._pending_flight_confirm = None
|
||
t = self._flight_confirm_timer
|
||
self._flight_confirm_timer = None
|
||
elif confirm_hit:
|
||
action = "confirm"
|
||
fi_ok = pend["flight"]
|
||
self._pending_flight_confirm = None
|
||
t = self._flight_confirm_timer
|
||
self._flight_confirm_timer = None
|
||
else:
|
||
logger.info("[飞控] 确认窗未命中短语,忽略: %s", utter[:80])
|
||
return
|
||
|
||
if t is not None:
|
||
try:
|
||
t.cancel()
|
||
except Exception: # noqa: BLE001
|
||
pass
|
||
|
||
if action == "cancel":
|
||
logger.info("[飞控] 用户取消待执行意图")
|
||
self._enqueue_llm_speak(MSG_CANCELLED)
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
|
||
if action == "confirm" and fi_ok is not None:
|
||
logger.info("[飞控] 用户已确认,开始执行 flight_intent")
|
||
self._start_cloud_flight_execution(fi_ok)
|
||
self._enqueue_llm_speak(MSG_CONFIRM_EXECUTING)
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
|
||
def _start_cloud_flight_execution(self, fi: dict) -> None:
|
||
"""ROCKET_CLOUD_EXECUTE_FLIGHT 已通过校验后,起线程执行。"""
|
||
if os.environ.get("ROCKET_CLOUD_EXECUTE_FLIGHT", "").lower() not in (
|
||
"1",
|
||
"true",
|
||
"yes",
|
||
):
|
||
return
|
||
if os.environ.get("ROCKET_FLIGHT_INTENT_ROS_BRIDGE", "").lower() in (
|
||
"1",
|
||
"true",
|
||
"yes",
|
||
):
|
||
threading.Thread(
|
||
target=self._publish_flight_intent_to_ros_bridge,
|
||
args=(fi,),
|
||
daemon=True,
|
||
).start()
|
||
else:
|
||
threading.Thread(
|
||
target=self._run_cloud_flight_intent_sequence,
|
||
args=(fi,),
|
||
daemon=True,
|
||
).start()
|
||
|
||
def _handle_llm_turn(
|
||
self, user_msg: str, *, finish_wake_after_tts: bool = False
|
||
) -> None:
|
||
if self._cloud_voice_enabled and self._cloud_client is not None:
|
||
self._handle_llm_turn_cloud(user_msg, finish_wake_after_tts=finish_wake_after_tts)
|
||
return
|
||
self._handle_llm_turn_local(user_msg, finish_wake_after_tts=finish_wake_after_tts)
|
||
|
||
def _apply_cloud_dialog_result(
    self,
    result: dict,
    *,
    finish_wake_after_tts: bool,
) -> None:
    """Act on the result of one cloud dialogue turn.

    Routing:

    * ``flight_intent`` (with a dict payload marked ``is_flight_intent``):
      execution is considered only when ``ROCKET_CLOUD_EXECUTE_FLIGHT`` is
      on, the protocol is ``CLOUD_VOICE_DIALOG_V1`` and ``confirm`` parses.
      If confirmation is required, the intent is parked and the confirmation
      window is scheduled for after the TTS; otherwise execution starts
      right away.
    * ``chitchat``: the reply is printed and a re-prompt is scheduled.
    * anything else is logged as an unknown routing.

    Audio: cloud PCM is queued when present. Otherwise, if local fallback is
    enabled, the summary / chat reply text is queued for TTS; if not, a
    fixed "no cloud audio" message is queued.

    Follow-up: chit-chat schedules the re-prompt, a scheduled confirmation
    schedules nothing more, and every other outcome flags the wake cycle to
    end after playback.

    Args:
        result: Decoded turn result from the cloud client.
        finish_wake_after_tts: Kept for interface compatibility. Every
            outcome that is neither chit-chat nor a scheduled confirmation
            ends the wake cycle regardless of this flag.
    """
    proto = result.get("protocol")
    routing = result.get("routing")
    fi = result.get("flight_intent")
    confirm_raw = result.get("confirm")
    scheduled_flight_confirm = False

    if routing == "flight_intent" and isinstance(fi, dict) and fi.get("is_flight_intent"):
        summary = str(fi.get("summary") or "好的。").strip()
        actions = fi.get("actions") or []
        print(f"[LLM] 判定=飞控意图(云端) summary={summary!r}", flush=True)
        print(f"[LLM] actions={actions!r}", flush=True)
        if proto != CLOUD_VOICE_DIALOG_V1:
            logger.error(
                "[云端] flight_intent 须 protocol=%r,收到 %r;按 v1 拒执行飞控",
                CLOUD_VOICE_DIALOG_V1,
                proto,
            )
        cd = parse_confirm_dict(confirm_raw)
        if cd is None:
            logger.error("[云端] flight_intent 须带合法 confirm 对象(v1),拒执行飞控")
        exec_enabled = os.environ.get("ROCKET_CLOUD_EXECUTE_FLIGHT", "").lower() in (
            "1",
            "true",
            "yes",
        )
        contract_ok = proto == CLOUD_VOICE_DIALOG_V1 and cd is not None
        if exec_enabled and contract_ok:
            if cd["required"]:
                scheduled_flight_confirm = True
                with self._flight_confirm_timer_lock:
                    self._pending_flight_confirm = {"flight": fi, "confirm": cd}
                self._pending_flight_confirm_after_tts = True
                logger.info(
                    "[云端] flight_intent 待口头确认(pending_id=%s);"
                    "播完 TTS 后听确认/超时",
                    cd.get("pending_id"),
                )
            else:
                logger.info(
                    "[云端] flight_intent confirm.required=false,将直接执行(若已开执行开关)"
                )
                self._start_cloud_flight_execution(fi)
        elif exec_enabled:
            # Execution is switched on but the v1 contract is not met.
            logger.warning(
                "[云端] 协议或 confirm 不完整,本轮不执行飞控(仍播 TTS)"
            )
        else:
            logger.info(
                "[云端] flight_intent 已下发(未设 ROCKET_CLOUD_EXECUTE_FLIGHT,仅播报)"
            )
    elif routing == "chitchat":
        if proto != CLOUD_VOICE_DIALOG_V1:
            logger.warning(
                "[云端] chitchat 期望 protocol=%r,实际=%r",
                CLOUD_VOICE_DIALOG_V1,
                proto,
            )
        cr = (result.get("chat_reply") or "").strip()
        print(f"[LLM] 判定=闲聊(云端) reply={cr[:200]!r}", flush=True)
    else:
        logger.warning("未知 routing: %s", routing)

    pcm = result.get("pcm")
    # A malformed sample rate must not abort the turn before any follow-up is
    # scheduled; fall back to 24 kHz as _try_play_line_via_cloud_tts does.
    try:
        sr = int(result.get("sample_rate_hz") or 24000)
    except (TypeError, ValueError):
        sr = 24000
    if pcm is not None and np.asarray(pcm).size > 0:
        self._enqueue_cloud_pcm_playback(np.asarray(pcm, dtype=np.int16), sr)
    elif self._cloud_fallback_local:
        if routing == "flight_intent" and isinstance(fi, dict):
            fallback_txt = str(fi.get("summary") or "好的。").strip()
        else:
            fallback_txt = (result.get("chat_reply") or "好的。").strip()
        if fallback_txt:
            self._enqueue_llm_speak(fallback_txt)
    else:
        self._enqueue_llm_speak("未收到云端语音。")

    if routing == "chitchat":
        self._pending_chitchat_reprompt_after_tts = True
    elif not scheduled_flight_confirm:
        # The original chain (finish flag / flight_intent / unknown routing)
        # covered every remaining case, so all of them end the wake cycle.
        self._pending_finish_wake_cycle_after_tts = True
|
||
|
||
def _handle_llm_turn_cloud(
    self, user_msg: str, *, finish_wake_after_tts: bool = False
) -> None:
    """Run one text turn (``turn.text``) against the cloud dialogue service.

    On failure the error is printed and logged, and
    ``_recover_from_cloud_failure`` is given the original text together with
    a spoken apology. On success the round-trip timing is printed and the
    result is handed to ``_apply_cloud_dialog_result``.

    Args:
        user_msg: Recognised user text.
        finish_wake_after_tts: Passed on to the result / recovery handlers.
    """
    from voice_drone.core.cloud_voice_client import CloudVoiceError

    assert self._cloud_client is not None

    def _recover(idle_speak: str) -> None:
        self._recover_from_cloud_failure(
            user_msg,
            finish_wake_after_tts=finish_wake_after_tts,
            idle_speak=idle_speak,
        )

    started = time.monotonic()
    try:
        result = self._cloud_client.run_turn(user_msg)
    except CloudVoiceError as exc:
        print(f"[云端] 失败: {exc} (code={exc.code!r})", flush=True)
        logger.error("云端对话失败: %s", exc, exc_info=True)
        _recover("云端服务不可用,请稍后再试。")
        return
    except Exception as exc:  # noqa: BLE001
        print(f"[云端] 异常: {exc}", flush=True)
        logger.error("云端对话异常: %s", exc, exc_info=True)
        _recover("网络异常,请稍后再试。")
        return

    elapsed = time.monotonic() - started
    metrics = result.get("metrics") or {}
    llm_ms = metrics.get("llm_ms")
    first_byte_ms = metrics.get("tts_first_byte_ms")
    print(
        f"[计时] 云端一轮(turn.text) {elapsed:.3f}s "
        f"(llm_ms={llm_ms!r}, "
        f"tts_first_byte_ms={first_byte_ms!r})",
        flush=True,
    )
    self._apply_cloud_dialog_result(result, finish_wake_after_tts=finish_wake_after_tts)
|
||
|
||
def _handle_llm_turn_cloud_pcm(
    self,
    pcm_i16: np.ndarray,
    sample_rate_hz: int,
    *,
    finish_wake_after_tts: bool = False,
) -> None:
    """Run one audio turn (``turn.audio``) against the cloud dialogue service.

    On failure the error is printed and logged, and
    ``_recover_from_cloud_failure`` is called with an empty text and
    ``finish_wake_after_tts=True`` (the caller's flag is not used on this
    path). On success the round-trip timing is printed and the result is
    handed to ``_apply_cloud_dialog_result`` with the caller's flag.

    Args:
        pcm_i16: Captured utterance samples, uploaded as given.
        sample_rate_hz: Sample rate of ``pcm_i16``; coerced to ``int``.
        finish_wake_after_tts: Passed on to the result handler on success.
    """
    from voice_drone.core.cloud_voice_client import CloudVoiceError

    assert self._cloud_client is not None

    def _recover(idle_speak: str) -> None:
        # There is no recognised text to retry locally, hence the empty string.
        self._recover_from_cloud_failure(
            "",
            finish_wake_after_tts=True,
            idle_speak=idle_speak,
        )

    started = time.monotonic()
    try:
        result = self._cloud_client.run_turn_audio(pcm_i16, int(sample_rate_hz))
    except CloudVoiceError as exc:
        print(f"[云端] turn.audio 失败: {exc} (code={exc.code!r})", flush=True)
        logger.error("云端 turn.audio 失败: %s", exc, exc_info=True)
        _recover("云端语音识别失败,请稍后再试。")
        return
    except Exception as exc:  # noqa: BLE001
        print(f"[云端] turn.audio 异常: {exc}", flush=True)
        logger.error("云端 turn.audio 异常: %s", exc, exc_info=True)
        _recover("网络异常,请稍后再试。")
        return

    elapsed = time.monotonic() - started
    metrics = result.get("metrics") or {}
    llm_ms = metrics.get("llm_ms")
    first_byte_ms = metrics.get("tts_first_byte_ms")
    print(
        f"[计时] 云端一轮(turn.audio) {elapsed:.3f}s "
        f"(llm_ms={llm_ms!r}, "
        f"tts_first_byte_ms={first_byte_ms!r})",
        flush=True,
    )
    self._apply_cloud_dialog_result(result, finish_wake_after_tts=finish_wake_after_tts)
|
||
|
||
def _handle_llm_turn_local(
|
||
self, user_msg: str, *, finish_wake_after_tts: bool = False
|
||
) -> None:
|
||
llm = self._ensure_llm()
|
||
if llm is None:
|
||
self._enqueue_llm_speak(
|
||
"大模型未就绪。请确认已下载 GGUF,或设置环境变量 ROCKET_LLM_GGUF 指向模型文件。"
|
||
)
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
|
||
with self._chat_session_lock:
|
||
self._llm_messages = [
|
||
{"role": "system", "content": FLIGHT_INTENT_CHAT_SYSTEM},
|
||
{"role": "user", "content": user_msg},
|
||
]
|
||
messages_snapshot = list(self._llm_messages)
|
||
|
||
if not self._llm_stream_enabled:
|
||
t_llm0 = time.monotonic()
|
||
try:
|
||
out = llm.create_chat_completion(
|
||
messages=messages_snapshot,
|
||
max_tokens=self._llm_max_tokens,
|
||
)
|
||
except Exception as e: # noqa: BLE001
|
||
dt_llm = time.monotonic() - t_llm0
|
||
print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True)
|
||
logger.error("LLM 推理失败: %s", e, exc_info=True)
|
||
with self._chat_session_lock:
|
||
if self._llm_messages and self._llm_messages[-1].get("role") == "user":
|
||
self._llm_messages.pop()
|
||
self._enqueue_llm_speak("推理出错,请稍后再说。")
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
dt_llm = time.monotonic() - t_llm0
|
||
print(f"[计时] LLM 推理 {dt_llm:.3f}s", flush=True)
|
||
|
||
reply = (
|
||
(out.get("choices") or [{}])[0].get("message") or {}
|
||
).get("content", "").strip()
|
||
self._finalize_llm_turn(
|
||
reply, finish_wake_after_tts, streamed_chat=False
|
||
)
|
||
return
|
||
|
||
t_llm0 = time.monotonic()
|
||
try:
|
||
stream = llm.create_chat_completion(
|
||
messages=messages_snapshot,
|
||
max_tokens=self._llm_max_tokens,
|
||
stream=True,
|
||
)
|
||
except Exception as e: # noqa: BLE001
|
||
dt_llm = time.monotonic() - t_llm0
|
||
print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True)
|
||
logger.error("LLM 推理失败: %s", e, exc_info=True)
|
||
with self._chat_session_lock:
|
||
if self._llm_messages and self._llm_messages[-1].get("role") == "user":
|
||
self._llm_messages.pop()
|
||
self._enqueue_llm_speak("推理出错,请稍后再说。")
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
|
||
full_reply = ""
|
||
pending = ""
|
||
tts_budget = self._llm_tts_max_chars
|
||
route: str | None = None
|
||
|
||
try:
|
||
for chunk in stream:
|
||
content = self._chunk_delta_text(chunk)
|
||
if not content:
|
||
continue
|
||
full_reply += content
|
||
if route is None:
|
||
lead = full_reply.lstrip()
|
||
if lead:
|
||
route = "json" if lead[0] == "{" else "chat"
|
||
if route != "chat" or tts_budget <= 0:
|
||
continue
|
||
pending += content
|
||
while tts_budget > 0 and pending:
|
||
segs, pending = take_completed_sentences(pending)
|
||
if segs:
|
||
for seg in segs:
|
||
tts_budget = self._enqueue_segment_capped(seg, tts_budget)
|
||
if tts_budget <= 0:
|
||
break
|
||
continue
|
||
forced, pending = force_soft_split(
|
||
pending, self._stream_tts_chunk_chars
|
||
)
|
||
if not forced:
|
||
break
|
||
for seg in forced:
|
||
tts_budget = self._enqueue_segment_capped(seg, tts_budget)
|
||
if tts_budget <= 0:
|
||
break
|
||
except Exception as e: # noqa: BLE001
|
||
dt_llm = time.monotonic() - t_llm0
|
||
print(f"[计时] LLM 推理 {dt_llm:.3f}s(失败)", flush=True)
|
||
logger.error("LLM 流式推理失败: %s", e, exc_info=True)
|
||
with self._chat_session_lock:
|
||
if self._llm_messages and self._llm_messages[-1].get("role") == "user":
|
||
self._llm_messages.pop()
|
||
self._enqueue_llm_speak("推理出错,请稍后再说。")
|
||
if finish_wake_after_tts:
|
||
self._pending_finish_wake_cycle_after_tts = True
|
||
return
|
||
|
||
dt_llm = time.monotonic() - t_llm0
|
||
print(f"[计时] LLM 推理 {dt_llm:.3f}s", flush=True)
|
||
|
||
reply = full_reply.strip()
|
||
if route == "chat" and tts_budget > 0:
|
||
tail = pending.strip()
|
||
if tail:
|
||
self._enqueue_segment_capped(tail, tts_budget)
|
||
|
||
self._finalize_llm_turn(
|
||
reply, finish_wake_after_tts, streamed_chat=(route == "chat")
|
||
)
|
||
|
||
def start(self) -> None:
    """Start the worker threads, run the optional model preload, then open the microphone.

    Logs a warning and returns when the recognizer is already running.
    If opening the audio stream raises, ``running`` is cleared, both workers
    are sent a ``None`` sentinel and joined briefly, and the exception is
    re-raised. On success a usage banner is printed to stdout.
    """
    if self.running:
        logger.warning("识别器已在运营")
        return

    self.running = True

    stt_worker = threading.Thread(target=self._stt_worker_thread, daemon=True)
    self.stt_thread = stt_worker
    stt_worker.start()

    command_worker = threading.Thread(
        target=self._takeoff_only_command_worker, daemon=True
    )
    self.command_thread = command_worker
    command_worker.start()

    # Preload before opening the mic: otherwise the PortAudio callback keeps
    # pushing data into audio_queue while the main thread has not yet entered
    # process_audio_stream, and the queue (only 10 chunks by default) fills up
    # quickly and triggers the "audio queue full, dropping chunk" warning.
    logger.info("voice_drone_assistant: 准备预加载模型(若启用)…")
    self._preload_llm_and_tts_if_enabled()

    try:
        self.audio_capture.start_stream()
    except BaseException:
        # Roll back: tell both workers to exit, wait briefly, then propagate.
        self.running = False
        for pending in (self.stt_queue, self.command_queue):
            try:
                pending.put(None, timeout=0.5)
            except Exception:  # noqa: BLE001
                pass
        for worker in (self.stt_thread, self.command_thread):
            if worker is not None:
                worker.join(timeout=2.0)
        raise

    started_msg = (
        "voice_drone_assistant: 已启动(对话走云端 WebSocket;TTS 为云端 PCM;飞控见 Socket/offboard)"
        if self._cloud_voice_enabled
        else "voice_drone_assistant: 已启动(无试飞控 Socket;大模型答复走 Kokoro TTS)"
    )
    logger.info(started_msg)

    # Hint at the system-ALSA wrapper unless LD_PRELOAD already points at a
    # libasound under /usr/.
    preload_env = os.environ.get("LD_PRELOAD", "")
    if "libasound.so" not in preload_env or "/usr/" not in preload_env:
        print(
            "\n⚠ 建议用系统 ALSA 启动(conda 下否则常无声或 VAD 不触发):\n"
            " bash with_system_alsa.sh python main.py\n",
            flush=True,
        )

    local_takeoff = self._local_keyword_takeoff_enabled

    if self._cloud_voice_enabled:
        llm_hint = (
            "已启用云端对话:非 takeoff 关键词指令经 WebSocket 上云,播报为云端 TTS 流。\n"
            if local_takeoff
            else "已启用云端对话:指令经 WebSocket 上云,播报为云端 TTS 流(本地口令起飞已关闭)。\n"
        )
    elif self._llm_disabled:
        if local_takeoff:
            llm_hint = "已 ROCKET_LLM_DISABLE=1:除 keywords.yaml 中 takeoff 关键词外,其它指令仅打印,不调大模型。\n"
        else:
            llm_hint = (
                "已 ROCKET_LLM_DISABLE=1 且未启用本地口令起飞(assistant.local_keyword_takeoff_enabled / "
                "ROCKET_LOCAL_KEYWORD_TAKEOFF):指令仅打印,不调大模型。\n"
            )
    else:
        llm_hint = (
            "说「无人机」唤醒后会先播报问候,再听您说一句(不必再带唤醒词);说完后关麦推理,答句播完后再说「"
            f"{self.wake_word_detector.primary}」开始下一轮。非起飞指令走大模型("
            "飞控相关→JSON,否则闲聊)。\n"
        )

    if local_takeoff:
        takeoff_banner = (
            "\n本地口令起飞已开启:说「无人机」+ keywords.yaml 里 takeoff 词(如「起飞演示」)→ 播提示音、"
            "启动 scripts/run_px4_offboard_one_terminal.sh(串口真机)、再播返航提示并结束脚本。\n"
        )
    else:
        takeoff_banner = (
            "\n本地口令起飞已关闭(飞控请用云端 flight_intent / ROS 桥等);"
            "若需恢复 keywords.yaml takeoff → offboard,设 assistant.local_keyword_takeoff_enabled: true 或 "
            "ROCKET_LOCAL_KEYWORD_TAKEOFF=1。\n"
        )

    usage_banner = (
        f"{takeoff_banner}"
        f"{llm_hint}"
        "标记说明:[VAD] 已截段送 STT;[STT] 识别文字;[唤醒] 是否含唤醒词;[LLM] 对话与播报。\n"
        "录音已在启动时选好;扬声器可设 ROCKET_TTS_DEVICE。建议:bash with_system_alsa.sh python …\n"
        "Ctrl+C 退出。\n"
    )
    print(usage_banner, flush=True)
|
||
|
||
def _play_wav_serialized(self, path: Path) -> None:
    """Play the WAV file at ``path`` while holding ``self._audio_play_lock``.

    A missing file is logged and skipped. Any exception raised during
    playback is logged with its traceback and swallowed.
    """
    if path.is_file():
        with self._audio_play_lock:
            try:
                _play_wav_blocking(path)
            except Exception as err:  # noqa: BLE001
                logger.warning("播放 WAV 失败 %s: %s", path, err, exc_info=True)
    else:
        logger.warning("WAV 文件不存在,跳过播放: %s", path)
|
||
|
||
def _run_takeoff_offboard_and_wavs(self) -> None:
    """Launch the offboard script and play the two takeoff prompts around it.

    Intended to run on its own thread: start the offboard script, play the
    first WAV, wait 10 s after it finishes, play the second WAV, then
    terminate the script's process group. ``self._takeoff_side_task_busy``
    is acquired without blocking so a repeated trigger is ignored while a
    run is in progress.
    """
    if not _OFFBOARD_SCRIPT.is_file():
        logger.error("未找到 offboard 脚本: %s", _OFFBOARD_SCRIPT)
        return

    if not self._takeoff_side_task_busy.acquire(blocking=False):
        logger.warning("起飞联动已在执行,忽略重复触发")
        return

    proc: subprocess.Popen | None = None
    try:
        log_path = Path(
            os.environ.get("ROCKET_OFFBOARD_LOG", "/tmp/rocket_drone_offboard_script.log")
        ).expanduser()
        script_argv = [
            "bash",
            str(_OFFBOARD_SCRIPT),
            "/dev/ttyACM0",
            "921600",
            "20",
        ]
        # The parent's handle on the log is only needed while spawning, so it
        # is closed as soon as Popen returns or fails.
        with open(log_path, "ab", buffering=0) as log_f:
            try:
                proc = subprocess.Popen(
                    script_argv,
                    cwd=str(_PROJECT_ROOT),
                    stdout=log_f,
                    stderr=subprocess.STDOUT,
                    # Own session so the whole process group can be terminated later.
                    start_new_session=True,
                )
            except Exception as err:  # noqa: BLE001
                logger.error("启动 run_px4_offboard_one_terminal.sh 失败: %s", err, exc_info=True)
                return

        with self._offboard_proc_lock:
            self._active_offboard_proc = proc

        # Give the script a moment, then report an immediate exit. The prompt
        # sequence below still runs in that case.
        time.sleep(0.5)
        exit_code = proc.poll()
        if exit_code is not None:
            logger.error(
                "offboard 一键脚本已立即结束 (exit=%s),未持续运行。日志: %s (常见原因:找不到 "
                "px4_ctrl_offboard_demo.py、ROS 环境、或串口未连)",
                exit_code,
                log_path,
            )

        logger.info(
            "已启动 offboard 一键脚本 (pid=%s),并播放起飞提示音;脚本输出见 %s",
            proc.pid,
            log_path,
        )

        self._play_wav_serialized(_TAKEOFF_ACK_WAV)
        time.sleep(10.0)
        self._play_wav_serialized(_TAKEOFF_DONE_WAV)
    finally:
        if proc is not None:
            logger.info("第二段 WAV 已播完,终止 offboard 脚本进程组 (pid=%s)", proc.pid)
            _terminate_process_group(proc)
            with self._offboard_proc_lock:
                # Only clear the slot if it still refers to this run's process.
                if self._active_offboard_proc is proc:
                    self._active_offboard_proc = None
        self._takeoff_side_task_busy.release()
|
||
|
||
def _takeoff_only_command_worker(self) -> None:
    """Consume ``command_queue`` and drive the wake-word flow until stopped.

    Each item is routed by the current wake phase. A ``None`` item ends the
    loop, and a ``(_PCM_TURN_MARKER, data, n)`` tuple goes to the PCM uplink
    handler. When idle, text containing the wake word either takes the fast
    path (a command followed the wake word in the same utterance) or starts
    a new wake cycle. ``task_done()`` is called for every dequeued item.
    """
    logger.info("唤醒流程命令线程已启动")

    ignored_phases = (
        int(_WakeFlowPhase.LLM_BUSY),
        int(_WakeFlowPhase.GREETING_WAIT),
    )
    confirm_phase = int(_WakeFlowPhase.FLIGHT_CONFIRM_LISTEN)
    one_shot_phase = int(_WakeFlowPhase.ONE_SHOT_LISTEN)

    while self.running:
        try:
            text = self.command_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        except Exception as e:  # noqa: BLE001
            logger.error(f"命令处理线程错误: {e}", exc_info=True)
            continue

        try:
            if text is None:
                # Shutdown sentinel.
                break

            try:
                is_pcm_turn = (
                    isinstance(text, tuple)
                    and len(text) == 3
                    and text[0] == _PCM_TURN_MARKER
                )
                if is_pcm_turn:
                    self._handle_pcm_uplink_turn(text[1], int(text[2]))
                    continue

                with self._wake_flow_lock:
                    phase = self._wake_phase

                # Text arriving while busy or during the greeting is dropped.
                if phase in ignored_phases:
                    continue

                if phase == confirm_phase:
                    self._handle_flight_confirm_text(text)
                    continue

                if phase == one_shot_phase:
                    with self._wake_flow_lock:
                        self._wake_phase = int(_WakeFlowPhase.LLM_BUSY)
                    self._process_one_shot_command(text)
                    continue

                is_wake, matched = self.wake_word_detector.detect(text)
                if not is_wake:
                    logger.debug("未检测到唤醒词,忽略: %s", text)
                    print_stt = os.environ.get("ROCKET_PRINT_STT", "").lower()
                    if print_stt in ("1", "true", "yes"):
                        print(
                            f"[唤醒] 未命中「{self.wake_word_detector.primary}」,原文: {text!r}",
                            flush=True,
                        )
                    continue

                logger.info("唤醒词命中: %s", matched)
                command_text = self.wake_word_detector.extract_command_text(text)
                follow = (command_text or "").strip()
                if follow:
                    # The fast path's return value does not change what happens next.
                    self._wake_fast_path_process_follow(follow)
                else:
                    self._begin_wake_cycle(None)

            except Exception as e:  # noqa: BLE001
                logger.error("命令处理失败: %s", e, exc_info=True)
        finally:
            self.command_queue.task_done()

    logger.info("唤醒流程命令线程已停止")
|
||
|
||
def stop(self) -> None:
    """Stop recognition and release worker, audio, process and network resources.

    Does nothing when the recognizer is not running. Otherwise it clears
    ``running``, cancels the pending timers, signals and joins the worker
    threads, discards queued LLM playback, terminates a still-running
    offboard script, closes the microphone stream and the cloud client, and
    disconnects the socket client if it is connected.

    The ``None`` sentinels are enqueued with a short timeout so shutdown
    cannot hang on a full bounded queue; this matches the failure path in
    ``start()``.
    """
    if not self.running:
        return

    self.running = False

    self._cancel_prompt_listen_timer()
    self._cancel_flight_confirm_timer()
    with self._flight_confirm_timer_lock:
        self._pending_flight_confirm = None
        self._pending_flight_confirm_after_tts = False

    # Best-effort sentinels. A blocking put() on a full queue would wait
    # forever if the consumer is stalled, leaving the process unkillable by
    # Ctrl+C. The command worker also polls ``self.running``.
    # NOTE(review): the STT worker's loop is not visible here; confirm it
    # also exits on ``running`` when the sentinel cannot be delivered.
    shutdown_targets = (
        (self.stt_thread, self.stt_queue, "stt_queue"),
        (self.command_thread, self.command_queue, "command_queue"),
    )
    for worker, work_queue, label in shutdown_targets:
        if worker is None:
            continue
        try:
            work_queue.put(None, timeout=0.5)
        except queue.Full:
            logger.warning("停止时 %s 已满,未能投递退出标记", label)
    for worker, _work_queue, _label in shutdown_targets:
        if worker is not None:
            worker.join(timeout=2.0)

    # Do not call speak_text on this thread: it blocks for seconds to tens of
    # seconds, and repeated Ctrl+C would still fail to kill the process.
    self._discard_llm_playback_queue()

    with self._offboard_proc_lock:
        op = self._active_offboard_proc
        self._active_offboard_proc = None
    if op is not None and op.poll() is None:
        logger.info("主程序退出:终止仍在运行的 offboard 脚本")
        _terminate_process_group(op)

    try:
        self.audio_capture.stop_stream()
    except KeyboardInterrupt:
        logger.info("关闭麦克风流时中断,跳过")
    except Exception as e:  # noqa: BLE001
        logger.warning("关闭麦克风流失败: %s", e)

    if self._cloud_client is not None:
        try:
            self._cloud_client.close()
        except Exception as e:  # noqa: BLE001
            logger.debug("关闭云端 WebSocket: %s", e)

    if self.socket_client.connected:
        self.socket_client.disconnect()

    logger.info("voice_drone_assistant 已停止")
    print("\n已退出。", flush=True)
|
||
|
||
|
||
def main() -> None:
    """Command-line entry point: choose the input device, then run the recognizer.

    Device selection precedence:
      1. ``--input-index`` / ``-I``;
      2. the ``ROCKET_INPUT_DEVICE_INDEX`` environment variable (an integer);
      3. the interactive prompt, unless ``--non-interactive`` or
         ``ROCKET_NON_INTERACTIVE`` is set, in which case the value from
         system.yaml is used.

    ``KeyboardInterrupt`` during ``app.run()`` is logged, and ``app.stop()``
    is called on the way out whenever the app is still marked as running.
    """
    ap = argparse.ArgumentParser(
        description="无人机语音:唤醒 → 问候 → 一句指令 → 起飞或 LLM 播报 → 再唤醒"
    )
    ap.add_argument(
        "--input-index",
        "-I",
        type=int,
        default=None,
        help="跳过交互菜单,直接指定 PyAudio 录音设备索引(与启动时「PyAudio_index=」一致)。",
    )
    ap.add_argument(
        "--non-interactive",
        action="store_true",
        help="不选设备:用 system.yaml 的 audio.input_device_index(为 null 时自动枚举默认可录音设备)。",
    )
    ap.add_argument(
        "--no-preload",
        action="store_true",
        help="不预加载 Qwen/Kokoro,缩短启动时间(首轮对话与首次播报会变慢)。",
    )
    args = ap.parse_args()
    non_inter = args.non_interactive or os.environ.get(
        "ROCKET_NON_INTERACTIVE", ""
    ).lower() in ("1", "true", "yes")

    idx = args.input_index
    if idx is None:
        raw_ix = os.environ.get("ROCKET_INPUT_DEVICE_INDEX", "").strip()
        if raw_ix:
            # Let int() decide: str.isdigit() accepts characters such as "²"
            # that int() rejects, which used to crash here with ValueError.
            try:
                idx = int(raw_ix)
            except ValueError:
                logger.warning(
                    "忽略无效的 ROCKET_INPUT_DEVICE_INDEX=%r(应为整数)", raw_ix
                )

    if idx is not None:
        from voice_drone.core.mic_device_select import apply_input_device_index_only

        apply_input_device_index_only(idx)
        logger.info("录音设备: PyAudio 索引 %s(CLI/环境变量)", idx)
    elif not non_inter:
        from voice_drone.core.mic_device_select import (
            apply_input_device_index_only,
            prompt_for_input_device_index,
        )

        chosen = prompt_for_input_device_index()
        apply_input_device_index_only(chosen)
    else:
        logger.info(
            "非交互模式:使用 system.yaml 的 audio.input_device_index(null=自动探测)"
        )

    app = TakeoffPrintRecognizer(skip_model_preload=args.no_preload)
    try:
        app.run()
    except KeyboardInterrupt:
        logger.info("用户中断")
    finally:
        # Skip stop() when the app never started or has already stopped.
        if app.running:
            app.stop()
|
||
|
||
|
||
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|