""" 应答 TTS 波形磁盘缓存:文案与 TTS 配置未变时跳过逐条合成,加快启动。 缓存目录:项目根下 cache/ack_tts_pcm/ """ from __future__ import annotations import hashlib import json from pathlib import Path from typing import Dict, List, Optional, Tuple import numpy as np from voice_drone.core.configuration import SYSTEM_TTS_CONFIG # 与 src/core/configuration.py 一致:src/core/tts_ack_cache.py -> parents[2] _PROJECT_ROOT = Path(__file__).resolve().parents[2] ACK_PCM_CACHE_DIR = _PROJECT_ROOT / "cache" / "ack_tts_pcm" MANIFEST_NAME = "manifest.json" CACHE_FORMAT = 1 def _tts_signature() -> dict: tts = SYSTEM_TTS_CONFIG or {} return { "model_dir": str(tts.get("model_dir", "")), "model_name": str(tts.get("model_name", "")), "voice": str(tts.get("voice", "")), "speed": round(float(tts.get("speed", 1.0)), 6), "sample_rate": int(tts.get("sample_rate", 24000)), } def compute_ack_pcm_fingerprint( unique_phrases: List[str], *, global_text: Optional[str] = None, mode_phrases: bool = True, ) -> str: """文案 + TTS 签名变化则指纹变,磁盘缓存失效。""" payload = { "cache_format": CACHE_FORMAT, "tts": _tts_signature(), "mode_phrases": mode_phrases, } if mode_phrases: payload["phrases"] = sorted(unique_phrases) else: payload["global_text"] = (global_text or "").strip() raw = json.dumps(payload, sort_keys=True, ensure_ascii=False) return hashlib.sha256(raw.encode("utf-8")).hexdigest() def _phrase_file_stem(fingerprint: str, phrase: str) -> str: h = hashlib.sha256(fingerprint.encode("utf-8")) h.update(b"\0") h.update(phrase.encode("utf-8")) return h.hexdigest()[:40] def _load_one_npz(path: Path) -> Optional[Tuple[np.ndarray, int]]: try: z = np.load(path, allow_pickle=False) audio = np.asarray(z["audio"], dtype=np.float32).squeeze() sr = int(np.asarray(z["sr"]).reshape(-1)[0]) if audio.size == 0 or sr <= 0: return None return (audio, sr) except Exception: return None def load_cached_phrases( unique_phrases: List[str], fingerprint: str, ) -> Tuple[Dict[str, Tuple[np.ndarray, int]], List[str]]: """ 从磁盘加载与 fingerprint 匹配的缓存。 Returns: (已加载的 phrase -> (audio, sr), 仍需合成的 phrase 列表) """ out: Dict[str, Tuple[np.ndarray, int]] = {} if not unique_phrases: return {}, [] cache_dir = ACK_PCM_CACHE_DIR manifest_path = cache_dir / MANIFEST_NAME if not manifest_path.is_file(): return {}, list(unique_phrases) try: with open(manifest_path, "r", encoding="utf-8") as f: manifest = json.load(f) except Exception: return {}, list(unique_phrases) if int(manifest.get("format", 0)) != CACHE_FORMAT: return {}, list(unique_phrases) if manifest.get("fingerprint") != fingerprint: return {}, list(unique_phrases) files: Dict[str, str] = manifest.get("files") or {} missing: List[str] = [] for phrase in unique_phrases: fname = files.get(phrase) if not fname: missing.append(phrase) continue path = cache_dir / fname if not path.is_file(): missing.append(phrase) continue loaded = _load_one_npz(path) if loaded is None: missing.append(phrase) continue out[phrase] = loaded return out, missing def persist_phrases(fingerprint: str, phrase_pcm: Dict[str, Tuple[np.ndarray, int]]) -> None: """写入/更新整包 manifest 与各句 npz(覆盖同名 manifest)。""" if not phrase_pcm: return cache_dir = ACK_PCM_CACHE_DIR cache_dir.mkdir(parents=True, exist_ok=True) files: Dict[str, str] = {} for phrase, (audio, sr) in phrase_pcm.items(): stem = _phrase_file_stem(fingerprint, phrase) fname = f"{stem}.npz" path = cache_dir / fname audio = np.asarray(audio, dtype=np.float32).squeeze() np.savez_compressed(path, audio=audio, sr=np.array([int(sr)], dtype=np.int32)) files[phrase] = fname manifest = { "format": CACHE_FORMAT, "fingerprint": fingerprint, "files": files, } tmp = cache_dir / (MANIFEST_NAME + ".tmp") with open(tmp, "w", encoding="utf-8") as f: json.dump(manifest, f, ensure_ascii=False, indent=0) tmp.replace(cache_dir / MANIFEST_NAME)