DroneMind/voice_drone/core/streaming_llm_tts.py
2026-04-14 09:54:26 +08:00

47 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""流式闲聊:按句切分文本入队 TTS飞控 JSON 路径由调用方整块推理后再播。"""
from __future__ import annotations
# 句末:切段送合成
_SENTENCE_END = frozenset("。!?;\n")
# 过长且无句末时,优先在以下标点处断开
_SOFT_BREAK = ",、,"
def take_completed_sentences(buffer: str) -> tuple[list[str], str]:
    """Peel every finished sentence off the front of *buffer*.

    A sentence is finished when it ends with a character contained in
    ``_SENTENCE_END``. Each finished piece is whitespace-stripped, and
    pieces that are empty after stripping are dropped.

    Returns:
        A pair ``(segments, remainder)``: the finished pieces in order, and
        the untouched tail of *buffer* after the last terminator (the whole
        buffer when it contains no terminator).
    """
    pieces: list[str] = []
    start = 0
    for pos, ch in enumerate(buffer):
        if ch not in _SENTENCE_END:
            continue
        # The terminator itself belongs to the piece it closes.
        chunk = buffer[start : pos + 1].strip()
        if chunk:
            pieces.append(chunk)
        start = pos + 1
    return pieces, buffer[start:]
def force_soft_split(remainder: str, max_chars: int) -> tuple[list[str], str]:
    """Force the first segment off a remainder that has grown too long.

    Intended for text of at least *max_chars* characters that holds no
    sentence terminator. The cut is placed right after the right-most
    character from ``_SOFT_BREAK`` found within the first *max_chars*
    characters; when there is none (or the only one is the very first
    character) the text is hard-cut at exactly *max_chars* characters.

    Args:
        remainder: Pending text that has not been emitted yet.
        max_chars: Upper bound on the length of the forced segment.

    Returns:
        A pair ``(segments, rest)``. ``segments`` holds the stripped first
        segment, or is empty when nothing was cut or the cut piece was only
        whitespace; ``rest`` is the text following the cut. When
        ``max_chars <= 0`` or *remainder* is shorter than *max_chars*, the
        result is ``([], remainder)``.
    """
    if max_chars <= 0 or len(remainder) < max_chars:
        return [], remainder

    window = remainder[:max_chars]
    # Right-most soft break inside the window; -1 when none is present.
    cut = max((window.rfind(sep) for sep in _SOFT_BREAK), default=-1)
    if cut <= 0:
        # No usable soft break: hard-cut so the segment is exactly max_chars
        # long. (Using max_chars here would emit max_chars + 1 characters,
        # because the slice below is inclusive of index `cut`.)
        cut = max_chars - 1

    head = remainder[: cut + 1].strip()
    tail = remainder[cut + 1 :]
    return ([head] if head else []), tail