47 lines
1.4 KiB
Python
47 lines
1.4 KiB
Python
"""流式闲聊:按句切分文本入队 TTS;飞控 JSON 路径由调用方整块推理后再播。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
# Sentence terminators: buffered text is cut right after any of these
# characters and the piece is sent to synthesis.
# The second literal adds the fullwidth forms of "!", "?" and ";"
# (U+FF01, U+FF1F, U+FF1B). They are written as escapes so they cannot be
# silently folded into their ASCII look-alikes by width normalisation.
_SENTENCE_END = frozenset("。!?;\n" "\uff01\uff1f\uff1b")
# Preferred break points when the text runs long without any sentence
# terminator: ASCII comma, ideographic comma and fullwidth comma (U+FF0C).
# Kept as a str because it is iterated character by character.
_SOFT_BREAK = ",、\uff0c"
|
||
|
||
|
||
def take_completed_sentences(buffer: str) -> tuple[list[str], str]:
    """Split off every leading piece of ``buffer`` that ends in a sentence terminator.

    Args:
        buffer: Text accumulated so far.

    Returns:
        ``(segments, rest)``. ``segments`` lists the completed pieces in
        order, each including its terminator and stripped of surrounding
        whitespace; pieces that are empty after stripping are dropped.
        ``rest`` is the unmodified tail after the last terminator (the whole
        buffer when it contains no terminator).
    """
    completed: list[str] = []
    start = 0  # index of the first character not yet consumed
    for pos, ch in enumerate(buffer):
        if ch not in _SENTENCE_END:
            continue
        piece = buffer[start : pos + 1].strip()
        if piece:
            completed.append(piece)
        # Advance past the terminator even when the piece was blank.
        start = pos + 1
    return completed, buffer[start:]
|
||
|
||
|
||
def force_soft_split(remainder: str, max_chars: int) -> tuple[list[str], str]:
    """Force one segment off the front of an over-long ``remainder``.

    Meant for text that has reached ``max_chars`` characters without a
    sentence terminator (this function does not check for terminators
    itself). The cut is made just after the last soft-break character found
    within the first ``max_chars`` characters. When there is none, or the
    only one is the very first character, the text is cut at exactly
    ``max_chars`` characters, so the segment never exceeds that length.

    Args:
        remainder: Pending text.
        max_chars: Maximum length of the forced segment; values <= 0 disable
            splitting.

    Returns:
        ``(segments, rest)``. ``segments`` holds the stripped first piece, or
        is empty when no cut was made or the piece was only whitespace.
        ``rest`` is the unmodified text after the cut.
    """
    if max_chars <= 0 or len(remainder) < max_chars:
        return [], remainder

    window = remainder[:max_chars]
    # Right-most soft-break position inside the window; -1 when none is found.
    cut = max((window.rfind(sep) for sep in _SOFT_BREAK), default=-1)
    if cut <= 0:
        # No usable break (a break at index 0 would emit a lone punctuation
        # mark). Hard-cut so the segment is exactly max_chars long: the
        # slice below is [: cut + 1], hence the -1.
        cut = max_chars - 1

    first = remainder[: cut + 1].strip()
    rest = remainder[cut + 1 :]
    return ([first] if first else []), rest
|