feat(assistant): стриминг ответов Квантика (фича 1/6)

Ответ модели «печатается» вживую через SSE поверх POST (fetch-stream,
не EventSource). Бэкенд: callLLMStream (stream:true, парсинг SSE upstream) +
callLLMStreamFailover (failover только до первого куска) + endpoint
POST /assistant/ask/stream (события meta|delta|done; быстрые пути FAQ/кэш/мета
отдаются одним done). buildAskMessages выделен из askModel (DRY).
Клиент: LS.assistantAskStream (fetch-stream + парсер SSE). Виджет: send()
стримит дельты как plain-текст с CSS-кареткой, на done — KaTeX-рендер,
источники, ссылки, оценка. Фоллбэк на sendNonStream (старый путь) если
стриминг недоступен/упал до первого куска. Cache-Control: no-transform
отключает буферизацию compression.

Проверено против живого шлюза: 24 дельты, первый текст ~1.3с, 100% русский.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Maxim Dolgolyov
2026-06-24 14:50:11 +03:00
parent 5b4d9324a4
commit 089f93b8ee
4 changed files with 240 additions and 3 deletions
+36 -1
View File
@@ -1183,7 +1183,7 @@ window.LS = {
customSimsList, customSimGet, customSimCreate, customSimUpdate, customSimDelete,
customSimShare, customSimClone, customSimRelated, customSimAddLink, customSimDelLink,
gameProgressList, gameProgressSubmit,
assistantContext, assistantSeen, assistantDismiss, assistantSettings, assistantAsk, assistantFlashcards, assistantFeedback, assistantMemory, assistantMemoryClear, imageGen, imageGenStatus,
assistantContext, assistantSeen, assistantDismiss, assistantSettings, assistantAsk, assistantAskStream, assistantFlashcards, assistantFeedback, assistantMemory, assistantMemoryClear, imageGen, imageGenStatus,
adminGetAssistant, adminSaveAssistant, adminTestAssistant, adminReindexTextbooks,
adminSaveProvider, adminDeleteProvider, adminSetActiveProvider, adminAssistantModels,
adminAssistantScan, adminAssistantProbe, adminAssistantApplyModels,
@@ -1423,6 +1423,41 @@ async function assistantSeen(ruleId) { return req('POST', '/assistant/seen', {
/**
 * Report that an assistant rule was dismissed.
 * Issues POST /assistant/dismiss with the body `{ ruleId: rid }`.
 *
 * @param {*} rid - Rule identifier, forwarded as `ruleId`.
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantDismiss(rid) {
  const payload = { ruleId: rid };
  return req('POST', '/assistant/dismiss', payload);
}
/**
 * Update assistant settings.
 * Issues PATCH /assistant/settings, passing `d` through untouched as the request body.
 *
 * @param {*} d - Settings payload handed to `req` as-is.
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantSettings(d) {
  const method = 'PATCH';
  const path = '/assistant/settings';
  return req(method, path, d);
}
/**
 * Ask the assistant a question (non-streaming).
 * Issues POST /assistant/ask. Falsy `context`, `history` and `mode` are
 * replaced by `undefined` in the request body.
 *
 * @param {*} q - The question, sent as `q`.
 * @param {*} [context] - Optional context; falsy values become `undefined`.
 * @param {*} [history] - Optional history; falsy values become `undefined`.
 * @param {*} [mode] - Optional mode; falsy values become `undefined`.
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantAsk(q, context, history, mode) {
  const body = {
    q,
    context: context ? context : undefined,
    history: history ? history : undefined,
    mode: mode ? mode : undefined,
  };
  return req('POST', '/assistant/ask', body);
}
/**
 * Streaming ask: POSTs the question to /assistant/ask/stream and parses the
 * response body as Server-Sent Events read through fetch + ReadableStream
 * (not EventSource, which cannot send a POST body).
 *
 * Each event's `data:` payload is parsed as JSON and dispatched by event name:
 *   - `delta` -> cbs.onDelta(text)  where text is `payload.t` or '' when absent
 *   - `meta`  -> cbs.onMeta(payload)
 *   - `done`  -> cbs.onDone(payload)
 * Events with no data, unparseable JSON, or any other name are ignored.
 *
 * @param {*} q - The question, sent as `q`.
 * @param {*} [context] - Optional context; falsy values are omitted from the body.
 * @param {*} [history] - Optional history; falsy values are omitted from the body.
 * @param {*} [mode] - Optional mode; falsy values are omitted from the body.
 * @param {{onMeta?: Function, onDelta?: Function, onDone?: Function}} [cbs] - Event callbacks.
 * @returns {Promise<void>} Resolves once the stream has been fully consumed.
 * @throws {Error} 'stream failed' (with a `status` property) when the response
 *   is not ok or has no body; also rethrows read errors and callback errors.
 */
async function assistantAskStream(q, context, history, mode, cbs) {
  const handlers = cbs || {};
  const token = getToken();
  const headers = { 'Content-Type': 'application/json' };
  if (token) headers['Authorization'] = `Bearer ${token}`;
  const res = await fetch(API + '/assistant/ask/stream', {
    method: 'POST',
    headers,
    body: JSON.stringify({ q, context: context || undefined, history: history || undefined, mode: mode || undefined }),
  });
  if (!res.ok || !res.body) throw Object.assign(new Error('stream failed'), { status: res.status });

  const reader = res.body.getReader();
  const dec = new TextDecoder();
  let buf = '';

  // Parse one event block (lines separated by '\n') and dispatch it.
  const handle = (block) => {
    let ev = 'message';
    let data = '';
    block.split('\n').forEach((ln) => {
      if (ln.indexOf('event:') === 0) ev = ln.slice(6).trim();
      else if (ln.indexOf('data:') === 0) data += ln.slice(5).trim();
    });
    if (!data) return;
    let obj;
    // Best effort: a malformed payload drops this one event, not the whole stream.
    try { obj = JSON.parse(data); } catch (e) { return; }
    // A JSON `null` payload must not crash the delta path.
    if (ev === 'delta' && handlers.onDelta) handlers.onDelta((obj && obj.t) || '');
    else if (ev === 'meta' && handlers.onMeta) handlers.onMeta(obj);
    else if (ev === 'done' && handlers.onDone) handlers.onDone(obj);
  };

  // Append decoded text, normalise CRLF / CR line endings to LF and dispatch
  // every complete event (terminated by a blank line).
  const feed = (text) => {
    buf += text;
    // A trailing CR may be the first half of a CRLF split across two chunks:
    // hold it back so it is not mistaken for a line ending of its own.
    let heldCR = '';
    if (buf.endsWith('\r')) {
      heldCR = '\r';
      buf = buf.slice(0, -1);
    }
    buf = buf.replace(/\r\n?/g, '\n');
    let idx;
    while ((idx = buf.indexOf('\n\n')) >= 0) {
      const block = buf.slice(0, idx);
      buf = buf.slice(idx + 2);
      if (block.trim()) handle(block);
    }
    buf += heldCR;
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      feed(dec.decode(value, { stream: true }));
    }
    // Flush bytes still buffered in the decoder, then handle a final event
    // that was not followed by a blank line.
    const rest = (buf + dec.decode()).replace(/\r\n?/g, '\n');
    buf = '';
    if (rest.trim()) handle(rest);
  } catch (err) {
    // Stop downloading when reading or a callback failed; the original error
    // is what the caller needs, so a failure of cancel() itself is ignored.
    reader.cancel().catch(() => {});
    throw err;
  }
}
/**
 * Request flashcards from the assistant.
 * Issues POST /assistant/flashcards with the body `{ text, title, count }`.
 *
 * @param {*} text - Sent as `text`.
 * @param {*} title - Sent as `title`.
 * @param {*} count - Sent as `count`.
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantFlashcards(text, title, count) {
  const payload = { text: text, title: title, count: count };
  return req('POST', '/assistant/flashcards', payload);
}
/**
 * Submit a rating for an assistant answer.
 * Issues POST /assistant/feedback with `rating` and, when truthy, the
 * question `q` (a falsy `q` is replaced by `undefined`).
 *
 * @param {*} rating - Sent as `rating`, unchanged.
 * @param {*} [q] - Optional question text.
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantFeedback(rating, q) {
  const question = q ? q : undefined;
  return req('POST', '/assistant/feedback', { rating, q: question });
}
/**
 * Fetch the assistant memory.
 * Issues GET /assistant/memory with no request body.
 *
 * @returns {Promise<*>} Whatever `req` resolves to.
 */
async function assistantMemory() {
  const path = '/assistant/memory';
  return req('GET', path);
}