From 089f93b8eed715499d5786cc758fdce6d1b853de Mon Sep 17 00:00:00 2001 From: Maxim Dolgolyov Date: Wed, 24 Jun 2026 14:50:11 +0300 Subject: [PATCH] =?UTF-8?q?feat(assistant):=20=D1=81=D1=82=D1=80=D0=B8?= =?UTF-8?q?=D0=BC=D0=B8=D0=BD=D0=B3=20=D0=BE=D1=82=D0=B2=D0=B5=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=9A=D0=B2=D0=B0=D0=BD=D1=82=D0=B8=D0=BA=D0=B0=20(?= =?UTF-8?q?=D1=84=D0=B8=D1=87=D0=B0=201/6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ответ модели «печатается» вживую через SSE поверх POST (fetch-stream, не EventSource). Бэкенд: callLLMStream (stream:true, парсинг SSE upstream) + callLLMStreamFailover (failover только до первого куска) + endpoint POST /assistant/ask/stream (события meta|delta|done; быстрые пути FAQ/кэш/мета отдаются одним done). buildAskMessages выделен из askModel (DRY). Клиент: LS.assistantAskStream (fetch-stream + парсер SSE). Виджет: send() стримит дельты как plain-текст с CSS-кареткой, на done — KaTeX-рендер, источники, ссылки, оценка. Фоллбэк на sendNonStream (старый путь) если стриминг недоступен/упал до первого куска. Cache-Control: no-transform отключает буферизацию compression. Проверено против живого шлюза: 24 дельты, первый текст ~1.3с, 100% русский. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/controllers/assistantController.js | 127 +++++++++++++++++- backend/src/routes/assistant.js | 1 + frontend/js/assistant.js | 78 +++++++++++ js/api.js | 37 ++++- 4 files changed, 240 insertions(+), 3 deletions(-) diff --git a/backend/src/controllers/assistantController.js b/backend/src/controllers/assistantController.js index a31f897..cc1a244 100644 --- a/backend/src/controllers/assistantController.js +++ b/backend/src/controllers/assistantController.js @@ -453,6 +453,64 @@ async function callLLMFailover(messages, maxTokens) { return last; } +/* Потоковый вызов OpenAI-совместимого chat/completions (stream:true). + * onDelta(piece) — на каждый кусок текста. Возвращает { text, any, error }. 
*/ +async function callLLMStream(messages, maxTokens, cfg, onDelta) { + if (typeof fetch !== 'function' || !cfg.on) return { text: null, any: false, error: 'off' }; + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), 60000); // стриминг длиннее обычного + try { + const r = await fetch(cfg.url, { + method: 'POST', + headers: Object.assign({ 'Content-Type': 'application/json' }, cfg.key ? { Authorization: `Bearer ${cfg.key}` } : {}), + body: JSON.stringify({ model: cfg.model, temperature: 0.3, max_tokens: maxTokens || 1200, messages, stream: true }), + signal: ctrl.signal, + }); + if (!r.ok) return { text: null, any: false, error: r.status === 429 ? 'rate_limit' : 'http', status: r.status }; + if (!r.body) return { text: null, any: false, error: 'empty' }; + const dec = new TextDecoder(); + let buf = '', full = '', any = false; + for await (const chunk of r.body) { + buf += dec.decode(chunk, { stream: true }); + let nl; + while ((nl = buf.indexOf('\n')) >= 0) { + const line = buf.slice(0, nl).trim(); buf = buf.slice(nl + 1); + if (!line.startsWith('data:')) continue; + const data = line.slice(5).trim(); + if (data === '[DONE]') return { text: full || null, any, error: full ? null : 'empty' }; + try { + const j = JSON.parse(data); + const d = j.choices && j.choices[0] && j.choices[0].delta; + const piece = d && d.content; + if (piece) { full += piece; any = true; onDelta(piece); } + } catch (e) { /* частичный/служебный кусок — пропускаем */ } + } + } + return { text: full || null, any, error: full ? null : 'empty' }; + } catch (e) { return { text: null, any: false, error: e.name === 'AbortError' ? 'timeout' : 'network' }; } + finally { clearTimeout(timer); } +} + +/* Стриминг с перебором провайдеров. Failover возможен ТОЛЬКО до первого куска; + * как только клиенту ушёл текст (any) — остаёмся на этом провайдере. 
*/ +async function callLLMStreamFailover(messages, maxTokens, onDelta) { + const cfgs = providersOrdered(); + if (!cfgs.length) return { text: null, error: 'off' }; + let firstErr = null; + for (let i = 0; i < cfgs.length; i++) { + const res = await callLLMStream(messages, maxTokens, cfgs[i], onDelta); + if (i === 0) firstErr = res.error; + if (res.text) { + if (i === 0) _clearFailover(); else _recordFailover(cfgs[0], cfgs[i], firstErr); + return res; + } + if (res.any) return res; // часть уже улетела клиенту — переключиться нельзя + if (!_RETRYABLE[res.error]) break; + } + if (_RETRYABLE[firstErr]) _recordFailover(cfgs[0], null, firstErr); + return { text: null, error: firstErr || 'error' }; +} + /* Тест-пинг для админки: подробный статус (status/ошибка/пример ответа). */ async function pingLLM(override) { const cfg = override || llmConfig(); @@ -498,7 +556,8 @@ const META_RE = new RegExp('(' + _SELF + '[\\sа-яёa-z0-9,?!.-]{0,25}' + _TERM '|на\\s+ч[её]м\\s+ты\\s+(?:работа|сдела|постро|основ)|кто\\s+тебя\\s+(?:сделал|создал|обуч|разработ|написал)|систем[а-яё]*\\s+промпт|what\\s+model\\s+are\\s+you|which\\s+(?:ai\\s+)?model|your\\s+system\\s+prompt)', 'i'); const META_ANSWER = 'Я — Квантик, помощник LearnSpace. Помогаю с учёбой и навигацией по платформе. Давай вернёмся к делу — что объяснить или подсказать?'; -async function askModel(q, hits, context, history, role, mode, mem) { +// Сборка messages+cap для модели — общая для обычного и стримингового ответа. +function buildAskMessages(q, hits, context, history, role, mode, mem) { const ref = hits.map((h, i) => `${i + 1}. ${h.q}\n${h.a}${h.url ? ` (раздел: ${h.url})` : ''}`).join('\n') || '(пусто)'; const user = (context ? 
`Контекст (опирайся на него, если относится к вопросу):\n${context}\n\n` : '') + `Справка по платформе:\n${ref}\n\nВопрос: ${q}`; @@ -518,6 +577,11 @@ async function askModel(q, hits, context, history, role, mode, mem) { msgs.push({ role: 'user', content: user }); // подсказка короткая; ответ/проверка — длиннее, чтобы пошаговое решение с формулами не обрезалось на середине const cap = mode === 'hint' ? 320 : (mode === 'check' ? 900 : 1200); + return { msgs, cap }; +} + +async function askModel(q, hits, context, history, role, mode, mem) { + const { msgs, cap } = buildAskMessages(q, hits, context, history, role, mode, mem); return callLLMFailover(msgs, cap); } @@ -574,6 +638,65 @@ async function ask(req, res) { res.json({ source: 'faq', answer: null, answers: faqJson, sources: [] }); } +/* ── POST /api/assistant/ask/stream ── то же, что ask, но ответ модели стримится + * по SSE (event: meta|delta|done). Быстрые пути (FAQ/кэш/мета) отдаются одним done. */ +async function askStream(req, res) { + res.setHeader('Content-Type', 'text/event-stream; charset=utf-8'); + res.setHeader('Cache-Control', 'no-cache, no-transform'); + res.setHeader('Connection', 'keep-alive'); + res.setHeader('X-Accel-Buffering', 'no'); // не буферизовать за прокси + if (res.flushHeaders) res.flushHeaders(); + const sse = (event, data) => { try { res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`); } catch (e) {} }; + + const q = String((req.body && req.body.q) || '').trim().slice(0, 500); + if (!q || q.length < 2) { sse('done', { source: 'faq', answer: null, answers: [] }); return res.end(); } + if (META_RE.test(q)) { sse('delta', { t: META_ANSWER }); sse('done', { source: 'model', answers: [], sources: [] }); return res.end(); } + const pageCtx = String((req.body && req.body.context) || '').slice(0, 4000); + const mode = ['hint', 'check'].includes(req.body && req.body.mode) ? 
req.body.mode : 'answer'; + let history = (req.body && req.body.history); + history = Array.isArray(history) ? history.slice(-6) : []; + const hits = searchFaq(q, 3); + const faqJson = hits.map(h => ({ id: h.id, q: h.q, a: h.a, url: h.url || null })); + sse('meta', { answers: faqJson }); + + if (!providersOrdered().length) { bumpUsage('faq'); sse('done', { source: 'faq', answer: null, answers: faqJson, sources: [] }); return res.end(); } + + const rag = ragContext(q); + const mem = _memoryBlock(req.user.id); + const cacheable = mode === 'answer' && !pageCtx && !history.length && !mem; + const qhash = q.toLowerCase().replace(/\s+/g, ' ').trim(); + if (cacheable) { + try { + const c = db.prepare("SELECT answer FROM assistant_cache WHERE qhash = ? AND created_at > datetime('now','-7 days')").get(qhash); + if (c) { bumpUsage('cache_hits'); sse('delta', { t: c.answer }); sse('done', { source: 'model', answers: faqJson, sources: rag.sources, cached: true }); return res.end(); } + } catch (e) {} + } + if (rag.sources && rag.sources.length) sse('meta', { sources: rag.sources }); + + let context = pageCtx; + if (rag.text) context = (context ? 
context + '\n\n' : '') + 'Из учебников:\n' + rag.text; + const { msgs, cap } = buildAskMessages(q, hits, context, history, req.user && req.user.role, mode, mem); + + let full = ''; + let r = { text: null, error: 'network' }; + try { r = await callLLMStreamFailover(msgs, cap, (piece) => { full += piece; sse('delta', { t: piece }); }); } + catch (e) { r = { text: null, error: 'network' }; } + + const answer = (r && r.text) || full; + if (answer) { + bumpUsage('model_calls'); + if (cacheable) { try { db.prepare("INSERT OR REPLACE INTO assistant_cache (qhash, answer, created_at) VALUES (?, ?, datetime('now'))").run(qhash, answer); } catch (e) {} } + if (_setting('assistant_memory') !== '0' && (mode === 'check' || history.length >= 4)) _extractMemory(req.user.id, q, answer); + sse('done', { source: 'model', answers: faqJson, sources: rag.sources }); + return res.end(); + } + bumpUsage('faq'); + if (r && r.error === 'rate_limit') sse('done', { source: 'limit', answer: 'Сейчас слишком много запросов к ИИ за короткое время — подожди минутку и спроси снова. Память диалога не потеряется.', answers: faqJson, sources: [] }); + else if (r && (r.error === 'timeout' || r.error === 'network' || r.error === 'http')) sse('done', { source: 'error', answer: 'Не получилось обратиться к ИИ. Попробуй ещё раз чуть позже.', answers: faqJson, sources: [] }); + else sse('done', { source: 'faq', answer: null, answers: faqJson, sources: [] }); + res.end(); +} + /* ── POST /api/assistant/feedback { rating, q? } ── лайк/дизлайк ответа ── */ function feedback(req, res) { const rating = (req.body && req.body.rating) === 1 ? 1 : ((req.body && req.body.rating) === -1 ? 
-1 : 0); @@ -623,4 +746,4 @@ async function flashcardsFromText(req, res) { res.json({ title, cards }); } -module.exports = { getContext, markSeen, dismiss, setSettings, ask, flashcardsFromText, feedback, getMemory, clearMemory, getStudentProfile, llmConfig, pingLLM, clearFailover: _clearFailover, callLLMFailover }; +module.exports = { getContext, markSeen, dismiss, setSettings, ask, askStream, flashcardsFromText, feedback, getMemory, clearMemory, getStudentProfile, llmConfig, pingLLM, clearFailover: _clearFailover, callLLMFailover }; diff --git a/backend/src/routes/assistant.js b/backend/src/routes/assistant.js index ddfb4f6..56d40fd 100644 --- a/backend/src/routes/assistant.js +++ b/backend/src/routes/assistant.js @@ -17,6 +17,7 @@ router.post('/seen', ctrl.markSeen); router.post('/dismiss', ctrl.dismiss); router.patch('/settings', ctrl.setSettings); router.post('/ask', requirePermissionForStudents('assistant.use'), askLimiter, ctrl.ask); +router.post('/ask/stream', requirePermissionForStudents('assistant.use'), askLimiter, ctrl.askStream); router.post('/flashcards', requirePermissionForStudents('assistant.use'), fcLimiter, ctrl.flashcardsFromText); router.post('/feedback', ctrl.feedback); router.get('/memory', ctrl.getMemory); diff --git a/frontend/js/assistant.js b/frontend/js/assistant.js index 6118e5c..2424781 100644 --- a/frontend/js/assistant.js +++ b/frontend/js/assistant.js @@ -322,6 +322,10 @@ '.asst-rich .katex-display::-webkit-scrollbar{height:6px;}', '.asst-rich .katex-display::-webkit-scrollbar-thumb{background:rgba(15,23,42,.18);border-radius:99px;}', '.asst-rich .katex{max-width:100%;}', + // мигающий курсор во время стриминга ответа (CSS-каретка, без глифа) + '.asst-streaming{white-space:pre-wrap;}', + '.asst-streaming::after{content:"";display:inline-block;width:2px;height:1em;vertical-align:-2px;margin-left:2px;background:#9B5DE5;animation:asst-blink 1s steps(2) infinite;}', + '@keyframes asst-blink{50%{opacity:0;}}', 
'.asst-md-h{font-weight:800;color:#0F172A;margin:6px 0 2px;}', '.asst-chat{max-height:46vh;overflow:auto;display:flex;flex-direction:column;gap:8px;margin-bottom:8px;}', '.asst-chat:empty{display:none;}', @@ -604,6 +608,80 @@ }); } function send(q, context, chatEl, mode) { + q = (q || '').trim(); + if (q.length < 2) return; + if (mode === 'draw') return drawInChat(q, chatEl); + // стриминг недоступен (старый кэш api.js / нет ReadableStream) — обычный путь + if (!LS.assistantAskStream || typeof ReadableStream === 'undefined') return sendNonStream(q, context, chatEl, mode); + + var history = _chat.slice(-6); + _chat.push({ role: 'user', content: q }); + var u = msgEl('user'); u.textContent = q; chatEl.appendChild(u); + var ph = msgEl('assistant'); ph.className += ' asst-msg-ph'; ph.textContent = mode === 'check' ? 'Проверяю…' : 'Думаю…'; chatEl.appendChild(ph); + chatEl.scrollTop = chatEl.scrollHeight; + + var searchP = (LS.globalSearch ? LS.globalSearch(q, 'all', 3) : Promise.resolve({ results: [] })).catch(function () { return { results: [] }; }); + var meta = { answers: [], sources: [] }, full = '', msgD = null, richEl = null, streamed = false, finalized = false; + + function ensureMsg() { + if (msgD) return; + if (ph.parentNode) ph.remove(); + msgD = msgEl('assistant'); msgD.innerHTML = '
'; + richEl = msgD.querySelector('.asst-rich'); chatEl.appendChild(msgD); + } + function finalize(done) { + if (finalized) return; finalized = true; + done = done || {}; + var src = done.source; + if ((src === 'limit' || src === 'error') && !full) { + _chat.pop(); + if (msgD) msgD.remove(); if (ph.parentNode) ph.remove(); + var em = msgEl('assistant'); em.className += ' asst-msg-ph'; em.textContent = done.answer || 'Сейчас не получилось. Попробуй ещё раз.'; + chatEl.appendChild(em); chatEl.scrollTop = chatEl.scrollHeight; return; + } + var isModel = src === 'model' && (full || done.answer); + searchP.then(function (sres) { + var found = (sres && sres.results) || []; + var ansArr = (done.answers && done.answers.length ? done.answers : meta.answers) || []; + var sources = done.sources || meta.sources || []; + var content = isModel ? (full || done.answer) : ((ansArr[0] && (ansArr[0].q + '\n' + ansArr[0].a)) || 'Не нашёл точного ответа. Попробуй переформулировать или поищи (Ctrl+K).'); + ensureMsg(); richEl.classList.remove('asst-streaming'); + _chat.push({ role: 'assistant', content: content }); + renderRich(richEl, content); + if (isModel && sources.length) { + var sc = document.createElement('div'); sc.className = 'asst-src'; + sc.innerHTML = 'Источник: ' + sources.map(function (s) { return '' + esc(s.title) + (s.section ? ', ' + esc(s.section) : '') + ''; }).join('; '); + chatEl.appendChild(sc); + } + var links = ''; + if (!isModel && ansArr.length) links += ansArr.slice(0, 2).filter(function (a) { return a.url; }).map(function (a) { return '' + esc(a.q) + ''; }).join(' · '); + if (found.length) links += (links ? '
' : '') + 'На платформе: ' + found.slice(0, 3).map(function (f) { return '' + esc(f.title || '…') + ''; }).join(' · '); + if (links) { var l = document.createElement('div'); l.className = 'asst-msg-links'; l.innerHTML = links; chatEl.appendChild(l); } + if (isModel) { + var fb = document.createElement('div'); fb.className = 'asst-fb'; + fb.innerHTML = ''; + fb.querySelectorAll('button').forEach(function (b) { + b.addEventListener('click', function () { if (fb.dataset.done) return; fb.dataset.done = '1'; b.classList.add('on'); try { LS.assistantFeedback(Number(b.getAttribute('data-r')), q); } catch (e) {} }); + }); + chatEl.appendChild(fb); + } + chatEl.scrollTop = chatEl.scrollHeight; + }); + } + + LS.assistantAskStream(q, context, history, mode, { + onMeta: function (m) { if (m.answers) meta.answers = m.answers; if (m.sources) meta.sources = m.sources; }, + onDelta: function (t) { streamed = true; ensureMsg(); full += t; richEl.textContent = full; chatEl.scrollTop = chatEl.scrollHeight; }, + onDone: function (o) { finalize(o); }, + }).then(function () { if (!finalized) finalize({ source: full ? 
'model' : 'faq' }); }) + .catch(function () { + if (finalized) return; + if (!streamed) { if (ph.parentNode) ph.remove(); _chat.pop(); sendNonStream(q, context, chatEl, mode); } + else finalize({ source: 'model' }); + }); + } + + function sendNonStream(q, context, chatEl, mode) { q = (q || '').trim(); if (q.length < 2) return; if (mode === 'draw') return drawInChat(q, chatEl); diff --git a/js/api.js b/js/api.js index a6b9a9b..887c0ea 100644 --- a/js/api.js +++ b/js/api.js @@ -1183,7 +1183,7 @@ window.LS = { customSimsList, customSimGet, customSimCreate, customSimUpdate, customSimDelete, customSimShare, customSimClone, customSimRelated, customSimAddLink, customSimDelLink, gameProgressList, gameProgressSubmit, - assistantContext, assistantSeen, assistantDismiss, assistantSettings, assistantAsk, assistantFlashcards, assistantFeedback, assistantMemory, assistantMemoryClear, imageGen, imageGenStatus, + assistantContext, assistantSeen, assistantDismiss, assistantSettings, assistantAsk, assistantAskStream, assistantFlashcards, assistantFeedback, assistantMemory, assistantMemoryClear, imageGen, imageGenStatus, adminGetAssistant, adminSaveAssistant, adminTestAssistant, adminReindexTextbooks, adminSaveProvider, adminDeleteProvider, adminSetActiveProvider, adminAssistantModels, adminAssistantScan, adminAssistantProbe, adminAssistantApplyModels, @@ -1423,6 +1423,41 @@ async function assistantSeen(ruleId) { return req('POST', '/assistant/seen', { async function assistantDismiss(rid) { return req('POST', '/assistant/dismiss', { ruleId: rid }); } async function assistantSettings(d) { return req('PATCH', '/assistant/settings', d); } async function assistantAsk(q, context, history, mode) { return req('POST', '/assistant/ask', { q, context: context || undefined, history: history || undefined, mode: mode || undefined }); } +// Стриминговый ask: SSE поверх POST (fetch-stream). cbs: { onMeta, onDelta, onDone }. 
+/**
+ * Streaming ask: POSTs to /assistant/ask/stream and parses the SSE response by hand.
+ *
+ * @param {string} q          Question text.
+ * @param {string} [context]  Page context; omitted from the body when falsy.
+ * @param {Array}  [history]  Previous chat turns; omitted from the body when falsy.
+ * @param {string} [mode]     Ask mode; omitted from the body when falsy.
+ * @param {{onMeta?: Function, onDelta?: Function, onDone?: Function}} [cbs]
+ *   onDelta receives the `t` field of each `delta` event ('' when absent);
+ *   onMeta / onDone receive the parsed JSON payload of `meta` / `done` events.
+ * @returns {Promise<void>} Resolves when the stream ends.
+ * @throws {Error} 'stream failed' (with a `status` property) when the response is
+ *   not OK or has no body; also rethrows read errors and errors thrown by callbacks.
+ */
+async function assistantAskStream(q, context, history, mode, cbs) {
+  cbs = cbs || {};
+  const token = getToken();
+  const headers = { 'Content-Type': 'application/json' };
+  if (token) headers['Authorization'] = `Bearer ${token}`;
+  const res = await fetch(API + '/assistant/ask/stream', {
+    method: 'POST', headers,
+    body: JSON.stringify({ q, context: context || undefined, history: history || undefined, mode: mode || undefined }),
+  });
+  if (!res.ok || !res.body) throw Object.assign(new Error('stream failed'), { status: res.status });
+  const reader = res.body.getReader();
+  const dec = new TextDecoder();
+  let buf = '';
+  // Parses one event block (the lines between two blank lines) and dispatches it.
+  const handle = (block) => {
+    let ev = 'message';
+    const data = [];
+    block.split('\n').forEach((ln) => {
+      if (ln.indexOf('event:') === 0) ev = ln.slice(6).trim();
+      else if (ln.indexOf('data:') === 0) data.push(ln.slice(5).trim());
+    });
+    if (!data.length) return;
+    // Several data: lines of one event are joined with a newline, as the SSE format defines.
+    let obj; try { obj = JSON.parse(data.join('\n')); } catch (e) { return; }
+    if (ev === 'delta' && cbs.onDelta) cbs.onDelta(obj.t || '');
+    else if (ev === 'meta' && cbs.onMeta) cbs.onMeta(obj);
+    else if (ev === 'done' && cbs.onDone) cbs.onDone(obj);
+  };
+  let finished = false;
+  try {
+    for (;;) {
+      const { value, done } = await reader.read();
+      if (done) break;
+      // Normalise CRLF on the whole buffer (not per chunk) so a pair split across
+      // two chunks is still collapsed and '\n\n' reliably marks the end of an event.
+      buf = (buf + dec.decode(value, { stream: true })).replace(/\r\n/g, '\n');
+      let idx;
+      while ((idx = buf.indexOf('\n\n')) >= 0) { const block = buf.slice(0, idx); buf = buf.slice(idx + 2); if (block.trim()) handle(block); }
+    }
+    finished = true;
+    buf += dec.decode(); // flush bytes the decoder may still be holding
+    if (buf.trim()) handle(buf);
+  } finally {
+    // A read error or a throwing callback must not leave the response open.
+    // Cancellation is best-effort: the original error is the one worth reporting.
+    if (!finished) reader.cancel().catch(() => {});
+  }
+}
 async function assistantFlashcards(text, title, count) { return req('POST', '/assistant/flashcards', { text, title, count }); }
 async function assistantFeedback(rating, q) { return req('POST', '/assistant/feedback', { rating, q: q || undefined }); }
 async function assistantMemory() { return req('GET', '/assistant/memory'); }