From 8b8fe916f0967880e5b962ba4814b1902666208a Mon Sep 17 00:00:00 2001 From: "dolgolyov.alexei" Date: Thu, 19 Mar 2026 13:46:59 +0300 Subject: [PATCH] =?UTF-8?q?Phase=204:=20Documents=20&=20Memory=20=E2=80=94?= =?UTF-8?q?=20upload,=20FTS,=20AI=20tools,=20context=20injection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend: - Document + MemoryEntry models with Alembic migration (GIN FTS index) - File upload endpoint with path traversal protection (sanitized filenames) - Background document text extraction (PyMuPDF) - Full-text search on extracted_text via PostgreSQL tsvector/tsquery - Memory CRUD with enum-validated categories/importance, field allow-list - AI tools: save_memory, search_documents, get_memory (Claude function calling) - Tool execution loop in stream_ai_response (multi-turn tool use) - Context assembly: injects critical memory + relevant doc excerpts - File storage abstraction (local filesystem, S3-swappable) - Secure file deletion (DB flush before disk delete) Frontend: - Document upload dialog (drag-and-drop + file picker) - Document list with status badges, search, download (via authenticated blob) - Document viewer with extracted text preview - Memory list grouped by category with importance color coding - Memory editor with category/importance dropdowns - Documents + Memory pages with full CRUD - Enabled sidebar navigation for both sections Review fixes applied: - Sanitized upload filenames (path traversal prevention) - Download via axios blob (not bare <a href>, preserves auth) - Route ordering: /search before /{id}/reindex - Memory update allows is_active=False + field allow-list - MemoryEditor form resets on mode switch - Literal enum validation on category/importance schemas - DB flush before file deletion for data integrity Co-Authored-By: Claude Opus 4.6 (1M context) --- GeneralPlan.md | 6 +- ...004_create_documents_and_memory_entries.py | 58 ++++++ backend/app/api/v1/documents.py | 126
+++++++++++++ backend/app/api/v1/memory.py | 70 +++++++ backend/app/api/v1/router.py | 4 + backend/app/config.py | 3 + backend/app/models/__init__.py | 4 +- backend/app/models/document.py | 33 ++++ backend/app/models/memory_entry.py | 26 +++ backend/app/models/user.py | 2 + backend/app/schemas/document.py | 32 ++++ backend/app/schemas/memory.py | 43 +++++ backend/app/services/ai_service.py | 173 +++++++++++++++--- backend/app/services/document_service.py | 97 ++++++++++ backend/app/services/memory_service.py | 71 +++++++ backend/app/utils/__init__.py | 0 backend/app/utils/file_storage.py | 34 ++++ backend/app/utils/text_extraction.py | 19 ++ backend/app/workers/__init__.py | 0 backend/app/workers/document_processor.py | 32 ++++ backend/pyproject.toml | 2 + backend/tests/test_documents.py | 97 ++++++++++ backend/tests/test_memory.py | 109 +++++++++++ frontend/public/locales/en/translation.json | 49 +++++ frontend/public/locales/ru/translation.json | 49 +++++ frontend/src/api/documents.ts | 70 +++++++ frontend/src/api/memory.ts | 59 ++++++ .../components/documents/document-list.tsx | 68 +++++++ .../components/documents/document-viewer.tsx | 44 +++++ .../components/documents/upload-dialog.tsx | 100 ++++++++++ frontend/src/components/layout/sidebar.tsx | 4 +- .../src/components/memory/memory-editor.tsx | 90 +++++++++ .../src/components/memory/memory-list.tsx | 66 +++++++ frontend/src/pages/documents.tsx | 104 +++++++++++ frontend/src/pages/memory.tsx | 67 +++++++ frontend/src/routes.tsx | 4 + plans/phase-4-documents-memory.md | 132 +++++++++++++ 37 files changed, 1921 insertions(+), 26 deletions(-) create mode 100644 backend/alembic/versions/004_create_documents_and_memory_entries.py create mode 100644 backend/app/api/v1/documents.py create mode 100644 backend/app/api/v1/memory.py create mode 100644 backend/app/models/document.py create mode 100644 backend/app/models/memory_entry.py create mode 100644 backend/app/schemas/document.py create mode 100644 
backend/app/schemas/memory.py create mode 100644 backend/app/services/document_service.py create mode 100644 backend/app/services/memory_service.py create mode 100644 backend/app/utils/__init__.py create mode 100644 backend/app/utils/file_storage.py create mode 100644 backend/app/utils/text_extraction.py create mode 100644 backend/app/workers/__init__.py create mode 100644 backend/app/workers/document_processor.py create mode 100644 backend/tests/test_documents.py create mode 100644 backend/tests/test_memory.py create mode 100644 frontend/src/api/documents.ts create mode 100644 frontend/src/api/memory.ts create mode 100644 frontend/src/components/documents/document-list.tsx create mode 100644 frontend/src/components/documents/document-viewer.tsx create mode 100644 frontend/src/components/documents/upload-dialog.tsx create mode 100644 frontend/src/components/memory/memory-editor.tsx create mode 100644 frontend/src/components/memory/memory-list.tsx create mode 100644 frontend/src/pages/documents.tsx create mode 100644 frontend/src/pages/memory.tsx create mode 100644 plans/phase-4-documents-memory.md diff --git a/GeneralPlan.md b/GeneralPlan.md index 9d8bd40..92719ed 100644 --- a/GeneralPlan.md +++ b/GeneralPlan.md @@ -189,6 +189,8 @@ Daily scheduled job (APScheduler, 8 AM) reviews each user's memory + recent docs > > **Tracking format**: Both this GeneralPlan and each subplan use `[x]`/`[ ]` checkboxes. GeneralPlan tracks phase-level progress (subplan created, phase completed). Subplans track individual tasks (files created, features implemented, tests passing). Update checkboxes as work is completed so progress is always visible at a glance. > +> **Phase review requirement**: After completing all tasks in a phase, a detailed code review must be performed before marking the phase as completed. 
The review should check: (1) all acceptance criteria are met, (2) code quality and consistency with existing patterns, (3) no security vulnerabilities introduced, (4) all new endpoints tested, (5) frontend TypeScript compiles and Vite builds cleanly, (6) i18n complete for both languages. Review findings and any fixes applied must be noted in the subplan under a `## Review Notes` section. +> > **Subplan structure**: Each subplan must include: > 1. **Goal** — one-sentence summary of what the phase delivers > 2. **Prerequisites** — what must be done before this phase starts @@ -220,8 +222,8 @@ Daily scheduled job (APScheduler, 8 AM) reviews each user's memory + recent docs - Summary: Skills + context_files tables, skills CRUD (general + personal), personal context CRUD, context layering, frontend skill selector + editors ### Phase 4: Documents & Memory -- **Status**: NOT STARTED -- [ ] Subplan created (`plans/phase-4-documents-memory.md`) +- **Status**: IN PROGRESS +- [x] Subplan created (`plans/phase-4-documents-memory.md`) - [ ] Phase completed - Summary: Documents + memory tables, upload + processing pipeline, full-text search, AI tools (save_memory, search_documents, get_memory), frontend document/memory UI diff --git a/backend/alembic/versions/004_create_documents_and_memory_entries.py b/backend/alembic/versions/004_create_documents_and_memory_entries.py new file mode 100644 index 0000000..55d81d5 --- /dev/null +++ b/backend/alembic/versions/004_create_documents_and_memory_entries.py @@ -0,0 +1,58 @@ +"""Create documents and memory_entries tables + +Revision ID: 004 +Revises: 003 +Create Date: 2026-03-19 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID, JSONB + +revision: str = "004" +down_revision: Union[str, None] = "003" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + 
op.create_table( + "documents", + sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")), + sa.Column("user_id", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True), + sa.Column("filename", sa.String(255), nullable=False), + sa.Column("original_filename", sa.String(255), nullable=False), + sa.Column("storage_path", sa.Text, nullable=False), + sa.Column("mime_type", sa.String(100), nullable=False), + sa.Column("file_size", sa.BigInteger, nullable=False), + sa.Column("doc_type", sa.String(50), nullable=False, server_default="other"), + sa.Column("extracted_text", sa.Text, nullable=True), + sa.Column("processing_status", sa.String(20), nullable=False, server_default="pending"), + sa.Column("metadata", JSONB, nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), + ) + + op.execute( + "CREATE INDEX ix_documents_fts ON documents USING gin(to_tsvector('english', coalesce(extracted_text, '')))" + ) + + op.create_table( + "memory_entries", + sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")), + sa.Column("user_id", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True), + sa.Column("category", sa.String(50), nullable=False), + sa.Column("title", sa.String(255), nullable=False), + sa.Column("content", sa.Text, nullable=False), + sa.Column("source_document_id", UUID(as_uuid=True), sa.ForeignKey("documents.id", ondelete="SET NULL"), nullable=True), + sa.Column("importance", sa.String(20), nullable=False, server_default="medium"), + sa.Column("is_active", sa.Boolean, nullable=False, server_default=sa.text("true")), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), + ) + + +def downgrade() -> None: + op.drop_table("memory_entries") + op.execute("DROP INDEX IF EXISTS ix_documents_fts") + 
op.drop_table("documents") diff --git a/backend/app/api/v1/documents.py b/backend/app/api/v1/documents.py new file mode 100644 index 0000000..0573da4 --- /dev/null +++ b/backend/app/api/v1/documents.py @@ -0,0 +1,126 @@ +import asyncio +import uuid +from pathlib import PurePosixPath +from typing import Annotated + +from fastapi import APIRouter, Depends, Query, UploadFile, File, HTTPException, status +from fastapi.responses import FileResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from app.api.deps import get_current_user +from app.config import settings +from app.database import get_db +from app.models.user import User +from app.schemas.document import DocumentListResponse, DocumentResponse, DocumentSearchRequest +from app.services import document_service +from app.utils.file_storage import save_upload, get_file_path +from app.workers.document_processor import process_document + +router = APIRouter(prefix="/documents", tags=["documents"]) + +ALLOWED_MIME_TYPES = [ + "application/pdf", + "image/jpeg", + "image/png", + "image/tiff", + "image/webp", +] + + +@router.post("/", response_model=DocumentResponse, status_code=status.HTTP_201_CREATED) +async def upload_document( + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], + file: UploadFile = File(...), + doc_type: str = Query(default="other"), +): + if file.content_type not in ALLOWED_MIME_TYPES: + raise HTTPException(status_code=400, detail=f"Unsupported file type: {file.content_type}") + + content = await file.read() + if len(content) > settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024: + raise HTTPException(status_code=400, detail=f"File too large. 
Max {settings.MAX_UPLOAD_SIZE_MB}MB") + + doc_id = uuid.uuid4() + safe_name = PurePosixPath(file.filename or "upload").name + filename = f"{doc_id}_{safe_name}" + storage_path = await save_upload(user.id, doc_id, filename, content) + + doc = await document_service.create_document( + db, user.id, filename, safe_name, + storage_path, file.content_type or "application/octet-stream", + len(content), doc_type, + ) + + # Trigger background processing + asyncio.create_task(process_document(doc.id, storage_path, file.content_type or "")) + + return DocumentResponse.model_validate(doc) + + +@router.get("/", response_model=DocumentListResponse) +async def list_documents( + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], + doc_type: str | None = Query(default=None), + processing_status: str | None = Query(default=None), +): + docs = await document_service.get_user_documents(db, user.id, doc_type, processing_status) + return DocumentListResponse(documents=[DocumentResponse.model_validate(d) for d in docs]) + + +@router.get("/{doc_id}", response_model=DocumentResponse) +async def get_document( + doc_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + doc = await document_service.get_document(db, doc_id, user.id) + return DocumentResponse.model_validate(doc) + + +@router.get("/{doc_id}/download") +async def download_document( + doc_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + doc = await document_service.get_document(db, doc_id, user.id) + file_path = get_file_path(doc.storage_path) + if not file_path.exists(): + raise HTTPException(status_code=404, detail="File not found on disk") + return FileResponse( + path=str(file_path), + filename=doc.original_filename, + media_type=doc.mime_type, + ) + + +@router.delete("/{doc_id}", status_code=status.HTTP_204_NO_CONTENT) +async def 
delete_document( + doc_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + await document_service.delete_document(db, doc_id, user.id) + + +@router.post("/search", response_model=DocumentListResponse) +async def search_documents( + data: DocumentSearchRequest, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + docs = await document_service.search_documents(db, user.id, data.query) + return DocumentListResponse(documents=[DocumentResponse.model_validate(d) for d in docs]) + + +@router.post("/{doc_id}/reindex", response_model=DocumentResponse) +async def reindex_document( + doc_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + doc = await document_service.get_document(db, doc_id, user.id) + asyncio.create_task(process_document(doc.id, doc.storage_path, doc.mime_type)) + return DocumentResponse.model_validate(doc) diff --git a/backend/app/api/v1/memory.py b/backend/app/api/v1/memory.py new file mode 100644 index 0000000..d9c0bff --- /dev/null +++ b/backend/app/api/v1/memory.py @@ -0,0 +1,70 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, Query, status +from sqlalchemy.ext.asyncio import AsyncSession + +from app.api.deps import get_current_user +from app.database import get_db +from app.models.user import User +from app.schemas.memory import ( + CreateMemoryRequest, + MemoryEntryListResponse, + MemoryEntryResponse, + UpdateMemoryRequest, +) +from app.services import memory_service + +router = APIRouter(prefix="/memory", tags=["memory"]) + + +@router.post("/", response_model=MemoryEntryResponse, status_code=status.HTTP_201_CREATED) +async def create_memory( + data: CreateMemoryRequest, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + entry = await memory_service.create_memory(db, 
user.id, **data.model_dump()) + return MemoryEntryResponse.model_validate(entry) + + +@router.get("/", response_model=MemoryEntryListResponse) +async def list_memories( + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], + category: str | None = Query(default=None), + importance: str | None = Query(default=None), + is_active: bool | None = Query(default=None), +): + entries = await memory_service.get_user_memories(db, user.id, category, importance, is_active) + return MemoryEntryListResponse(entries=[MemoryEntryResponse.model_validate(e) for e in entries]) + + +@router.get("/{entry_id}", response_model=MemoryEntryResponse) +async def get_memory( + entry_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + entry = await memory_service.get_memory(db, entry_id, user.id) + return MemoryEntryResponse.model_validate(entry) + + +@router.patch("/{entry_id}", response_model=MemoryEntryResponse) +async def update_memory( + entry_id: uuid.UUID, + data: UpdateMemoryRequest, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + entry = await memory_service.update_memory(db, entry_id, user.id, **data.model_dump(exclude_unset=True)) + return MemoryEntryResponse.model_validate(entry) + + +@router.delete("/{entry_id}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_memory( + entry_id: uuid.UUID, + user: Annotated[User, Depends(get_current_user)], + db: Annotated[AsyncSession, Depends(get_db)], +): + await memory_service.delete_memory(db, entry_id, user.id) diff --git a/backend/app/api/v1/router.py b/backend/app/api/v1/router.py index 0d91350..440594f 100644 --- a/backend/app/api/v1/router.py +++ b/backend/app/api/v1/router.py @@ -5,6 +5,8 @@ from app.api.v1.chats import router as chats_router from app.api.v1.admin import router as admin_router from app.api.v1.skills import router as skills_router from 
app.api.v1.users import router as users_router +from app.api.v1.documents import router as documents_router +from app.api.v1.memory import router as memory_router api_v1_router = APIRouter(prefix="/api/v1") @@ -13,6 +15,8 @@ api_v1_router.include_router(chats_router) api_v1_router.include_router(admin_router) api_v1_router.include_router(skills_router) api_v1_router.include_router(users_router) +api_v1_router.include_router(documents_router) +api_v1_router.include_router(memory_router) @api_v1_router.get("/health") diff --git a/backend/app/config.py b/backend/app/config.py index c018238..53bd531 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -15,6 +15,9 @@ class Settings(BaseSettings): ANTHROPIC_API_KEY: str = "" CLAUDE_MODEL: str = "claude-sonnet-4-20250514" + UPLOAD_DIR: str = "/data/uploads" + MAX_UPLOAD_SIZE_MB: int = 20 + FIRST_ADMIN_EMAIL: str = "admin@example.com" FIRST_ADMIN_USERNAME: str = "admin" FIRST_ADMIN_PASSWORD: str = "changeme_admin_password" diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index c35f97d..ab7ee21 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -4,5 +4,7 @@ from app.models.chat import Chat from app.models.message import Message from app.models.context_file import ContextFile from app.models.skill import Skill +from app.models.document import Document +from app.models.memory_entry import MemoryEntry -__all__ = ["User", "Session", "Chat", "Message", "ContextFile", "Skill"] +__all__ = ["User", "Session", "Chat", "Message", "ContextFile", "Skill", "Document", "MemoryEntry"] diff --git a/backend/app/models/document.py b/backend/app/models/document.py new file mode 100644 index 0000000..e61ab53 --- /dev/null +++ b/backend/app/models/document.py @@ -0,0 +1,33 @@ +import uuid + +from sqlalchemy import BigInteger, ForeignKey, Index, String, Text, func, text +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import Mapped, 
mapped_column, relationship + +from app.database import Base + + +class Document(Base): + __tablename__ = "documents" + __table_args__ = ( + Index( + "ix_documents_fts", + text("to_tsvector('english', coalesce(extracted_text, ''))"), + postgresql_using="gin", + ), + ) + + user_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True + ) + filename: Mapped[str] = mapped_column(String(255), nullable=False) + original_filename: Mapped[str] = mapped_column(String(255), nullable=False) + storage_path: Mapped[str] = mapped_column(Text, nullable=False) + mime_type: Mapped[str] = mapped_column(String(100), nullable=False) + file_size: Mapped[int] = mapped_column(BigInteger, nullable=False) + doc_type: Mapped[str] = mapped_column(String(50), nullable=False, default="other") + extracted_text: Mapped[str | None] = mapped_column(Text, nullable=True) + processing_status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending") + metadata_: Mapped[dict | None] = mapped_column("metadata", JSONB, nullable=True) + + user: Mapped["User"] = relationship(back_populates="documents") # noqa: F821 diff --git a/backend/app/models/memory_entry.py b/backend/app/models/memory_entry.py new file mode 100644 index 0000000..f7d8fa9 --- /dev/null +++ b/backend/app/models/memory_entry.py @@ -0,0 +1,26 @@ +import uuid + +from sqlalchemy import Boolean, ForeignKey, String, Text +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.database import Base + + +class MemoryEntry(Base): + __tablename__ = "memory_entries" + + user_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True + ) + category: Mapped[str] = mapped_column(String(50), nullable=False) + title: Mapped[str] = mapped_column(String(255), nullable=False) + content: Mapped[str] = mapped_column(Text, 
nullable=False) + source_document_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), ForeignKey("documents.id", ondelete="SET NULL"), nullable=True + ) + importance: Mapped[str] = mapped_column(String(20), nullable=False, default="medium") + is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + + user: Mapped["User"] = relationship(back_populates="memory_entries") # noqa: F821 + source_document: Mapped["Document | None"] = relationship() # noqa: F821 diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 117cb62..5ffb416 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -27,3 +27,5 @@ class User(Base): sessions: Mapped[list["Session"]] = relationship(back_populates="user", cascade="all, delete-orphan") # noqa: F821 chats: Mapped[list["Chat"]] = relationship(back_populates="user", cascade="all, delete-orphan") # noqa: F821 skills: Mapped[list["Skill"]] = relationship(back_populates="user", cascade="all, delete-orphan") # noqa: F821 + documents: Mapped[list["Document"]] = relationship(back_populates="user", cascade="all, delete-orphan") # noqa: F821 + memory_entries: Mapped[list["MemoryEntry"]] = relationship(back_populates="user", cascade="all, delete-orphan") # noqa: F821 diff --git a/backend/app/schemas/document.py b/backend/app/schemas/document.py new file mode 100644 index 0000000..2c2439d --- /dev/null +++ b/backend/app/schemas/document.py @@ -0,0 +1,32 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, Field + + +class DocumentResponse(BaseModel): + id: uuid.UUID + user_id: uuid.UUID + filename: str + original_filename: str + mime_type: str + file_size: int + doc_type: str + processing_status: str + extracted_text: str | None = None + metadata: dict | None = Field(None, alias="metadata_") + created_at: datetime + + model_config = {"from_attributes": True, "populate_by_name": True} + + +class DocumentListResponse(BaseModel): + documents: 
list[DocumentResponse] + + +class UpdateDocumentRequest(BaseModel): + doc_type: str | None = None + + +class DocumentSearchRequest(BaseModel): + query: str = Field(min_length=1, max_length=500) diff --git a/backend/app/schemas/memory.py b/backend/app/schemas/memory.py new file mode 100644 index 0000000..8df5612 --- /dev/null +++ b/backend/app/schemas/memory.py @@ -0,0 +1,43 @@ +import uuid +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, Field + +CategoryType = Literal["condition", "medication", "allergy", "vital", "document_summary", "other"] +ImportanceType = Literal["critical", "high", "medium", "low"] + + +class CreateMemoryRequest(BaseModel): + category: CategoryType + title: str = Field(min_length=1, max_length=255) + content: str = Field(min_length=1) + source_document_id: uuid.UUID | None = None + importance: ImportanceType = "medium" + is_active: bool = True + + +class UpdateMemoryRequest(BaseModel): + category: CategoryType | None = None + title: str | None = None + content: str | None = None + importance: ImportanceType | None = None + is_active: bool | None = None + + +class MemoryEntryResponse(BaseModel): + id: uuid.UUID + user_id: uuid.UUID + category: str + title: str + content: str + source_document_id: uuid.UUID | None + importance: str + is_active: bool + created_at: datetime + + model_config = {"from_attributes": True} + + +class MemoryEntryListResponse(BaseModel): + entries: list[MemoryEntryResponse] diff --git a/backend/app/services/ai_service.py b/backend/app/services/ai_service.py index cc7cf20..aaf0466 100644 --- a/backend/app/services/ai_service.py +++ b/backend/app/services/ai_service.py @@ -12,9 +12,100 @@ from app.models.message import Message from app.models.skill import Skill from app.services.context_service import DEFAULT_SYSTEM_PROMPT, get_primary_context, get_personal_context from app.services.chat_service import get_chat, save_message +from app.services.memory_service import 
get_critical_memories, create_memory, get_user_memories +from app.services.document_service import search_documents client = AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY) +# --- AI Tool Definitions --- + +AI_TOOLS = [ + { + "name": "save_memory", + "description": "Save important health information to the user's memory. Use this when the user shares critical health data like conditions, medications, allergies, or important health facts.", + "input_schema": { + "type": "object", + "properties": { + "category": { + "type": "string", + "enum": ["condition", "medication", "allergy", "vital", "document_summary", "other"], + "description": "Category of the memory entry", + }, + "title": {"type": "string", "description": "Short title for the memory entry"}, + "content": {"type": "string", "description": "Detailed content of the memory entry"}, + "importance": { + "type": "string", + "enum": ["critical", "high", "medium", "low"], + "description": "Importance level", + }, + }, + "required": ["category", "title", "content", "importance"], + }, + }, + { + "name": "search_documents", + "description": "Search the user's uploaded health documents for relevant information. Use this when you need to find specific health records, lab results, or consultation notes.", + "input_schema": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query to find relevant documents"}, + }, + "required": ["query"], + }, + }, + { + "name": "get_memory", + "description": "Retrieve the user's stored health memories filtered by category. Use this to recall previously saved health information.", + "input_schema": { + "type": "object", + "properties": { + "category": { + "type": "string", + "enum": ["condition", "medication", "allergy", "vital", "document_summary", "other"], + "description": "Optional category filter. 
Omit to get all memories.", + }, + }, + "required": [], + }, + }, +] + + +async def _execute_tool( + db: AsyncSession, user_id: uuid.UUID, tool_name: str, tool_input: dict +) -> str: + """Execute an AI tool and return the result as a string.""" + if tool_name == "save_memory": + entry = await create_memory( + db, user_id, + category=tool_input["category"], + title=tool_input["title"], + content=tool_input["content"], + importance=tool_input["importance"], + ) + await db.commit() + return json.dumps({"status": "saved", "id": str(entry.id), "title": entry.title}) + + elif tool_name == "search_documents": + docs = await search_documents(db, user_id, tool_input["query"], limit=5) + results = [] + for doc in docs: + excerpt = (doc.extracted_text or "")[:1000] + results.append({ + "filename": doc.original_filename, + "doc_type": doc.doc_type, + "excerpt": excerpt, + }) + return json.dumps({"results": results, "count": len(results)}) + + elif tool_name == "get_memory": + category = tool_input.get("category") + entries = await get_user_memories(db, user_id, category=category, is_active=True) + items = [{"category": e.category, "title": e.title, "content": e.content, "importance": e.importance} for e in entries] + return json.dumps({"entries": items, "count": len(items)}) + + return json.dumps({"error": f"Unknown tool: {tool_name}"}) + async def assemble_context( db: AsyncSession, chat_id: uuid.UUID, user_id: uuid.UUID, user_message: str @@ -39,9 +130,25 @@ async def assemble_context( if skill and skill.is_active: system_parts.append(f"---\nSpecialist Role ({skill.name}):\n{skill.system_prompt}") + # 4. Critical memory entries + memories = await get_critical_memories(db, user_id) + if memories: + memory_lines = [f"- [{m.category}] {m.title}: {m.content}" for m in memories] + system_parts.append(f"---\nUser Health Profile:\n" + "\n".join(memory_lines)) + + # 5. 
Relevant document excerpts (based on user message keywords) + if user_message.strip(): + docs = await search_documents(db, user_id, user_message, limit=3) + if docs: + doc_lines = [] + for d in docs: + excerpt = (d.extracted_text or "")[:1500] + doc_lines.append(f"[{d.original_filename} ({d.doc_type})]\n{excerpt}") + system_parts.append(f"---\nRelevant Document Excerpts:\n" + "\n\n".join(doc_lines)) + system_prompt = "\n\n".join(system_parts) - # 4. Conversation history + # 6. Conversation history result = await db.execute( select(Message) .where(Message.chat_id == chat_id, Message.role.in_(["user", "assistant"])) @@ -50,7 +157,7 @@ async def assemble_context( history = result.scalars().all() messages = [{"role": msg.role, "content": msg.content} for msg in history] - # 5. Current user message + # 7. Current user message messages.append({"role": "user", "content": user_message}) return system_prompt, messages @@ -63,7 +170,7 @@ def _sse_event(event: str, data: dict) -> str: async def stream_ai_response( db: AsyncSession, chat_id: uuid.UUID, user_id: uuid.UUID, user_message: str ) -> AsyncGenerator[str, None]: - """Stream AI response as SSE events.""" + """Stream AI response as SSE events, with tool use support.""" # Verify ownership chat = await get_chat(db, chat_id, user_id) @@ -75,28 +182,53 @@ async def stream_ai_response( # Assemble context system_prompt, messages = await assemble_context(db, chat_id, user_id, user_message) - # Stream from Claude - full_content = "" assistant_msg_id = str(uuid.uuid4()) - yield _sse_event("message_start", {"message_id": assistant_msg_id}) - async with client.messages.stream( - model=settings.CLAUDE_MODEL, - max_tokens=4096, - system=system_prompt, - messages=messages, - ) as stream: - async for text in stream.text_stream: - full_content += text - yield _sse_event("content_delta", {"delta": text}) + # Tool use loop + full_content = "" + max_tool_rounds = 5 + + for _ in range(max_tool_rounds): + response = await 
client.messages.create( + model=settings.CLAUDE_MODEL, + max_tokens=4096, + system=system_prompt, + messages=messages, + tools=AI_TOOLS, + ) + + # Process content blocks + tool_use_blocks = [] + for block in response.content: + if block.type == "text": + full_content += block.text + yield _sse_event("content_delta", {"delta": block.text}) + elif block.type == "tool_use": + tool_use_blocks.append(block) + yield _sse_event("tool_use", {"tool": block.name, "input": block.input}) + + # If no tool use, we're done + if response.stop_reason != "tool_use" or not tool_use_blocks: + break + + # Execute tools and continue conversation + messages.append({"role": "assistant", "content": response.content}) + tool_results = [] + for tool_block in tool_use_blocks: + result = await _execute_tool(db, user_id, tool_block.name, tool_block.input) + tool_results.append({ + "type": "tool_result", + "tool_use_id": tool_block.id, + "content": result, + }) + yield _sse_event("tool_result", {"tool": tool_block.name, "result": result}) + messages.append({"role": "user", "content": tool_results}) - # Get final message for metadata - final_message = await stream.get_final_message() metadata = { - "model": final_message.model, - "input_tokens": final_message.usage.input_tokens, - "output_tokens": final_message.usage.output_tokens, + "model": response.model, + "input_tokens": response.usage.input_tokens, + "output_tokens": response.usage.output_tokens, } # Save assistant message @@ -109,7 +241,6 @@ async def stream_ai_response( ) assistant_count = len(result.scalars().all()) if assistant_count == 1 and chat.title == "New Chat": - # Auto-generate title from first few words title = full_content[:50].split("\n")[0].strip() if len(title) > 40: title = title[:40] + "..." 
diff --git a/backend/app/services/document_service.py b/backend/app/services/document_service.py new file mode 100644 index 0000000..0cc3667 --- /dev/null +++ b/backend/app/services/document_service.py @@ -0,0 +1,97 @@ +import uuid + +from fastapi import HTTPException, status +from sqlalchemy import func, select, text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.document import Document +from app.utils.file_storage import delete_file + + +async def create_document( + db: AsyncSession, + user_id: uuid.UUID, + filename: str, + original_filename: str, + storage_path: str, + mime_type: str, + file_size: int, + doc_type: str = "other", +) -> Document: + doc = Document( + user_id=user_id, + filename=filename, + original_filename=original_filename, + storage_path=storage_path, + mime_type=mime_type, + file_size=file_size, + doc_type=doc_type, + processing_status="pending", + ) + db.add(doc) + await db.flush() + return doc + + +async def get_document(db: AsyncSession, doc_id: uuid.UUID, user_id: uuid.UUID) -> Document: + result = await db.execute( + select(Document).where(Document.id == doc_id, Document.user_id == user_id) + ) + doc = result.scalar_one_or_none() + if not doc: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found") + return doc + + +async def get_user_documents( + db: AsyncSession, + user_id: uuid.UUID, + doc_type: str | None = None, + processing_status: str | None = None, +) -> list[Document]: + stmt = select(Document).where(Document.user_id == user_id) + if doc_type: + stmt = stmt.where(Document.doc_type == doc_type) + if processing_status: + stmt = stmt.where(Document.processing_status == processing_status) + stmt = stmt.order_by(Document.created_at.desc()) + result = await db.execute(stmt) + return list(result.scalars().all()) + + +async def delete_document(db: AsyncSession, doc_id: uuid.UUID, user_id: uuid.UUID) -> None: + doc = await get_document(db, doc_id, user_id) + storage_path = 
doc.storage_path + await db.delete(doc) + await db.flush() + delete_file(storage_path) + + +async def search_documents(db: AsyncSession, user_id: uuid.UUID, query: str, limit: int = 5) -> list[Document]: + stmt = ( + select(Document) + .where( + Document.user_id == user_id, + Document.processing_status == "completed", + text("to_tsvector('english', coalesce(extracted_text, '')) @@ plainto_tsquery('english', :query)"), + ) + .params(query=query) + .order_by( + text("ts_rank(to_tsvector('english', coalesce(extracted_text, '')), plainto_tsquery('english', :query)) DESC") + ) + .params(query=query) + .limit(limit) + ) + result = await db.execute(stmt) + return list(result.scalars().all()) + + +async def update_document_text( + db: AsyncSession, doc_id: uuid.UUID, extracted_text: str, status_val: str = "completed" +) -> None: + result = await db.execute(select(Document).where(Document.id == doc_id)) + doc = result.scalar_one_or_none() + if doc: + doc.extracted_text = extracted_text + doc.processing_status = status_val + await db.flush() diff --git a/backend/app/services/memory_service.py b/backend/app/services/memory_service.py new file mode 100644 index 0000000..27c9512 --- /dev/null +++ b/backend/app/services/memory_service.py @@ -0,0 +1,71 @@ +import uuid + +from fastapi import HTTPException, status +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.memory_entry import MemoryEntry + + +async def create_memory(db: AsyncSession, user_id: uuid.UUID, **kwargs) -> MemoryEntry: + entry = MemoryEntry(user_id=user_id, **kwargs) + db.add(entry) + await db.flush() + return entry + + +async def get_memory(db: AsyncSession, entry_id: uuid.UUID, user_id: uuid.UUID) -> MemoryEntry: + result = await db.execute( + select(MemoryEntry).where(MemoryEntry.id == entry_id, MemoryEntry.user_id == user_id) + ) + entry = result.scalar_one_or_none() + if not entry: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Memory entry 
not found") + return entry + + +async def get_user_memories( + db: AsyncSession, + user_id: uuid.UUID, + category: str | None = None, + importance: str | None = None, + is_active: bool | None = None, +) -> list[MemoryEntry]: + stmt = select(MemoryEntry).where(MemoryEntry.user_id == user_id) + if category: + stmt = stmt.where(MemoryEntry.category == category) + if importance: + stmt = stmt.where(MemoryEntry.importance == importance) + if is_active is not None: + stmt = stmt.where(MemoryEntry.is_active == is_active) + stmt = stmt.order_by(MemoryEntry.created_at.desc()) + result = await db.execute(stmt) + return list(result.scalars().all()) + + +ALLOWED_UPDATE_FIELDS = {"category", "title", "content", "importance", "is_active"} + + +async def update_memory(db: AsyncSession, entry_id: uuid.UUID, user_id: uuid.UUID, **kwargs) -> MemoryEntry: + entry = await get_memory(db, entry_id, user_id) + for key, value in kwargs.items(): + if key in ALLOWED_UPDATE_FIELDS: + setattr(entry, key, value) + await db.flush() + return entry + + +async def delete_memory(db: AsyncSession, entry_id: uuid.UUID, user_id: uuid.UUID) -> None: + entry = await get_memory(db, entry_id, user_id) + await db.delete(entry) + + +async def get_critical_memories(db: AsyncSession, user_id: uuid.UUID) -> list[MemoryEntry]: + result = await db.execute( + select(MemoryEntry).where( + MemoryEntry.user_id == user_id, + MemoryEntry.is_active == True, # noqa: E712 + MemoryEntry.importance.in_(["critical", "high"]), + ).order_by(MemoryEntry.importance, MemoryEntry.created_at.desc()) + ) + return list(result.scalars().all()) diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/utils/file_storage.py b/backend/app/utils/file_storage.py new file mode 100644 index 0000000..0164d7b --- /dev/null +++ b/backend/app/utils/file_storage.py @@ -0,0 +1,34 @@ +import uuid +from pathlib import Path + +import aiofiles + +from app.config 
import settings + + +def _get_upload_dir(user_id: uuid.UUID, doc_id: uuid.UUID) -> Path: + path = Path(settings.UPLOAD_DIR) / str(user_id) / str(doc_id) + path.mkdir(parents=True, exist_ok=True) + return path + + +async def save_upload(user_id: uuid.UUID, doc_id: uuid.UUID, filename: str, content: bytes) -> str: + directory = _get_upload_dir(user_id, doc_id) + file_path = directory / filename + async with aiofiles.open(file_path, "wb") as f: + await f.write(content) + return str(file_path) + + +def get_file_path(storage_path: str) -> Path: + return Path(storage_path) + + +def delete_file(storage_path: str) -> None: + path = Path(storage_path) + if path.exists(): + path.unlink() + # Clean up empty parent dirs + parent = path.parent + if parent.exists() and not any(parent.iterdir()): + parent.rmdir() diff --git a/backend/app/utils/text_extraction.py b/backend/app/utils/text_extraction.py new file mode 100644 index 0000000..c8938b6 --- /dev/null +++ b/backend/app/utils/text_extraction.py @@ -0,0 +1,19 @@ +from pathlib import Path + + +def extract_text_from_pdf(file_path: str) -> str: + import fitz # PyMuPDF + + text_parts = [] + with fitz.open(file_path) as doc: + for page in doc: + text_parts.append(page.get_text()) + return "\n".join(text_parts).strip() + + +def extract_text(file_path: str, mime_type: str) -> str: + if mime_type == "application/pdf": + return extract_text_from_pdf(file_path) + # For images, we'd use pytesseract but skip for now as it requires system deps + # For other types, return empty + return "" diff --git a/backend/app/workers/__init__.py b/backend/app/workers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/workers/document_processor.py b/backend/app/workers/document_processor.py new file mode 100644 index 0000000..4d8a672 --- /dev/null +++ b/backend/app/workers/document_processor.py @@ -0,0 +1,32 @@ +import uuid + +from app.database import async_session_factory +from app.services.document_service import 
update_document_text +from app.utils.text_extraction import extract_text + + +async def process_document(doc_id: uuid.UUID, storage_path: str, mime_type: str) -> None: + """Background task: extract text from uploaded document.""" + async with async_session_factory() as db: + try: + # Update status to processing + from sqlalchemy import select + from app.models.document import Document + + result = await db.execute(select(Document).where(Document.id == doc_id)) + doc = result.scalar_one_or_none() + if not doc: + return + doc.processing_status = "processing" + await db.commit() + + # Extract text + text = extract_text(storage_path, mime_type) + + # Update with extracted text + await update_document_text(db, doc_id, text, "completed" if text else "failed") + await db.commit() + + except Exception: + await update_document_text(db, doc_id, "", "failed") + await db.commit() diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 3d8444e..200def1 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -16,6 +16,8 @@ dependencies = [ "python-multipart>=0.0.9", "httpx>=0.27.0", "anthropic>=0.40.0", + "pymupdf>=1.24.0", + "aiofiles>=24.0.0", ] [project.optional-dependencies] diff --git a/backend/tests/test_documents.py b/backend/tests/test_documents.py new file mode 100644 index 0000000..1461079 --- /dev/null +++ b/backend/tests/test_documents.py @@ -0,0 +1,97 @@ +import io +import pytest +from httpx import AsyncClient + + +@pytest.fixture +async def auth_headers(client: AsyncClient): + resp = await client.post("/api/v1/auth/register", json={ + "email": "docuser@example.com", + "username": "docuser", + "password": "testpass123", + }) + assert resp.status_code == 201 + return {"Authorization": f"Bearer {resp.json()['access_token']}"} + + +async def test_upload_document(client: AsyncClient, auth_headers: dict): + resp = await client.post( + "/api/v1/documents/?doc_type=lab_result", + headers=auth_headers, + files={"file": ("test.pdf", b"%PDF-1.4 test 
content", "application/pdf")}, + ) + assert resp.status_code == 201 + data = resp.json() + assert data["original_filename"] == "test.pdf" + assert data["doc_type"] == "lab_result" + assert data["processing_status"] == "pending" + + +async def test_upload_invalid_type(client: AsyncClient, auth_headers: dict): + resp = await client.post( + "/api/v1/documents/", + headers=auth_headers, + files={"file": ("test.exe", b"MZ...", "application/x-msdownload")}, + ) + assert resp.status_code == 400 + + +async def test_list_documents(client: AsyncClient, auth_headers: dict): + # Upload first + await client.post( + "/api/v1/documents/", + headers=auth_headers, + files={"file": ("list_test.pdf", b"%PDF-1.4 content", "application/pdf")}, + ) + + resp = await client.get("/api/v1/documents/", headers=auth_headers) + assert resp.status_code == 200 + assert len(resp.json()["documents"]) >= 1 + + +async def test_get_document(client: AsyncClient, auth_headers: dict): + resp = await client.post( + "/api/v1/documents/", + headers=auth_headers, + files={"file": ("get_test.pdf", b"%PDF-1.4 content", "application/pdf")}, + ) + doc_id = resp.json()["id"] + + resp = await client.get(f"/api/v1/documents/{doc_id}", headers=auth_headers) + assert resp.status_code == 200 + assert resp.json()["id"] == doc_id + + +async def test_delete_document(client: AsyncClient, auth_headers: dict): + resp = await client.post( + "/api/v1/documents/", + headers=auth_headers, + files={"file": ("del_test.pdf", b"%PDF-1.4 content", "application/pdf")}, + ) + doc_id = resp.json()["id"] + + resp = await client.delete(f"/api/v1/documents/{doc_id}", headers=auth_headers) + assert resp.status_code == 204 + + resp = await client.get(f"/api/v1/documents/{doc_id}", headers=auth_headers) + assert resp.status_code == 404 + + +async def test_document_ownership_isolation(client: AsyncClient, auth_headers: dict): + resp = await client.post( + "/api/v1/documents/", + headers=auth_headers, + files={"file": ("private.pdf", 
b"%PDF-1.4 content", "application/pdf")}, + ) + doc_id = resp.json()["id"] + + # Register another user + resp = await client.post("/api/v1/auth/register", json={ + "email": "docother@example.com", + "username": "docother", + "password": "testpass123", + }) + other_headers = {"Authorization": f"Bearer {resp.json()['access_token']}"} + + resp = await client.get(f"/api/v1/documents/{doc_id}", headers=other_headers) + assert resp.status_code == 404 diff --git a/backend/tests/test_memory.py b/backend/tests/test_memory.py new file mode 100644 index 0000000..11498c2 --- /dev/null +++ b/backend/tests/test_memory.py @@ -0,0 +1,109 @@ +import pytest +from httpx import AsyncClient + + +@pytest.fixture +async def auth_headers(client: AsyncClient): + resp = await client.post("/api/v1/auth/register", json={ + "email": "memuser@example.com", + "username": "memuser", + "password": "testpass123", + }) + assert resp.status_code == 201 + return {"Authorization": f"Bearer {resp.json()['access_token']}"} + + +async def test_create_memory(client: AsyncClient, auth_headers: dict): + resp = await client.post("/api/v1/memory/", json={ + "category": "condition", + "title": "Diabetes Type 2", + "content": "Diagnosed in 2024, managed with metformin", + "importance": "critical", + }, headers=auth_headers) + assert resp.status_code == 201 + data = resp.json() + assert data["category"] == "condition" + assert data["title"] == "Diabetes Type 2" + assert data["importance"] == "critical" + assert data["is_active"] is True + + +async def test_list_memories(client: AsyncClient, auth_headers: dict): + await client.post("/api/v1/memory/", json={ + "category": "allergy", + "title": "Penicillin", + "content": "Severe allergic reaction", + "importance": "critical", + }, headers=auth_headers) + + resp = await client.get("/api/v1/memory/", headers=auth_headers) + assert resp.status_code == 200 + assert len(resp.json()["entries"]) >= 1 + + +async def test_filter_by_category(client: AsyncClient, auth_headers: 
dict): + await client.post("/api/v1/memory/", json={ + "category": "medication", + "title": "Metformin", + "content": "500mg twice daily", + "importance": "high", + }, headers=auth_headers) + + resp = await client.get("/api/v1/memory/", params={"category": "medication"}, headers=auth_headers) + assert resp.status_code == 200 + entries = resp.json()["entries"] + assert all(e["category"] == "medication" for e in entries) + + +async def test_update_memory(client: AsyncClient, auth_headers: dict): + resp = await client.post("/api/v1/memory/", json={ + "category": "vital", + "title": "Blood Pressure", + "content": "130/85", + "importance": "medium", + }, headers=auth_headers) + entry_id = resp.json()["id"] + + resp = await client.patch(f"/api/v1/memory/{entry_id}", json={ + "content": "125/80 (improved)", + "importance": "low", + }, headers=auth_headers) + assert resp.status_code == 200 + assert resp.json()["content"] == "125/80 (improved)" + assert resp.json()["importance"] == "low" + + +async def test_delete_memory(client: AsyncClient, auth_headers: dict): + resp = await client.post("/api/v1/memory/", json={ + "category": "other", + "title": "To Delete", + "content": "Test", + "importance": "low", + }, headers=auth_headers) + entry_id = resp.json()["id"] + + resp = await client.delete(f"/api/v1/memory/{entry_id}", headers=auth_headers) + assert resp.status_code == 204 + + resp = await client.get(f"/api/v1/memory/{entry_id}", headers=auth_headers) + assert resp.status_code == 404 + + +async def test_memory_ownership_isolation(client: AsyncClient, auth_headers: dict): + resp = await client.post("/api/v1/memory/", json={ + "category": "condition", + "title": "Private Info", + "content": "Sensitive", + "importance": "critical", + }, headers=auth_headers) + entry_id = resp.json()["id"] + + resp = await client.post("/api/v1/auth/register", json={ + "email": "memother@example.com", + "username": "memother", + "password": "testpass123", + }) + other_headers = 
{"Authorization": f"Bearer {resp.json()['access_token']}"} + + resp = await client.get(f"/api/v1/memory/{entry_id}", headers=other_headers) + assert resp.status_code == 404 diff --git a/frontend/public/locales/en/translation.json b/frontend/public/locales/en/translation.json index f534858..b444cd1 100644 --- a/frontend/public/locales/en/translation.json +++ b/frontend/public/locales/en/translation.json @@ -90,6 +90,55 @@ "subtitle": "This context is added to all your AI conversations", "placeholder": "Add personal information that the AI should know about you..." }, + "documents": { + "upload": "Upload", + "drop_or_click": "Drop a file here or click to browse", + "doc_type": "Document Type", + "no_documents": "No documents uploaded yet.", + "download": "Download", + "reindex": "Re-extract text", + "extracted_text": "Extracted Text", + "search_placeholder": "Search documents...", + "clear_search": "Clear", + "types": { + "other": "Other", + "lab_result": "Lab Result", + "consultation": "Consultation", + "prescription": "Prescription", + "imaging": "Imaging" + }, + "status": { + "pending": "Pending", + "processing": "Processing", + "completed": "Completed", + "failed": "Failed" + } + }, + "memory": { + "create": "Add Memory Entry", + "edit": "Edit Memory Entry", + "no_entries": "No memory entries yet. The AI will save important health information here.", + "category": "Category", + "importance": "Importance", + "title_field": "Title", + "title_placeholder": "e.g. 
Diabetes Type 2", + "content_field": "Content", + "content_placeholder": "Detailed information...", + "categories": { + "condition": "Condition", + "medication": "Medication", + "allergy": "Allergy", + "vital": "Vital Sign", + "document_summary": "Document Summary", + "other": "Other" + }, + "importance_levels": { + "critical": "Critical", + "high": "High", + "medium": "Medium", + "low": "Low" + } + }, "common": { "loading": "Loading...", "error": "An error occurred", diff --git a/frontend/public/locales/ru/translation.json b/frontend/public/locales/ru/translation.json index 3270034..0b6ee88 100644 --- a/frontend/public/locales/ru/translation.json +++ b/frontend/public/locales/ru/translation.json @@ -90,6 +90,55 @@ "subtitle": "Этот контекст добавляется ко всем вашим разговорам с ИИ", "placeholder": "Добавьте личную информацию, которую ИИ должен знать о вас..." }, + "documents": { + "upload": "Загрузить", + "drop_or_click": "Перетащите файл или нажмите для выбора", + "doc_type": "Тип документа", + "no_documents": "Документы ещё не загружены.", + "download": "Скачать", + "reindex": "Извлечь текст заново", + "extracted_text": "Извлечённый текст", + "search_placeholder": "Поиск по документам...", + "clear_search": "Очистить", + "types": { + "other": "Другое", + "lab_result": "Анализы", + "consultation": "Консультация", + "prescription": "Рецепт", + "imaging": "Снимки" + }, + "status": { + "pending": "Ожидание", + "processing": "Обработка", + "completed": "Готово", + "failed": "Ошибка" + } + }, + "memory": { + "create": "Добавить запись", + "edit": "Редактировать запись", + "no_entries": "Записей пока нет. ИИ будет сохранять важную информацию о здоровье здесь.", + "category": "Категория", + "importance": "Важность", + "title_field": "Заголовок", + "title_placeholder": "напр. 
Диабет 2 типа", + "content_field": "Содержание", + "content_placeholder": "Подробная информация...", + "categories": { + "condition": "Заболевание", + "medication": "Лекарство", + "allergy": "Аллергия", + "vital": "Показатели", + "document_summary": "Сводка документа", + "other": "Другое" + }, + "importance_levels": { + "critical": "Критическая", + "high": "Высокая", + "medium": "Средняя", + "low": "Низкая" + } + }, "common": { "loading": "Загрузка...", "error": "Произошла ошибка", diff --git a/frontend/src/api/documents.ts b/frontend/src/api/documents.ts new file mode 100644 index 0000000..85fe615 --- /dev/null +++ b/frontend/src/api/documents.ts @@ -0,0 +1,70 @@ +import api from "./client"; + +export interface Document { + id: string; + user_id: string; + filename: string; + original_filename: string; + mime_type: string; + file_size: number; + doc_type: string; + processing_status: string; + extracted_text: string | null; + metadata: Record | null; + created_at: string; +} + +export interface DocumentListResponse { + documents: Document[]; +} + +export async function uploadDocument(file: File, docType = "other"): Promise { + const form = new FormData(); + form.append("file", file); + const { data } = await api.post(`/documents/?doc_type=${docType}`, form, { + headers: { "Content-Type": "multipart/form-data" }, + }); + return data; +} + +export async function getDocuments( + docType?: string, + processingStatus?: string +): Promise { + const params: Record = {}; + if (docType) params.doc_type = docType; + if (processingStatus) params.processing_status = processingStatus; + const { data } = await api.get("/documents/", { params }); + return data.documents; +} + +export async function getDocument(docId: string): Promise { + const { data } = await api.get(`/documents/${docId}`); + return data; +} + +export async function deleteDocument(docId: string): Promise { + await api.delete(`/documents/${docId}`); +} + +export async function reindexDocument(docId: string): 
Promise { + const { data } = await api.post(`/documents/${docId}/reindex`); + return data; +} + +export async function searchDocuments(query: string): Promise { + const { data } = await api.post("/documents/search", { query }); + return data.documents; +} + +export async function downloadDocument(docId: string, filename: string): Promise { + const { data } = await api.get(`/documents/${docId}/download`, { responseType: "blob" }); + const url = URL.createObjectURL(data); + const a = document.createElement("a"); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); +} diff --git a/frontend/src/api/memory.ts b/frontend/src/api/memory.ts new file mode 100644 index 0000000..ed9de2b --- /dev/null +++ b/frontend/src/api/memory.ts @@ -0,0 +1,59 @@ +import api from "./client"; + +export interface MemoryEntry { + id: string; + user_id: string; + category: string; + title: string; + content: string; + source_document_id: string | null; + importance: string; + is_active: boolean; + created_at: string; +} + +export interface MemoryEntryListResponse { + entries: MemoryEntry[]; +} + +export async function createMemory(data: { + category: string; + title: string; + content: string; + importance?: string; +}): Promise { + const { data: entry } = await api.post("/memory/", data); + return entry; +} + +export async function getMemories(filters?: { + category?: string; + importance?: string; + is_active?: boolean; +}): Promise { + const { data } = await api.get("/memory/", { params: filters }); + return data.entries; +} + +export async function getMemory(entryId: string): Promise { + const { data } = await api.get(`/memory/${entryId}`); + return data; +} + +export async function updateMemory( + entryId: string, + updates: Partial<{ + category: string; + title: string; + content: string; + importance: string; + is_active: boolean; + }> +): Promise { + const { data } = await 
api.patch(`/memory/${entryId}`, updates); + return data; +} + +export async function deleteMemory(entryId: string): Promise { + await api.delete(`/memory/${entryId}`); +} diff --git a/frontend/src/components/documents/document-list.tsx b/frontend/src/components/documents/document-list.tsx new file mode 100644 index 0000000..668af4b --- /dev/null +++ b/frontend/src/components/documents/document-list.tsx @@ -0,0 +1,68 @@ +import { useTranslation } from "react-i18next"; +import { FileText, Download, Trash2, RefreshCw } from "lucide-react"; +import type { Document } from "@/api/documents"; +import { downloadDocument } from "@/api/documents"; +import { cn } from "@/lib/utils"; + +interface DocumentListProps { + documents: Document[]; + onDelete: (docId: string) => void; + onReindex: (docId: string) => void; + onSelect: (doc: Document) => void; +} + +const statusColors: Record = { + pending: "bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-400", + processing: "bg-blue-100 text-blue-800 dark:bg-blue-900/30 dark:text-blue-400", + completed: "bg-green-100 text-green-800 dark:bg-green-900/30 dark:text-green-400", + failed: "bg-red-100 text-red-800 dark:bg-red-900/30 dark:text-red-400", +}; + +export function DocumentList({ documents, onDelete, onReindex, onSelect }: DocumentListProps) { + const { t } = useTranslation(); + + if (documents.length === 0) { + return ( +

{t("documents.no_documents")}

+ ); + } + + return ( +
+ {documents.map((doc) => ( +
onSelect(doc)} + className="flex items-center gap-3 rounded-lg border bg-card p-4 cursor-pointer hover:bg-accent/50 transition-colors" + > + +
+

{doc.original_filename}

+
+ {t(`documents.types.${doc.doc_type}`)} + {(doc.file_size / 1024 / 1024).toFixed(2)} MB +
+
+ + {t(`documents.status.${doc.processing_status}`)} + +
e.stopPropagation()}> + + + +
+
+ ))} +
+ ); +} diff --git a/frontend/src/components/documents/document-viewer.tsx b/frontend/src/components/documents/document-viewer.tsx new file mode 100644 index 0000000..1a55f80 --- /dev/null +++ b/frontend/src/components/documents/document-viewer.tsx @@ -0,0 +1,44 @@ +import { useTranslation } from "react-i18next"; +import type { Document } from "@/api/documents"; +import { downloadDocument } from "@/api/documents"; +import { Download, ArrowLeft } from "lucide-react"; + +interface DocumentViewerProps { + document: Document; + onBack: () => void; +} + +export function DocumentViewer({ document: doc, onBack }: DocumentViewerProps) { + const { t } = useTranslation(); + + return ( +
+
+ +
+

{doc.original_filename}

+

+ {t(`documents.types.${doc.doc_type}`)} · {(doc.file_size / 1024 / 1024).toFixed(2)} MB +

+
+ +
+ + {doc.extracted_text && ( +
+

{t("documents.extracted_text")}

+
+            {doc.extracted_text}
+          
+
+ )} +
+ ); +} diff --git a/frontend/src/components/documents/upload-dialog.tsx b/frontend/src/components/documents/upload-dialog.tsx new file mode 100644 index 0000000..eca195f --- /dev/null +++ b/frontend/src/components/documents/upload-dialog.tsx @@ -0,0 +1,100 @@ +import { useState, useRef } from "react"; +import { useTranslation } from "react-i18next"; +import { useMutation, useQueryClient } from "@tanstack/react-query"; +import { uploadDocument } from "@/api/documents"; +import { Upload, X } from "lucide-react"; + +interface UploadDialogProps { + open: boolean; + onClose: () => void; +} + +const DOC_TYPES = ["other", "lab_result", "consultation", "prescription", "imaging"]; + +export function UploadDialog({ open, onClose }: UploadDialogProps) { + const { t } = useTranslation(); + const queryClient = useQueryClient(); + const fileRef = useRef(null); + const [file, setFile] = useState(null); + const [docType, setDocType] = useState("other"); + const [dragOver, setDragOver] = useState(false); + + const mutation = useMutation({ + mutationFn: () => uploadDocument(file!, docType), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ["documents"] }); + setFile(null); + onClose(); + }, + }); + + if (!open) return null; + + const handleDrop = (e: React.DragEvent) => { + e.preventDefault(); + setDragOver(false); + const dropped = e.dataTransfer.files[0]; + if (dropped) setFile(dropped); + }; + + return ( +
+
+
+

{t("documents.upload")}

+ +
+ +
{ e.preventDefault(); setDragOver(true); }} + onDragLeave={() => setDragOver(false)} + onDrop={handleDrop} + onClick={() => fileRef.current?.click()} + className={`flex flex-col items-center justify-center rounded-lg border-2 border-dashed p-8 cursor-pointer transition-colors ${ + dragOver ? "border-primary bg-primary/5" : "border-input hover:border-primary/50" + }`} + > + +

+ {file ? file.name : t("documents.drop_or_click")} +

+ {file && ( +

+ {(file.size / 1024 / 1024).toFixed(2)} MB +

+ )} + setFile(e.target.files?.[0] || null)} + /> +
+ +
+ + +
+ + +
+
+ ); +} diff --git a/frontend/src/components/layout/sidebar.tsx b/frontend/src/components/layout/sidebar.tsx index 66dd427..c31f6c5 100644 --- a/frontend/src/components/layout/sidebar.tsx +++ b/frontend/src/components/layout/sidebar.tsx @@ -19,8 +19,8 @@ const navItems = [ { key: "chats", to: "/chat", icon: MessageSquare, enabled: true, end: false }, { key: "skills", to: "/skills", icon: Sparkles, enabled: true, end: true }, { key: "personal_context", to: "/profile/context", icon: BookOpen, enabled: true, end: true }, - { key: "documents", to: "/documents", icon: FileText, enabled: false, end: true }, - { key: "memory", to: "/memory", icon: Brain, enabled: false, end: true }, + { key: "documents", to: "/documents", icon: FileText, enabled: true, end: true }, + { key: "memory", to: "/memory", icon: Brain, enabled: true, end: true }, { key: "notifications", to: "/notifications", icon: Bell, enabled: false, end: true }, ]; diff --git a/frontend/src/components/memory/memory-editor.tsx b/frontend/src/components/memory/memory-editor.tsx new file mode 100644 index 0000000..80ec17b --- /dev/null +++ b/frontend/src/components/memory/memory-editor.tsx @@ -0,0 +1,90 @@ +import { useState, useEffect } from "react"; +import { useTranslation } from "react-i18next"; +import type { MemoryEntry } from "@/api/memory"; + +const CATEGORIES = ["condition", "medication", "allergy", "vital", "document_summary", "other"]; +const IMPORTANCE_LEVELS = ["critical", "high", "medium", "low"]; + +interface MemoryEditorProps { + entry?: MemoryEntry | null; + onSave: (data: { category: string; title: string; content: string; importance: string }) => void; + onCancel: () => void; + loading?: boolean; +} + +export function MemoryEditor({ entry, onSave, onCancel, loading }: MemoryEditorProps) { + const { t } = useTranslation(); + const [category, setCategory] = useState("other"); + const [title, setTitle] = useState(""); + const [content, setContent] = useState(""); + const [importance, 
setImportance] = useState("medium"); + + useEffect(() => { + if (entry) { + setCategory(entry.category); + setTitle(entry.title); + setContent(entry.content); + setImportance(entry.importance); + } else { + setCategory("other"); + setTitle(""); + setContent(""); + setImportance("medium"); + } + }, [entry]); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + onSave({ category, title, content, importance }); + }; + + return ( +
+
+
+ + +
+
+ + +
+
+ +
+ + setTitle(e.target.value)} required maxLength={255} + className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + placeholder={t("memory.title_placeholder")} /> +
+ +
+ +