Phase 4: Documents & Memory — upload, FTS, AI tools, context injection
Backend:
- Document + MemoryEntry models with Alembic migration (GIN FTS index)
- File upload endpoint with path traversal protection (sanitized filenames)
- Background document text extraction (PyMuPDF)
- Full-text search on extracted_text via PostgreSQL tsvector/tsquery
- Memory CRUD with enum-validated categories/importance, field allow-list
- AI tools: save_memory, search_documents, get_memory (Claude function calling)
- Tool execution loop in stream_ai_response (multi-turn tool use)
- Context assembly: injects critical memory + relevant doc excerpts
- File storage abstraction (local filesystem, S3-swappable)
- Secure file deletion (DB flush before disk delete)
Frontend:
- Document upload dialog (drag-and-drop + file picker)
- Document list with status badges, search, download (via authenticated blob)
- Document viewer with extracted text preview
- Memory list grouped by category with importance color coding
- Memory editor with category/importance dropdowns
- Documents + Memory pages with full CRUD
- Enabled sidebar navigation for both sections
Review fixes applied:
- Sanitized upload filenames (path traversal prevention)
- Download via axios blob (not bare <a href>, preserves auth)
- Route ordering: /search before /{id}/reindex
- Memory update allows is_active=False + field allow-list
- MemoryEditor form resets on mode switch
- Literal enum validation on category/importance schemas
- DB flush before file deletion for data integrity
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
126
backend/app/api/v1/documents.py
Normal file
126
backend/app/api/v1/documents.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import asyncio
|
||||
import uuid
|
||||
from pathlib import PurePosixPath
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, UploadFile, File, HTTPException, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.document import DocumentListResponse, DocumentResponse, DocumentSearchRequest
|
||||
from app.services import document_service
|
||||
from app.utils.file_storage import save_upload, get_file_path
|
||||
from app.workers.document_processor import process_document
|
||||
|
||||
# Router for the document endpoints; every route declared on it is served
# under the "/documents" prefix and tagged "documents".
router = APIRouter(
    prefix="/documents",
    tags=["documents"],
)
|
||||
|
||||
# Content types accepted by the upload endpoint. An upload whose declared
# content type is not listed here is rejected with HTTP 400.
ALLOWED_MIME_TYPES = [
    # PDF documents
    "application/pdf",
    # Raster image formats
    "image/jpeg",
    "image/png",
    "image/tiff",
    "image/webp",
]
|
||||
|
||||
|
||||
# Strong references to the extraction tasks started by uploads. The event loop
# only holds weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes. Each task removes itself when done.
_UPLOAD_PROCESSING_TASKS: set[asyncio.Task] = set()


# Upload a document for the authenticated user.
#
# Parameters:
#   user      - the authenticated caller (injected by get_current_user).
#   db        - request-scoped async database session.
#   file      - the multipart upload; its declared content type must be in
#               ALLOWED_MIME_TYPES.
#   doc_type  - free-form document type from the query string, default "other".
#
# Returns the created document serialized as DocumentResponse (HTTP 201).
#
# Raises HTTPException(400) when the content type is not allowed or the file
# exceeds settings.MAX_UPLOAD_SIZE_MB.
@router.post("/", response_model=DocumentResponse, status_code=status.HTTP_201_CREATED)
async def upload_document(
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    file: UploadFile = File(...),
    doc_type: str = Query(default="other"),
):
    if file.content_type not in ALLOWED_MIME_TYPES:
        raise HTTPException(status_code=400, detail=f"Unsupported file type: {file.content_type}")

    # Past the allow-list check the content type is always a non-empty string,
    # so no fallback value is needed further down.
    mime_type = file.content_type

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading all of it into memory first.
    max_bytes = int(settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024)
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        raise HTTPException(status_code=400, detail=f"File too large. Max {settings.MAX_UPLOAD_SIZE_MB}MB")

    doc_id = uuid.uuid4()

    # Keep only the final path component of the client-supplied name. Backslashes
    # are normalized first so Windows-style paths ("..\\..\\x") are stripped too,
    # and empty or dot-only results fall back to a neutral name.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = PurePosixPath(raw_name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    filename = f"{doc_id}_{safe_name}"
    storage_path = await save_upload(user.id, doc_id, filename, content)

    # NOTE(review): if create_document fails, the file written above is left
    # behind; no delete helper is imported here to clean it up - confirm whether
    # the storage layer should handle this.
    doc = await document_service.create_document(
        db, user.id, filename, safe_name,
        storage_path, mime_type,
        len(content), doc_type,
    )

    # Trigger background processing. The task is kept in the module-level set
    # until it completes so it cannot be collected mid-run.
    # NOTE(review): presumably the new row is visible to process_document by the
    # time it runs - confirm the commit timing of the get_db session.
    task = asyncio.create_task(process_document(doc.id, storage_path, mime_type))
    _UPLOAD_PROCESSING_TASKS.add(task)
    task.add_done_callback(_UPLOAD_PROCESSING_TASKS.discard)

    return DocumentResponse.model_validate(doc)
|
||||
|
||||
|
||||
# List the authenticated user's documents.
#
# The optional doc_type and processing_status query parameters are passed
# straight through to document_service.get_user_documents; None means the
# caller did not supply that filter.
@router.get("/", response_model=DocumentListResponse)
async def list_documents(
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    doc_type: str | None = Query(default=None),
    processing_status: str | None = Query(default=None),
):
    records = await document_service.get_user_documents(
        db,
        user.id,
        doc_type,
        processing_status,
    )
    serialized = list(map(DocumentResponse.model_validate, records))
    return DocumentListResponse(documents=serialized)
|
||||
|
||||
|
||||
# Return a single document, looked up by id and the caller's user id.
#
# NOTE(review): presumably document_service.get_document raises when the
# document is missing or belongs to someone else - confirm in the service.
@router.get("/{doc_id}", response_model=DocumentResponse)
async def get_document(
    doc_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    owner_id = user.id
    record = await document_service.get_document(db, doc_id, owner_id)
    return DocumentResponse.model_validate(record)
|
||||
|
||||
|
||||
# Stream the stored file for one of the caller's documents.
#
# Parameters:
#   doc_id - id of the document to download.
#   user   - the authenticated caller (injected by get_current_user).
#   db     - request-scoped async database session.
#
# Returns a FileResponse carrying the document's original filename and stored
# MIME type.
#
# Raises HTTPException(404) when the stored path is not a regular file.
# NOTE(review): presumably document_service.get_document raises when the
# document is missing or not owned by the caller - confirm in the service.
@router.get("/{doc_id}/download")
async def download_document(
    doc_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    doc = await document_service.get_document(db, doc_id, user.id)
    file_path = get_file_path(doc.storage_path)

    # is_file() rather than exists(): a directory at the stored path would pass
    # an existence check and then fail later while the response is being sent.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found on disk")

    return FileResponse(
        path=str(file_path),
        filename=doc.original_filename,
        media_type=doc.mime_type,
    )
|
||||
|
||||
|
||||
# Delete one of the caller's documents and answer with 204 No Content.
#
# All of the work is delegated to document_service.delete_document, which
# receives the session, the document id and the caller's user id.
@router.delete("/{doc_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_document(
    doc_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    owner_id = user.id
    await document_service.delete_document(db, doc_id, owner_id)
|
||||
|
||||
|
||||
# Search the caller's documents.
#
# The request body supplies the query string (data.query); matching is done by
# document_service.search_documents and the hits are returned in the same
# list envelope as the plain listing endpoint.
@router.post("/search", response_model=DocumentListResponse)
async def search_documents(
    data: DocumentSearchRequest,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    query_text = data.query
    hits = await document_service.search_documents(db, user.id, query_text)
    serialized = [DocumentResponse.model_validate(hit) for hit in hits]
    return DocumentListResponse(documents=serialized)
|
||||
|
||||
|
||||
# Strong references to the processing tasks started by reindex requests. The
# event loop only holds weak references to tasks, so a task nobody else
# references can be garbage-collected before it finishes. Each task removes
# itself from the set when done.
_REINDEX_TASKS: set[asyncio.Task] = set()


# Re-run background processing for one of the caller's documents.
#
# Parameters:
#   doc_id - id of the document to reprocess.
#   user   - the authenticated caller (injected by get_current_user).
#   db     - request-scoped async database session.
#
# Returns the document as loaded before reprocessing starts; the processing
# itself runs in a background task and is not awaited here.
#
# NOTE(review): presumably document_service.get_document raises when the
# document is missing or not owned by the caller - confirm in the service.
@router.post("/{doc_id}/reindex", response_model=DocumentResponse)
async def reindex_document(
    doc_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    doc = await document_service.get_document(db, doc_id, user.id)

    # Keep the task referenced until it completes so it cannot be collected
    # mid-run.
    task = asyncio.create_task(process_document(doc.id, doc.storage_path, doc.mime_type))
    _REINDEX_TASKS.add(task)
    task.add_done_callback(_REINDEX_TASKS.discard)

    return DocumentResponse.model_validate(doc)
|
||||
70
backend/app/api/v1/memory.py
Normal file
70
backend/app/api/v1/memory.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import uuid
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.memory import (
|
||||
CreateMemoryRequest,
|
||||
MemoryEntryListResponse,
|
||||
MemoryEntryResponse,
|
||||
UpdateMemoryRequest,
|
||||
)
|
||||
from app.services import memory_service
|
||||
|
||||
# Router for the memory endpoints; every route declared on it is served under
# the "/memory" prefix and tagged "memory".
router = APIRouter(
    prefix="/memory",
    tags=["memory"],
)
|
||||
|
||||
|
||||
# Create a memory entry for the authenticated user (HTTP 201).
#
# Every field of the validated request body is forwarded to
# memory_service.create_memory as a keyword argument.
@router.post("/", response_model=MemoryEntryResponse, status_code=status.HTTP_201_CREATED)
async def create_memory(
    data: CreateMemoryRequest,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    fields = data.model_dump()
    created = await memory_service.create_memory(db, user.id, **fields)
    return MemoryEntryResponse.model_validate(created)
|
||||
|
||||
|
||||
# List the authenticated user's memory entries.
#
# category, importance and is_active are optional query filters that are
# passed straight through to memory_service.get_user_memories; None means the
# caller did not supply that filter.
@router.get("/", response_model=MemoryEntryListResponse)
async def list_memories(
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    category: str | None = Query(default=None),
    importance: str | None = Query(default=None),
    is_active: bool | None = Query(default=None),
):
    records = await memory_service.get_user_memories(
        db,
        user.id,
        category,
        importance,
        is_active,
    )
    serialized = list(map(MemoryEntryResponse.model_validate, records))
    return MemoryEntryListResponse(entries=serialized)
|
||||
|
||||
|
||||
# Return a single memory entry, looked up by id and the caller's user id.
#
# NOTE(review): presumably memory_service.get_memory raises when the entry is
# missing or belongs to someone else - confirm in the service.
@router.get("/{entry_id}", response_model=MemoryEntryResponse)
async def get_memory(
    entry_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    owner_id = user.id
    record = await memory_service.get_memory(db, entry_id, owner_id)
    return MemoryEntryResponse.model_validate(record)
|
||||
|
||||
|
||||
# Partially update one of the caller's memory entries.
#
# Only the fields the client actually sent are forwarded to
# memory_service.update_memory (exclude_unset=True), so omitted fields are
# not passed to the service at all.
@router.patch("/{entry_id}", response_model=MemoryEntryResponse)
async def update_memory(
    entry_id: uuid.UUID,
    data: UpdateMemoryRequest,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    changes = data.model_dump(exclude_unset=True)
    updated = await memory_service.update_memory(db, entry_id, user.id, **changes)
    return MemoryEntryResponse.model_validate(updated)
|
||||
|
||||
|
||||
# Delete one of the caller's memory entries and answer with 204 No Content.
#
# All of the work is delegated to memory_service.delete_memory, which receives
# the session, the entry id and the caller's user id.
@router.delete("/{entry_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_memory(
    entry_id: uuid.UUID,
    user: Annotated[User, Depends(get_current_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    owner_id = user.id
    await memory_service.delete_memory(db, entry_id, owner_id)
|
||||
@@ -5,6 +5,8 @@ from app.api.v1.chats import router as chats_router
|
||||
from app.api.v1.admin import router as admin_router
|
||||
from app.api.v1.skills import router as skills_router
|
||||
from app.api.v1.users import router as users_router
|
||||
from app.api.v1.documents import router as documents_router
|
||||
from app.api.v1.memory import router as memory_router
|
||||
|
||||
api_v1_router = APIRouter(prefix="/api/v1")
|
||||
|
||||
@@ -13,6 +15,8 @@ api_v1_router.include_router(chats_router)
|
||||
api_v1_router.include_router(admin_router)
|
||||
api_v1_router.include_router(skills_router)
|
||||
api_v1_router.include_router(users_router)
|
||||
api_v1_router.include_router(documents_router)
|
||||
api_v1_router.include_router(memory_router)
|
||||
|
||||
|
||||
@api_v1_router.get("/health")
|
||||
|
||||
Reference in New Issue
Block a user