Backend:
- Document + MemoryEntry models with Alembic migration (GIN FTS index)
- File upload endpoint with path traversal protection (sanitized filenames)
- Background document text extraction (PyMuPDF)
- Full-text search on extracted_text via PostgreSQL tsvector/tsquery
- Memory CRUD with enum-validated categories/importance, field allow-list
- AI tools: save_memory, search_documents, get_memory (Claude function calling)
- Tool execution loop in stream_ai_response (multi-turn tool use)
- Context assembly: injects critical memory + relevant doc excerpts
- File storage abstraction (local filesystem, S3-swappable)
- Secure file deletion (DB flush before disk delete)
Frontend:
- Document upload dialog (drag-and-drop + file picker)
- Document list with status badges, search, download (via authenticated blob)
- Document viewer with extracted text preview
- Memory list grouped by category with importance color coding
- Memory editor with category/importance dropdowns
- Documents + Memory pages with full CRUD
- Enabled sidebar navigation for both sections
Review fixes applied:
- Sanitized upload filenames (path traversal prevention)
- Download via axios blob (not bare <a href>, preserves auth)
- Route ordering: /search before /{id}/reindex
- Memory update allows is_active=False + field allow-list
- MemoryEditor form resets on mode switch
- Literal enum validation on category/importance schemas
- DB flush before file deletion for data integrity
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
98 lines
3.0 KiB
Python
98 lines
3.0 KiB
Python
import uuid
|
|
|
|
from fastapi import HTTPException, status
|
|
from sqlalchemy import func, select, text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.models.document import Document
|
|
from app.utils.file_storage import delete_file
|
|
|
|
|
|
async def create_document(
    db: AsyncSession,
    user_id: uuid.UUID,
    filename: str,
    original_filename: str,
    storage_path: str,
    mime_type: str,
    file_size: int,
    doc_type: str = "other",
) -> Document:
    """Add a new ``Document`` row describing an uploaded file.

    The record is always created with ``processing_status="pending"``. The
    session is flushed but not committed by this function.

    Args:
        db: Async session the new row is added to.
        user_id: Owner of the document.
        filename: Name stored on the record as ``filename``.
        original_filename: Name stored on the record as ``original_filename``.
        storage_path: Location of the stored file.
        mime_type: MIME type recorded for the file.
        file_size: Size recorded for the file.
        doc_type: Document category; defaults to ``"other"``.

    Returns:
        The ``Document`` instance that was added and flushed.
    """
    attributes = {
        "user_id": user_id,
        "filename": filename,
        "original_filename": original_filename,
        "storage_path": storage_path,
        "mime_type": mime_type,
        "file_size": file_size,
        "doc_type": doc_type,
        # Every new upload starts out unprocessed.
        "processing_status": "pending",
    }
    new_document = Document(**attributes)
    db.add(new_document)
    await db.flush()
    return new_document
|
|
|
|
|
|
async def get_document(db: AsyncSession, doc_id: uuid.UUID, user_id: uuid.UUID) -> Document:
    """Fetch one document by id, restricted to the given owner.

    Args:
        db: Async session used to run the query.
        doc_id: Primary key of the document to load.
        user_id: The document must belong to this user to be returned.

    Returns:
        The matching ``Document``.

    Raises:
        HTTPException: 404 "Document not found" when no row matches both the
            id and the owner.
    """
    owned_lookup = select(Document).where(Document.id == doc_id, Document.user_id == user_id)
    found = (await db.execute(owned_lookup)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found")
|
|
|
|
|
|
async def get_user_documents(
    db: AsyncSession,
    user_id: uuid.UUID,
    doc_type: str | None = None,
    processing_status: str | None = None,
) -> list[Document]:
    """List a user's documents, newest first, with optional filters.

    Args:
        db: Async session used to run the query.
        user_id: Only documents owned by this user are returned.
        doc_type: When truthy, keep only documents of this type.
        processing_status: When truthy, keep only documents in this status.

    Returns:
        Matching documents ordered by ``created_at`` descending.
    """
    conditions = [Document.user_id == user_id]
    # Falsy filter values (None or "") mean "do not filter on this field".
    if doc_type:
        conditions.append(Document.doc_type == doc_type)
    if processing_status:
        conditions.append(Document.processing_status == processing_status)

    listing = select(Document).where(*conditions).order_by(Document.created_at.desc())
    rows = await db.execute(listing)
    return list(rows.scalars().all())
|
|
|
|
|
|
async def delete_document(db: AsyncSession, doc_id: uuid.UUID, user_id: uuid.UUID) -> None:
    """Delete a user's document row, then remove its stored file.

    Args:
        db: Async session used for the lookup and the delete.
        doc_id: Primary key of the document to delete.
        user_id: Owner the document must belong to.

    Raises:
        HTTPException: 404 from ``get_document`` when the document does not
            exist for this user.
    """
    target = await get_document(db, doc_id, user_id)
    # Read the path up front so it does not depend on the instance's state
    # after the row has been deleted.
    stored_path = target.storage_path
    await db.delete(target)
    # Flush before touching storage: if the DELETE fails, the exception is
    # raised here and the file is left in place.
    await db.flush()
    delete_file(stored_path)
|
|
|
|
|
|
async def search_documents(db: AsyncSession, user_id: uuid.UUID, query: str, limit: int = 5) -> list[Document]:
    """Full-text search over a user's processed documents, best match first.

    Only documents owned by ``user_id`` whose ``processing_status`` is
    ``"completed"`` are considered. Matching uses PostgreSQL
    ``plainto_tsquery`` against ``extracted_text`` with the ``english``
    configuration, and results are ordered by ``ts_rank`` descending.

    Args:
        db: Async session used to run the query.
        user_id: Only this user's documents are searched.
        query: Free-text search string; passed as a bind parameter, never
            interpolated into the SQL.
        limit: Maximum number of documents to return.

    Returns:
        Up to ``limit`` matching documents, or an empty list when ``query``
        is empty or whitespace-only.
    """
    # A blank query produces no lexemes and therefore cannot match any row;
    # answer directly instead of making a database round trip.
    if not query or not query.strip():
        return []

    # Constant SQL fragments shared by the filter and the ranking so the two
    # expressions cannot drift apart. Only ``:query`` carries user input.
    ts_vector = "to_tsvector('english', coalesce(extracted_text, ''))"
    ts_query = "plainto_tsquery('english', :query)"

    stmt = (
        select(Document)
        .where(
            Document.user_id == user_id,
            Document.processing_status == "completed",
            text(f"{ts_vector} @@ {ts_query}"),
        )
        .order_by(text(f"ts_rank({ts_vector}, {ts_query}) DESC"))
        .limit(limit)
        # Bind once, after both text() clauses are attached, so the single
        # call covers the WHERE and the ORDER BY occurrences of :query.
        .params(query=query)
    )
    result = await db.execute(stmt)
    return list(result.scalars().all())
|
|
|
|
|
|
async def update_document_text(
    db: AsyncSession, doc_id: uuid.UUID, extracted_text: str, status_val: str = "completed"
) -> None:
    """Store extracted text and a processing status on a document.

    The lookup is by ``doc_id`` only; it is not scoped to a user. If no
    document with that id exists, the function returns without doing anything.

    Args:
        db: Async session used for the lookup and the update.
        doc_id: Primary key of the document to update.
        extracted_text: Text to store in ``extracted_text``.
        status_val: Value to store in ``processing_status``; defaults to
            ``"completed"``.
    """
    lookup = select(Document).where(Document.id == doc_id)
    document = (await db.execute(lookup)).scalar_one_or_none()
    if not document:
        # Nothing to update; a missing row is silently ignored.
        return

    document.extracted_text = extracted_text
    document.processing_status = status_val
    await db.flush()
|