def list_documents(morphik: Morphik, filters=None, limit=None) -> dict:
    """List documents through the Morphik client and flatten them for display.

    Args:
        morphik: Client object exposing ``list_documents(filters=..., limit=...)``.
        filters: Metadata filter. A non-blank string is parsed as JSON; a blank
            or whitespace-only string is treated as "no filter" (``None``);
            any other value is forwarded to the client unchanged.
        limit: Maximum number of documents to request. Falsy values
            (``None``, ``0``) fall back to 10.

    Returns:
        ``{"documents": [...], "count": n}`` where each entry carries the
        document's ``external_id`` and ``filename`` plus selected metadata
        fields. Metadata keys that are absent come back as ``None``
        (``tags`` as an empty list).

    Raises:
        json.JSONDecodeError: If ``filters`` is a non-blank string that is not
            valid JSON.
    """
    if isinstance(filters, str):
        # A blank string must not reach the client as a raw str filter.
        filters = json.loads(filters) if filters.strip() else None

    response = morphik.list_documents(filters=filters, limit=limit or 10)
    # The client may hand back a wrapper with a ``documents`` attribute or the
    # iterable of documents itself; accept both.
    docs = getattr(response, "documents", response)

    normalized = []
    for doc in docs:
        meta = doc.metadata or {}
        effective_date = meta.get("effective_date")
        expires_at = meta.get("expires_at")
        normalized.append({
            "external_id": doc.external_id,
            "filename": doc.filename,
            "doc_id": meta.get("doc_id"),
            "title": meta.get("title"),
            "doc_type": meta.get("doc_type"),
            "category": meta.get("category"),
            "status": meta.get("status"),
            # Date-like values are stringified; missing/falsy ones become None.
            "effective_date": str(effective_date) if effective_date else None,
            "expires_at": str(expires_at) if expires_at else None,
            "tags": meta.get("tags") or [],
        })
    return {"documents": normalized, "count": len(normalized)}
def retrieve_chunks(morphik: Morphik, query, filters=None, k=None) -> dict:
    """Retrieve chunks for a query through the Morphik client and summarize them.

    Args:
        morphik: Client object exposing ``retrieve_chunks(...)``.
        query: Search query forwarded to the client.
        filters: Metadata filter. A non-blank string is parsed as JSON; a blank
            or whitespace-only string is treated as "no filter" (``None``);
            any other value is forwarded to the client unchanged.
        k: Number of chunks to request. Falsy values (``None``, ``0``) fall
            back to 3.

    Returns:
        ``{"chunks": [...], "count": n, "image_urls": [...]}``. Each chunk
        entry holds the score rounded to three decimals, a content preview,
        and selected metadata fields. ``image_urls`` collects every chunk
        content that is a string starting with ``"http"``.

    Raises:
        json.JSONDecodeError: If ``filters`` is a non-blank string that is not
            valid JSON.
    """
    if isinstance(filters, str):
        # A blank string must not reach the client as a raw str filter.
        filters = json.loads(filters) if filters.strip() else None

    chunks = morphik.retrieve_chunks(
        query=query,
        filters=filters,
        k=k or 3,
        use_colpali=True,
        output_format="url",
    )

    results = []
    image_urls = []
    for chunk in chunks:
        meta = chunk.metadata or {}
        content = chunk.content
        expires_at = meta.get("expires_at")

        # NOTE: any string content beginning with "http" is classified as an
        # image URL, including plain text that merely starts with a link.
        if isinstance(content, str) and content.startswith("http"):
            image_urls.append(content)
            preview = f"[Image URL: {content}]"
        elif isinstance(content, str) and len(content) > 300:
            # Keep previews short; the full text stays on the chunk itself.
            preview = f"{content[:300]}..."
        else:
            preview = content

        results.append({
            "score": round(chunk.score, 3),
            "content_preview": preview,
            "filename": meta.get("filename") or chunk.filename or "unknown",
            "doc_id": meta.get("doc_id"),
            "doc_type": meta.get("doc_type"),
            "title": meta.get("title"),
            "expires_at": str(expires_at) if expires_at else None,
            "tags": meta.get("tags") or [],
        })

    return {
        "chunks": results,
        "count": len(results),
        "image_urls": image_urls,
    }