Documentation Index
Fetch the complete documentation index at: https://morphik.ai/docs/llms.txt
Use this file to discover all available pages before exploring further.
def ingest_directory(
directory: Union[str, Path],
recursive: bool = False,
pattern: str = "*",
metadata: Optional[Dict[str, Any]] = None,
use_colpali: bool = True,
parallel: bool = True,
) -> List[Document]
async def ingest_directory(
directory: Union[str, Path],
recursive: bool = False,
pattern: str = "*",
metadata: Optional[Dict[str, Any]] = None,
use_colpali: bool = True,
parallel: bool = True,
) -> List[Document]
Parameters
directory (str | Path): Directory containing the files to ingest.
recursive (bool, optional): Whether to recurse into subdirectories. Defaults to False.
pattern (str, optional): Glob pattern to select files (for example "*.pdf"). Defaults to "*".
metadata (Dict[str, Any], optional): Metadata applied to each ingested file. Defaults to None.
use_colpali (bool, optional): Whether to use ColPali-style embedding. Defaults to True.
parallel (bool, optional): Whether to process files in parallel. Defaults to True.
Returns
List[Document]: Metadata for each ingested document, one entry per file
Examples
from pathlib import Path
from morphik import Morphik
db = Morphik()
docs = db.ingest_directory(
Path("/data/contracts"),
recursive=True,
pattern="*.pdf",
metadata={"category": "contracts"},
)
print(f"Ingested {len(docs)} documents")
from pathlib import Path
from morphik import AsyncMorphik
async with AsyncMorphik() as db:
docs = await db.ingest_directory(
Path("/data/contracts"),
recursive=True,
pattern="*.pdf",
metadata={"category": "contracts"},
)
print(f"Ingested {len(docs)} documents")