
Modules

Derivative based on the original work here: https://github.com/thehyve/omop-cdm/blob/main/src/omop_cdm/regular/cdm600/tables.py

Modifications made to this file:

  • Removed support for schema.
  • Added new tables

  • Licensed under the Apache License, Version 2.0 (the "License");
  • you may not use this file except in compliance with the License.
  • You may obtain a copy of the License at
  • https://www.apache.org/licenses/LICENSE-2.0
  • Unless required by applicable law or agreed to in writing, software
  • distributed under the License is distributed on an "AS IS" BASIS,
  • WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  • See the License for the specific language governing permissions and
  • limitations under the License.

Derivative based on the original work here: https://github.com/thehyve/omop-cdm/blob/main/src/omop_cdm/regular/cdm54/tables.py

Modifications made to this file:

  • Removed support for schema.
  • Added new tables

  • Licensed under the Apache License, Version 2.0 (the "License");
  • you may not use this file except in compliance with the License.
  • You may obtain a copy of the License at
  • https://www.apache.org/licenses/LICENSE-2.0
  • Unless required by applicable law or agreed to in writing, software
  • distributed under the License is distributed on an "AS IS" BASIS,
  • WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  • See the License for the specific language governing permissions and
  • limitations under the License.

Engine and session factory for OMOP CDM databases.

This module provides an asynchronous SQLAlchemy engine factory with helpers to create/init CDM schemas and obtain async sessions across supported backends (SQLite, MySQL, PostgreSQL).

CdmEngineFactory

Bases: object

Factory to create async SQLAlchemy engines and sessions for OMOP CDM.

Supports SQLite (default), MySQL, and PostgreSQL. Exposes convenience properties for the configured engine and async session maker.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| db | | Database type: "sqlite", "mysql", or "pgsql". | 'sqlite' |
| host | | Database host (ignored for SQLite). | 'localhost' |
| port | | Database port (ignored for SQLite). | 5432 |
| user | | Database user (ignored for SQLite). | 'root' |
| pw | | Database password (ignored for SQLite). | 'pass' |
| name | | Database name or SQLite filename. | 'cdm.sqlite' |
| schema | | PostgreSQL schema to use for CDM. | '' |
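
For orientation, a minimal usage sketch (assuming the class is importable as pyomop.CdmEngineFactory and the code runs inside an event loop):

import asyncio
from pyomop import CdmEngineFactory  # assumed import path

async def main():
    factory = CdmEngineFactory()   # defaults to a local SQLite file "cdm.sqlite"
    engine = factory.engine        # the async engine is created on first access
    Session = factory.session     # async_sessionmaker bound to that engine
    async with Session() as session:
        ...                        # use the AsyncSession here

asyncio.run(main())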
Source code in src/pyomop/engine_factory.py
class CdmEngineFactory(object):
    """Factory to create async SQLAlchemy engines and sessions for OMOP CDM.

    Supports SQLite (default), MySQL, and PostgreSQL. Exposes convenience
    properties for the configured engine and async session maker.

    Args:
        db: Database type: "sqlite", "mysql", or "pgsql".
        host: Database host (ignored for SQLite).
        port: Database port (ignored for SQLite).
        user: Database user (ignored for SQLite).
        pw: Database password (ignored for SQLite).
        name: Database name or SQLite filename.
        schema: PostgreSQL schema to use for CDM.
    """

    def __init__(
        self,
        db="sqlite",
        host="localhost",
        port=5432,
        user="root",
        pw="pass",
        name="cdm.sqlite",
        schema="",
    ):
        self._db = db
        self._name = name
        self._host = host
        self._port = port
        self._user = user
        self._pw = pw
        self._schema = schema
        self._engine = None
        self._base = None

    async def init_models(self, metadata):
        """Drop and re-create all tables from provided metadata.

        This is mainly used for tests and quick local setups.

        Args:
            metadata: SQLAlchemy ``MetaData`` containing table definitions.

        Raises:
            ValueError: If the engine has not been initialized.
        """
        if self._engine is None:
            raise ValueError("Database engine is not initialized.")
        async with self._engine.begin() as conn:
            await conn.run_sync(metadata.drop_all)
            await conn.run_sync(metadata.create_all)

    @property
    def db(self):
        """Return the configured database type (sqlite/mysql/pgsql)."""
        return self._db

    @property
    def host(self):
        """Return the configured database host (if applicable)."""
        return self._host

    @property
    def port(self):
        """Return the configured database port (if applicable)."""
        return self._port

    @property
    def name(self):
        """Return the configured database name or SQLite filename."""
        return self._name

    @property
    def user(self):
        """Return the configured database user (if applicable)."""
        return self._user

    @property
    def pw(self):
        """Return the configured database password (if applicable)."""
        return self._pw

    @property
    def schema(self):
        """Return the configured schema (PostgreSQL)."""
        return self._schema

    @property
    def base(self):
        """Return automapped classes when an engine exists, otherwise None."""
        if self.engine is not None:  # use the property, not self._engine, so the engine is created on demand
            Base = automap_base()
            Base.prepare(self.engine, reflect=True)
            return Base.classes
        return None

    @property
    def engine(self):
        """Create or return the async engine for the configured backend.

        Returns:
            Async engine instance bound to the configured database.
        """
        if self._db == "sqlite":
            # Schemas are not supported for SQLite; warn if a non-default schema was provided
            if self._schema and self._schema not in ("", "public"):
                logger.warning(
                    "Schema is not supported for SQLite; ignoring schema='%s'",
                    self._schema,
                )
            self._engine = create_async_engine("sqlite+aiosqlite:///" + self._name)
        elif self._db == "mysql":
            # Schemas are not supported for MySQL in the same way as PostgreSQL; warn and ignore
            if self._schema and self._schema not in ("", "public"):
                logger.warning(
                    "Schema is not supported for MySQL; ignoring schema='%s'",
                    self._schema,
                )
            mysql_url = "mysql://{}:{}@{}:{}/{}"
            mysql_url = mysql_url.format(
                self._user, self._pw, self._host, self._port, self._name
            )
            self._engine = create_async_engine(
                mysql_url, isolation_level="READ UNCOMMITTED"
            )
        elif self._db == "pgsql":
            pgsql_url = "postgresql+asyncpg://{}:{}@{}:{}/{}"
            pgsql_url = pgsql_url.format(
                self._user, self._pw, self._host, self._port, self._name
            )
            connect_args = {}
            # If a schema is provided, set the PostgreSQL search_path so that all
            # operations (reflection, DDL/DML) use this schema by default.
            if self._schema and self._schema != "":
                connect_args = {"server_settings": {"search_path": self._schema}}
            self._engine = create_async_engine(pgsql_url, connect_args=connect_args)
        else:
            # Unknown DB type; create no engine and warn
            logger.warning("Unknown database type '%s'—no engine created.", self._db)
            return None
        return self._engine

    @property
    def session(self):
        """Return an async_sessionmaker for creating AsyncSession objects."""
        if self._engine is not None:
            async_session = async_sessionmaker(
                self._engine, expire_on_commit=False, class_=AsyncSession
            )
            return async_session
        return None

    @property
    def async_session(self):
        """Alias for session to maintain backward compatibility."""
        if self._engine is not None:
            async_session = async_sessionmaker(
                self._engine, expire_on_commit=False, class_=AsyncSession
            )
            return async_session
        return None

    @db.setter
    def db(self, value):
        self._db = value

    @name.setter
    def name(self, value):
        self._name = value

    @port.setter
    def port(self, value):
        self._port = value

    @host.setter
    def host(self, value):
        self._host = value

    @user.setter
    def user(self, value):
        self._user = value

    @pw.setter
    def pw(self, value):
        self._pw = value

    @schema.setter
    def schema(self, value):
        self._schema = value

async_session property

Alias for session to maintain backward compatibility.

base property

Return automapped classes when an engine exists, otherwise None.

db property writable

Return the configured database type (sqlite/mysql/pgsql).

engine property

Create or return the async engine for the configured backend.

Returns:

Async engine instance bound to the configured database.

host property writable

Return the configured database host (if applicable).

name property writable

Return the configured database name or SQLite filename.

port property writable

Return the configured database port (if applicable).

pw property writable

Return the configured database password (if applicable).

schema property writable

Return the configured schema (PostgreSQL).

session property

Return an async_sessionmaker for creating AsyncSession objects.

user property writable

Return the configured database user (if applicable).

init_models(metadata) async

Drop and re-create all tables from provided metadata.

This is mainly used for tests and quick local setups.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| metadata | | SQLAlchemy MetaData containing table definitions. | required |

Raises:

ValueError: If the engine has not been initialized.
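
A minimal sketch of (re)creating the CDM tables, assuming the CDM 5.4 models expose their metadata via pyomop.cdm54.Base:

from pyomop import CdmEngineFactory
from pyomop.cdm54 import Base  # assumed import path for the CDM 5.4 models

async def reset_schema():
    factory = CdmEngineFactory()               # SQLite by default
    _ = factory.engine                         # the engine must exist before init_models
    await factory.init_models(Base.metadata)   # drops and re-creates all tables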

Source code in src/pyomop/engine_factory.py
async def init_models(self, metadata):
    """Drop and re-create all tables from provided metadata.

    This is mainly used for tests and quick local setups.

    Args:
        metadata: SQLAlchemy ``MetaData`` containing table definitions.

    Raises:
        ValueError: If the engine has not been initialized.
    """
    if self._engine is None:
        raise ValueError("Database engine is not initialized.")
    async with self._engine.begin() as conn:
        await conn.run_sync(metadata.drop_all)
        await conn.run_sync(metadata.create_all)

LLM-oriented SQLDatabase wrapper for OMOP CDM.

This module provides a thin wrapper around llama_index.core.SQLDatabase that is aware of the OMOP CDM metadata reflected from this package's SQLAlchemy models. It enables LLM-powered query components to reason about available tables, columns, and foreign keys using the OMOP metadata directly.

This file is import-safe even when the optional LLM extras are not installed; in that case, attempting to instantiate CDMDatabase will raise a clear ImportError directing you to install pyomop[llm].

CDMDatabase

Bases: SQLDatabase

OMOP-aware SQLDatabase for LLM query engines.

This class adapts llama-index's SQLDatabase to use the OMOP CDM SQLAlchemy metadata bundled with this package, making it easy to expose concise schema information to LLM components.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| engine | Engine | SQLAlchemy Engine connected to the OMOP database. | required |
| schema | Optional[str] | Optional database schema name. | None |
| ignore_tables | Optional[List[str]] | Tables to hide from the LLM context. | None |
| include_tables | Optional[List[str]] | Explicit subset of tables to expose. | None |
| sample_rows_in_table_info | int | Kept for API parity (unused here). | 3 |
| indexes_in_table_info | bool | Kept for API parity (unused here). | False |
| custom_table_info | Optional[dict] | Optional overrides for table descriptions. | None |
| view_support | bool | Whether to reflect views as well (unused here). | False |
| max_string_length | int | Max length of generated descriptions. | 300 |
| version | str | OMOP CDM version label ("cdm54" or "cdm6"). | 'cdm54' |
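
A minimal sketch of constructing the wrapper (import paths assumed; requires the pyomop[llm] extra):

from sqlalchemy import create_engine
from pyomop.llm_engine import CDMDatabase  # assumed import path

engine = create_engine("sqlite:///cdm.sqlite")  # a synchronous engine; an AsyncEngine is also accepted
db = CDMDatabase(engine, include_tables=["person", "measurement"], version="cdm54")
print(db.get_single_table_info("person"))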
Source code in src/pyomop/llm_engine.py
class CDMDatabase(SQLDatabase):
    """OMOP-aware SQLDatabase for LLM query engines.

    This class adapts llama-index's ``SQLDatabase`` to use the OMOP CDM
    SQLAlchemy metadata bundled with this package, making it easy to expose
    concise schema information to LLM components.

    Args:
        engine: SQLAlchemy ``Engine`` connected to the OMOP database.
        schema: Optional database schema name.
        ignore_tables: Tables to hide from the LLM context.
        include_tables: Explicit subset of tables to expose.
        sample_rows_in_table_info: Kept for API parity (unused here).
        indexes_in_table_info: Kept for API parity (unused here).
        custom_table_info: Optional overrides for table descriptions.
        view_support: Whether to reflect views as well (unused here).
        max_string_length: Max length of generated descriptions.
        version: OMOP CDM version label ("cdm54" or "cdm6").
    """

    def __init__(
        self,
        engine: Engine,
        schema: Optional[str] = None,
        ignore_tables: Optional[List[str]] = None,
        include_tables: Optional[List[str]] = None,
        sample_rows_in_table_info: int = 3,
        indexes_in_table_info: bool = False,
        custom_table_info: Optional[dict] = None,
        view_support: bool = False,
        max_string_length: int = 300,
        version: str = "cdm54",
    ) -> None:
        if not _LLM_AVAILABLE:  # pragma: no cover - import-safe guard
            raise ImportError("Install 'pyomop[llm]' to use LLM features.")

        # Basic configuration
        self._engine = engine
        self._schema = schema

        if include_tables and ignore_tables:
            raise ValueError("Cannot specify both include_tables and ignore_tables")

        # Load OMOP metadata for the chosen version
        if version == "cdm6":
            from .cdm6 import Base
        else:
            from .cdm54 import Base
        metadata: MetaData = Base.metadata

        # All known tables (no view reflection here)
        self._all_tables = set(metadata.tables.keys())

        # Validate include/ignore lists
        self._include_tables = set(include_tables) if include_tables else set()
        if self._include_tables:
            missing = self._include_tables - self._all_tables
            if missing:
                raise ValueError(f"include_tables {missing} not found in OMOP metadata")

        self._ignore_tables = set(ignore_tables) if ignore_tables else set()
        if self._ignore_tables:
            missing = self._ignore_tables - self._all_tables
            if missing:
                raise ValueError(f"ignore_tables {missing} not found in OMOP metadata")

        if self._include_tables:
            usable = set(self._include_tables)
        elif self._ignore_tables:
            usable = self._all_tables - self._ignore_tables
        else:
            usable = set(self._all_tables)
        self._usable_tables = usable

        if not isinstance(sample_rows_in_table_info, int):
            raise TypeError("sample_rows_in_table_info must be an integer")
        self._sample_rows_in_table_info = sample_rows_in_table_info
        self._indexes_in_table_info = indexes_in_table_info

        # Optional custom descriptions
        self._custom_table_info = custom_table_info
        if self._custom_table_info is not None:
            if not isinstance(self._custom_table_info, dict):
                raise TypeError(
                    "custom_table_info must be a dict of {table_name: description}"
                )
            self._custom_table_info = {
                t: info
                for t, info in self._custom_table_info.items()
                if t in self._all_tables
            }

        self._max_string_length = max_string_length
        self._metadata = metadata

        # Initialize parent so llama-index internals are configured too.
        # llama-index expects a synchronous SQLAlchemy Engine. If an AsyncEngine
        # is provided, create a synchronous engine from the same URL.
        parent_engine: Engine
        if AsyncEngine is not None and isinstance(self._engine, AsyncEngine):
            url_str = str(self._engine.url)
            # Convert common async driver URLs to sync variants.
            # This is conservative and primarily targets SQLite used in tests.
            url_str = (
                url_str.replace("+aiosqlite", "")
                .replace("+asyncpg", "")
                .replace("+psycopg_async", "+psycopg2")
            )
            parent_engine = create_engine(url_str)
        else:
            parent_engine = self._engine  # type: ignore[assignment]

        super().__init__(
            engine=parent_engine,
            schema=schema,
            include_tables=sorted(self._usable_tables) if self._usable_tables else None,
        )

    # --- llama-index compatibility helpers (use OMOP metadata directly) ---
    def get_table_columns(self, table_name: str) -> List[str]:
        """Return list of column names for a table.

        This uses the OMOP SQLAlchemy ``MetaData`` instead of DB inspector.
        """

        return [col.name for col in self._metadata.tables[table_name].columns]

    def get_single_table_info(self, table_name: str) -> str:
        """Return a concise description of columns and foreign keys for a table.

        The format matches what llama-index expects when building table context.
        """

        template = "Table '{table_name}' has columns: {columns}, and foreign keys: {foreign_keys}."
        columns: List[str] = []
        foreign_keys: List[str] = []
        for column in self._metadata.tables[table_name].columns:
            columns.append(f"{column.name} ({column.type!s})")
            for fk in column.foreign_keys:
                foreign_keys.append(
                    f"{column.name} -> {fk.column.table.name}.{fk.column.name}"
                )
        column_str = ", ".join(columns)
        fk_str = ", ".join(foreign_keys)
        return template.format(
            table_name=table_name, columns=column_str, foreign_keys=fk_str
        )

    def usable_tables(self) -> List[str]:
        """Return the sorted list of tables exposed to the LLM.

        This respects include/ignore settings passed at initialization.
        """

        return sorted(self._usable_tables)

    # Backwards/compat helper name used in some code paths
    def get_usable_table_names(self) -> List[str]:  # pragma: no cover - thin wrapper
        return self.usable_tables()

get_single_table_info(table_name)

Return a concise description of columns and foreign keys for a table.

The format matches what llama-index expects when building table context.
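
A hypothetical example of the output shape (column list abbreviated; exact types depend on the model definitions):

info = db.get_single_table_info("person")
# e.g. "Table 'person' has columns: person_id (INTEGER), gender_concept_id (INTEGER), ...,
#       and foreign keys: gender_concept_id -> concept.concept_id, ..."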

Source code in src/pyomop/llm_engine.py
def get_single_table_info(self, table_name: str) -> str:
    """Return a concise description of columns and foreign keys for a table.

    The format matches what llama-index expects when building table context.
    """

    template = "Table '{table_name}' has columns: {columns}, and foreign keys: {foreign_keys}."
    columns: List[str] = []
    foreign_keys: List[str] = []
    for column in self._metadata.tables[table_name].columns:
        columns.append(f"{column.name} ({column.type!s})")
        for fk in column.foreign_keys:
            foreign_keys.append(
                f"{column.name} -> {fk.column.table.name}.{fk.column.name}"
            )
    column_str = ", ".join(columns)
    fk_str = ", ".join(foreign_keys)
    return template.format(
        table_name=table_name, columns=column_str, foreign_keys=fk_str
    )

get_table_columns(table_name)

Return list of column names for a table.

This uses the OMOP SQLAlchemy MetaData instead of DB inspector.
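
A hypothetical call ("person" is a standard OMOP table):

cols = db.get_table_columns("person")
# e.g. ["person_id", "gender_concept_id", "year_of_birth", ...]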

Source code in src/pyomop/llm_engine.py
def get_table_columns(self, table_name: str) -> List[str]:
    """Return list of column names for a table.

    This uses the OMOP SQLAlchemy ``MetaData`` instead of DB inspector.
    """

    return [col.name for col in self._metadata.tables[table_name].columns]

usable_tables()

Return the sorted list of tables exposed to the LLM.

This respects include/ignore settings passed at initialization.

Source code in src/pyomop/llm_engine.py
def usable_tables(self) -> List[str]:
    """Return the sorted list of tables exposed to the LLM.

    This respects include/ignore settings passed at initialization.
    """

    return sorted(self._usable_tables)

LLM query utilities over the OMOP CDM schema.

This module wires llama-index components to an OMOP-aware CDMDatabase so you can build semantic and SQL-first query engines that know about your CDM tables. All LLM-related imports are optional and performed lazily at runtime.

CdmLLMQuery

Helper that prepares an LLM-backed SQL query engine for OMOP.

It constructs an object index of selected CDM tables and exposes a retriever-backed query engine that can generate SQL or run SQL-only queries depending on configuration.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| sql_database | CDMDatabase | A CDMDatabase instance connected to the OMOP DB. | required |
| llm | Any | Optional LLM implementation to plug into llama-index settings. | None |
| similarity_top_k | int | Top-k tables to retrieve for each query. | 1 |
| embed_model | str | HuggingFace embedding model name. | 'BAAI/bge-small-en-v1.5' |
| **kwargs | Any | Reserved for future expansion. | {} |
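
A minimal sketch of wiring the pieces together (the LLM instance and import path are assumptions; requires pyomop[llm]):

from pyomop.llm_query import CdmLLMQuery  # assumed import path

# `db` is a CDMDatabase as shown above; `llm` is any llama-index-compatible LLM (assumed)
query = CdmLLMQuery(db, llm=llm, similarity_top_k=2)
response = query.query_engine.query("How many persons are in the database?")
# With sql_only=True (see the source below), the response contains the generated SQL.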
Source code in src/pyomop/llm_query.py
class CdmLLMQuery:
    """Helper that prepares an LLM-backed SQL query engine for OMOP.

    It constructs an object index of selected CDM tables and exposes a
    retriever-backed query engine that can generate SQL or run SQL-only queries
    depending on configuration.

    Args:
        sql_database: A ``CDMDatabase`` instance connected to the OMOP DB.
        llm: Optional LLM implementation to plug into llama-index settings.
        similarity_top_k: Top-k tables to retrieve for each query.
        embed_model: HuggingFace embedding model name.
        **kwargs: Reserved for future expansion.
    """

    def __init__(
        self,
        sql_database: CDMDatabase,
        llm: Any = None,  # FIXME: type
        similarity_top_k: int = 1,
        embed_model: str = "BAAI/bge-small-en-v1.5",
        **kwargs: Any,
    ):
        # Lazy import optional dependencies so the package imports without them
        try:
            sql_query_mod = importlib.import_module(
                "llama_index.core.indices.struct_store.sql_query"
            )
            objects_mod = importlib.import_module("llama_index.core.objects")
            core_mod = importlib.import_module("llama_index.core")
            hf_mod = importlib.import_module("langchain_huggingface")

            SQLTableRetrieverQueryEngine = getattr(
                sql_query_mod, "SQLTableRetrieverQueryEngine"
            )
            SQLTableNodeMapping = getattr(objects_mod, "SQLTableNodeMapping")
            ObjectIndex = getattr(objects_mod, "ObjectIndex")
            SQLTableSchema = getattr(objects_mod, "SQLTableSchema")
            VectorStoreIndex = getattr(core_mod, "VectorStoreIndex")
            Settings = getattr(core_mod, "Settings")
            HuggingFaceEmbeddings = getattr(hf_mod, "HuggingFaceEmbeddings")
        except Exception as e:  # pragma: no cover
            raise ImportError("Install 'pyomop[llm]' to use LLM query features.") from e
        self._sql_database = sql_database
        self._similarity_top_k = similarity_top_k
        self._embed_model = HuggingFaceEmbeddings(model_name=embed_model)
        self._llm = llm
        Settings.llm = llm
        Settings.embed_model = self._embed_model
        self._table_node_mapping = SQLTableNodeMapping(sql_database)
        usable_tables = []
        if hasattr(sql_database, "usable_tables"):
            usable_tables = list(sql_database.usable_tables())  # type: ignore[attr-defined]
        elif hasattr(sql_database, "get_usable_table_names"):
            usable_tables = list(sql_database.get_usable_table_names())  # type: ignore[attr-defined]
        self._table_schema_objs = [
            SQLTableSchema(table_name=t) for t in sorted(set(usable_tables))
        ]

        self._object_index = ObjectIndex.from_objects(
            self._table_schema_objs,
            self._table_node_mapping,
            VectorStoreIndex,  # type: ignore
        )

        self._query_engine = SQLTableRetrieverQueryEngine(
            self._sql_database,
            self._object_index.as_retriever(similarity_top_k=1),
            sql_only=True,
        )

    @property
    def table_node_mapping(self) -> Any:
        """Mapping between tables and nodes used by the object index."""
        return self._table_node_mapping

    @property
    def table_schema_objs(self) -> list[Any]:
        """List of table schema objects indexed for retrieval."""
        return self._table_schema_objs

    @property
    def object_index(self) -> Any:
        """The underlying llama-index object index used for retrieval."""
        return self._object_index

    @property
    def query_engine(self) -> Any:
        """A retriever-backed SQL query engine over the CDM tables."""
        return self._query_engine

object_index property

The underlying llama-index object index used for retrieval.

query_engine property

A retriever-backed SQL query engine over the CDM tables.

table_node_mapping property

Mapping between tables and nodes used by the object index.

table_schema_objs property

List of table schema objects indexed for retrieval.

CSV-to-OMOP loader.

This module implements a flexible CSV loader that can populate multiple OMOP CDM tables according to a JSON mapping file. It also performs helpful cleanup operations like foreign key normalization, birthdate backfilling, gender mapping, and concept code lookups.

CdmCsvLoader

Load a single CSV into multiple OMOP CDM tables using a JSON mapping file.

Mapping file format (JSON):

{
  "csv_key": "patient_id",            # optional, CSV column that contains the patient/person identifier
  "tables": [
    {
      "name": "cohort",               # target table name as in the database
      "filters": [                    # optional row filters applied to CSV before mapping
        {"column": "resourceType", "equals": "Encounter"}
      ],
      "columns": {                    # mapping of target_table_column -> value
        "cohort_definition_id": {"const": 1},     # constant value
        "subject_id": "patient_id",               # copy from CSV column
        "cohort_start_date": "period.start",      # copy from CSV column
        "cohort_end_date": "period.end"           # copy from CSV column
      }
    }
  ]
}

Notes
  • Constants are provided via {"const": value}.
  • If a required column is missing from mapping, it's left as None (DB default or nullable required).
  • Primary keys that are Integer types will autoincrement where supported (SQLite/PostgreSQL typical behavior).
  • Dates/times are converted to proper Python types where possible based on reflected column types.
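
A minimal usage sketch (the CSV path is a placeholder; import location assumed):

from pyomop import CdmEngineFactory
from pyomop.loader import CdmCsvLoader  # assumed import path

async def load_data():
    factory = CdmEngineFactory()                     # SQLite by default
    loader = CdmCsvLoader(factory, version="cdm54")
    # mapping_path=None falls back to the bundled mapping.default.json
    await loader.load("patients.csv", mapping_path=None, chunk_size=500)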
Source code in src/pyomop/loader.py
class CdmCsvLoader:
    """
    Load a single CSV into multiple OMOP CDM tables using a JSON mapping file.

    Mapping file format (JSON):

    {
      "csv_key": "patient_id",            # optional, CSV column that contains the patient/person identifier
      "tables": [
        {
          "name": "cohort",              # target table name as in the database
          "filters": [                     # optional row filters applied to CSV before mapping
            {"column": "resourceType", "equals": "Encounter"}
          ],
          "columns": {                     # mapping of target_table_column -> value
            "cohort_definition_id": {"const": 1},              # constant value
            "subject_id": "patient_id",                         # copy from CSV column
            "cohort_start_date": "period.start",                # copy from CSV column
            "cohort_end_date": "period.end"                     # copy from CSV column
          }
        }
      ]
    }

    Notes:
      - Constants are provided via {"const": value}.
      - If a required column is missing from mapping, it's left as None (DB default or nullable required).
      - Primary keys that are Integer types will autoincrement where supported (SQLite/PostgreSQL typical behavior).
      - Dates/times are converted to proper Python types where possible based on reflected column types.
    """

    def __init__(self, cdm_engine_factory, version: str = "cdm54") -> None:
        """Create a loader bound to a specific database engine.

        Args:
            cdm_engine_factory: An initialized ``CdmEngineFactory``.
            version: OMOP CDM version label ("cdm54" or "cdm6").
        """
        self._cdm = cdm_engine_factory
        self._engine = cdm_engine_factory.engine
        self._maker = async_sessionmaker(self._engine, class_=AsyncSession)
        self._scope = async_scoped_session(self._maker, scopefunc=asyncio.current_task)
        self._version = version

    @asynccontextmanager
    async def _get_session(self) -> AsyncGenerator[AsyncSession, None]:
        """Yield a scoped async session bound to the engine."""
        async with self._scope() as session:
            yield session

    async def _prepare_automap(self, conn: AsyncConnection) -> AutomapBase:
        """Reflect the database and return an automapped base."""
        automap: AutomapBase = automap_base()

        def _prepare(sync_conn):
            automap.prepare(autoload_with=sync_conn)

        await conn.run_sync(_prepare)
        return automap

    def _coerce_record_to_table_types(
        self,
        table,
        rec: Dict[str, Any],
        force_text_fields: Optional[Set[str]] = None,
    ) -> Dict[str, Any]:
        """Coerce a record's values to the SQL types defined by the target OMOP table.

        Rules:
        - Strings/Text: cast to str; lists/tuples joined by comma; dicts JSON-serialized; enforce max length if defined.
        - Integers/Numerics: tolerant numeric parsing; None if unparsable.
        - Dates/DateTimes: parsed via pandas; DateTime normalized to UTC-naive.
        - Forced TEXT fields: certain columns are always stringified (e.g., codes arrays). The list comes
          from mapping["force_text_fields"].

        This is applied just before insert to ensure DB type compatibility.
        """
        # Columns that should always be treated as TEXT regardless of inferred type
        force_text: Set[str] = set(force_text_fields or [])

        # Local helper: stringify numbers without trailing .0 (e.g., 1.0 -> "1")
        def _s(v: Any) -> str:
            try:
                if isinstance(v, float):
                    return str(int(v)) if v.is_integer() else str(v)
                if isinstance(v, Decimal):
                    if v == v.to_integral_value():
                        return str(int(v))
                    # Normalize to drop insignificant trailing zeros
                    return format(v.normalize(), "f")
                return "" if v is None else str(v)
            except Exception:
                return str(v)

        for col in table.columns:
            name = col.name
            if name not in rec:
                continue
            val = rec[name]
            if val is None:
                continue

            t = col.type

            # Force certain fields to TEXT
            if name in force_text:
                if isinstance(val, (list, tuple)):
                    sval = ",".join([_s(v) for v in val])
                elif isinstance(val, dict):
                    try:
                        import json as _json

                        sval = _json.dumps(val, ensure_ascii=False)
                    except Exception:
                        sval = _s(val)
                else:
                    sval = _s(val)
                max_len = getattr(t, "length", None)
                rec[name] = sval[: max_len or 255]
                continue

            # Type-driven coercion
            try:
                from pandas import to_datetime as _to_datetime
                from pandas import to_numeric as _to_numeric

                if isinstance(t, Date):
                    dt = _to_datetime(val, errors="coerce")
                    rec[name] = None if pd.isna(dt) else dt.date()
                elif isinstance(t, DateTime):
                    ts = _to_datetime(val, errors="coerce", utc=True)
                    if pd.isna(ts):
                        rec[name] = None
                    else:
                        py = ts.to_pydatetime()
                        rec[name] = py.astimezone(timezone.utc).replace(tzinfo=None)
                elif isinstance(t, (Integer, BigInteger)):
                    num = _to_numeric(val, errors="coerce")
                    rec[name] = None if pd.isna(num) else int(num)
                elif isinstance(t, Numeric):
                    num = _to_numeric(val, errors="coerce")
                    rec[name] = None if pd.isna(num) else Decimal(str(num))
                elif isinstance(t, (String, Text)):
                    if isinstance(val, (list, tuple)):
                        sval = ",".join([_s(v) for v in val])
                    elif isinstance(val, dict):
                        try:
                            import json as _json

                            sval = _json.dumps(val, ensure_ascii=False)
                        except Exception:
                            sval = _s(val)
                    else:
                        sval = _s(val)
                    max_len = getattr(t, "length", None)
                    rec[name] = sval[: max_len or 255]
                else:
                    # Leave other types as-is
                    pass
            except Exception:
                # Last resort, stringify
                try:
                    s = _s(val)
                    max_len = getattr(getattr(col, "type", None), "length", None)
                    rec[name] = s[: max_len or 255]
                except Exception:
                    rec[name] = val

        return rec

    async def _list_tables_with_person_id(self, conn: AsyncConnection) -> List[str]:
        """Return table names (in default schema) that contain a person_id column, excluding 'person'."""

        def _inner(sync_conn):
            from sqlalchemy import inspect as _inspect

            insp = _inspect(sync_conn)
            tables = insp.get_table_names()
            result: List[str] = []
            for t in tables:
                try:
                    cols = insp.get_columns(t)
                except Exception:
                    continue
                if any(c.get("name") == "person_id" for c in cols) and t != "person":
                    result.append(t)
            return result

        return await conn.run_sync(_inner)

    async def _add_person_id_text_columns(self, session: AsyncSession) -> None:
        """Add a temporary person_id_text TEXT column to all non-person tables that have person_id.

        This avoids fragile type-alter and FK juggling. We'll populate this column
        when incoming person identifiers are not numeric, then resolve to integer
        person_id in step 2 and drop the column.
        """
        conn = await session.connection()
        tables = await self._list_tables_with_person_id(conn)
        dialect = self._engine.dialect.name
        for t in tables:
            if dialect == "postgresql":
                ddl = f'ALTER TABLE "{t}" ADD COLUMN IF NOT EXISTS person_id_text TEXT'
            else:
                # SQLite (and others): try without IF NOT EXISTS
                ddl = f'ALTER TABLE "{t}" ADD COLUMN person_id_text TEXT'
            try:
                await session.execute(text(ddl))
            except Exception:
                # Column may already exist or backend may not support IF NOT EXISTS; ignore
                pass

    async def _drop_person_id_text_columns(self, session: AsyncSession) -> None:
        """Drop the temporary person_id_text column from all non-person tables."""
        conn = await session.connection()
        tables = await self._list_tables_with_person_id(conn)
        dialect = self._engine.dialect.name
        for t in tables:
            if dialect == "postgresql":
                ddl = f'ALTER TABLE "{t}" DROP COLUMN IF EXISTS person_id_text'
            else:
                ddl = f'ALTER TABLE "{t}" DROP COLUMN person_id_text'
            try:
                await session.execute(text(ddl))
            except Exception:
                # Some backends or versions (older SQLite) may not support DROP COLUMN; ignore
                pass

    def _load_mapping(self, mapping_path: str) -> Dict[str, Any]:
        """Load a JSON mapping file from disk."""
        with open(mapping_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _apply_filters(self, df: pd.DataFrame, filters: Optional[List[Dict[str, Any]]]):
        """Apply optional row filters to a DataFrame prior to mapping."""
        if not filters:
            return df
        mask = pd.Series([True] * len(df), index=df.index)
        for flt in filters:
            col = flt.get("column")
            if col is None or col not in df.columns:
                continue
            if "equals" in flt:
                mask &= (df[col] == flt["equals"]) | (
                    df[col].astype(str) == str(flt["equals"])
                )
            elif "not_empty" in flt and flt["not_empty"]:
                mask &= df[col].notna() & (df[col].astype(str).str.len() > 0)
        result = df.loc[mask, :].copy()
        return result

    def _convert_value(self, sa_type: Any, value: Any) -> Any:
        """Coerce a CSV value into an appropriate Python type for insert.

        The conversion is guided by the SQLAlchemy column type.
        """
        if pd.isna(value) or value == "":
            return None
        try:
            if isinstance(sa_type, Date):
                # Accept many input formats
                dt = pd.to_datetime(value, errors="coerce")
                return None if pd.isna(dt) else dt.date()
            if isinstance(sa_type, DateTime):
                # Normalize to UTC-naive for Postgres compatibility
                ts = pd.to_datetime(value, errors="coerce", utc=True)
                if pd.isna(ts):
                    return None
                py = ts.to_pydatetime()
                if getattr(py, "tzinfo", None) is not None:
                    py = py.astimezone(timezone.utc).replace(tzinfo=None)
                return py
            if isinstance(sa_type, (Integer, BigInteger)):
                return int(value)
            if isinstance(sa_type, Numeric):
                return Decimal(str(value))
        except Exception:
            return value
        # Trim string values to 50 characters before insert
        if isinstance(value, str):
            return value[:50]
        return value

    async def load(
        self, csv_path: str, mapping_path: str | None = None, chunk_size: int = 1000
    ) -> None:
        """Load a CSV into multiple OMOP tables based on a mapping file.

        Args:
            csv_path: Path to the input CSV file.
            mapping_path: Path to the JSON mapping file. Defaults to the
                package's ``mapping.default.json`` when not provided.
            chunk_size: Batch size for INSERT statements.
        """
        # If mapping path is None, load mapping.default.json from the current directory
        logger.info(f"Loading CSV data from {csv_path}")
        if mapping_path is None:
            mapping_path = str(Path(__file__).parent / "mapping.default.json")
        mapping = self._load_mapping(mapping_path)
        # Use low_memory=False to avoid DtypeWarning for mixed-type columns
        df = pd.read_csv(csv_path, low_memory=False)

        async with self._get_session() as session:
            # Relax constraint enforcement during bulk load on Postgres
            is_pg = False
            try:
                is_pg = str(self._engine.dialect.name).startswith("postgres")
            except Exception:
                is_pg = False
            if is_pg:
                try:
                    await session.execute(
                        text("SET session_replication_role = replica")
                    )
                except Exception:
                    pass

            try:
                conn = await session.connection()
                # Before reflecting, add a temporary person_id_text column to accept non-numeric IDs
                await self._add_person_id_text_columns(session)
                automap = await self._prepare_automap(conn)

                for tbl in mapping.get("tables", []):
                    table_name = tbl.get("name")
                    if not table_name:
                        continue
                    # obtain mapped class
                    try:
                        mapper = getattr(automap.classes, table_name)
                    except AttributeError:
                        raise ValueError(f"Table '{table_name}' not found in database.")

                    # compute filtered dataframe
                    df_tbl = self._apply_filters(df, tbl.get("filters"))
                    if df_tbl.empty:
                        continue

                    col_map: Dict[str, Any] = tbl.get("columns", {})
                    # Gather target SQLA column metadata
                    sa_cols = {c.name: c.type for c in mapper.__table__.columns}
                    sa_col_objs = {c.name: c for c in mapper.__table__.columns}

                    # Build records
                    records: List[Dict[str, Any]] = []
                    for _, row in df_tbl.iterrows():
                        rec: Dict[str, Any] = {}
                        for target_col, src in col_map.items():
                            if isinstance(src, dict) and "const" in src:
                                value = src["const"]
                            elif isinstance(src, str):
                                value = row.get(src)
                            else:
                                value = None

                            # IMPORTANT: keep person_id raw for staging logic below
                            if target_col != "person_id":
                                # Convert based on SA type if available
                                sa_t = sa_cols.get(target_col)
                                if sa_t is not None:
                                    value = self._convert_value(sa_t, value)
                            rec[target_col] = value

                        # If person_id exists in the record, route non-numeric values into person_id_text
                        if "person_id" in rec:
                            pid = rec.get("person_id")
                            if pid is None:
                                # If NOT NULL, use placeholder to avoid constraint errors
                                col = sa_col_objs.get("person_id")
                                if col is not None and not getattr(
                                    col, "nullable", True
                                ):
                                    rec["person_id"] = 0
                            elif isinstance(pid, int):
                                pass
                            elif isinstance(pid, str) and pid.strip().isdigit():
                                try:
                                    rec["person_id"] = int(pid.strip())
                                except Exception:
                                    # If conversion unexpectedly fails, send to text column
                                    if "person_id_text" in sa_cols:
                                        rec["person_id_text"] = str(pid)
                                    # Respect NOT NULL with placeholder when required
                                    col = sa_col_objs.get("person_id")
                                    if col is not None and not getattr(
                                        col, "nullable", True
                                    ):
                                        rec["person_id"] = 0
                                    else:
                                        rec["person_id"] = None
                            else:
                                # Non-numeric content: place into person_id_text if available
                                if "person_id_text" in sa_cols:
                                    rec["person_id_text"] = str(pid)
                                # Respect NOT NULL with placeholder when required
                                col = sa_col_objs.get("person_id")
                                if col is not None and not getattr(
                                    col, "nullable", True
                                ):
                                    rec["person_id"] = 0
                                else:
                                    rec["person_id"] = None
                        # Finally coerce all fields to the table's schema (string lengths, forced TEXT, datetimes)
                        rec = self._coerce_record_to_table_types(
                            mapper.__table__,
                            rec,
                            set(mapping.get("force_text_fields", [])),
                        )
                        records.append(rec)

                    if not records:
                        continue

                    stmt = insert(mapper)
                    # Chunked insert
                    for i in range(0, len(records), chunk_size):
                        batch = records[i : i + chunk_size]
                        await session.execute(stmt, batch)

                # Step 2: Normalize person_id FKs using person.person_id (not person_source_value)
                logger.info("Normalizing person_id foreign keys")
                await self.fix_person_id(session, automap)

                # Drop the temporary person_id_text columns now that person_id has been normalized
                await self._drop_person_id_text_columns(session)

                # Step 3: Backfill year/month/day of birth from birth_datetime where missing or zero
                logger.info("Backfilling person birth fields")
                await self.backfill_person_birth_fields(session, automap)

                # Step 4: Set gender_concept_id from gender_source_value using standard IDs
                logger.info("Setting person.gender_concept_id from gender_source_value")
                await self.update_person_gender_concept_id(session, automap)

                # Step 5: Apply concept mappings defined in the JSON mapping
                logger.info("Applying concept mappings")
                await self.apply_concept_mappings(session, automap, mapping)

                await session.commit()
            finally:
                if is_pg:
                    try:
                        await session.execute(
                            text("SET session_replication_role = origin")
                        )
                    except Exception:
                        pass
                await session.close()

    async def fix_person_id(self, session: AsyncSession, automap: AutomapBase) -> None:
        """
        Update all tables so that person_id foreign keys store the canonical
        person.person_id (integer), replacing any rows where person_id currently
        contains the person_source_value (string/UUID).

        Approach:
        - Build a mapping from person_source_value -> person_id from the person table.
        - For each table (except person) having a person_id column, run updates:
          SET person_id = person.person_id WHERE CAST(person_id AS TEXT) = person_source_value.
        - This is safe for SQLite (used in examples). For stricter RDBMS, ensure types
          are compatible or adjust as needed.
        """
        # Resolve person table from automap
        try:
            person_cls = getattr(automap.classes, "person")
        except AttributeError:
            return  # No person table; nothing to do

        person_table = person_cls.__table__

        # Build mapping of person_source_value -> person_id
        res = await session.execute(
            select(person_table.c.person_source_value, person_table.c.person_id).where(
                person_table.c.person_source_value.isnot(None)
            )
        )
        pairs = res.fetchall()
        if not pairs:
            return

        psv_to_id: Dict[str, int] = {}
        for psv, pid in pairs:
            if psv is None or pid is None:
                continue
            psv_to_id[str(psv)] = int(pid)

        if not psv_to_id:
            return

        # Iterate all tables and update person_id where it matches a known person_source_value
        # Also handle rows that staged a non-numeric value in person_id_text.
        # Avoid metadata.sorted_tables to prevent SAWarning about unresolvable cycles in vocab tables.
        for tbl_name, table in automap.metadata.tables.items():
            if tbl_name == person_table.name:
                continue
            if "person_id" not in table.c:
                continue

            # If person_id_text exists, prefer it for matching
            has_text = "person_id_text" in table.c

            # Run per-psv updates; small and explicit for clarity
            for psv, pid in psv_to_id.items():
                if has_text:
                    # Match on person_id_text
                    stmt = (
                        update(table)
                        .where(table.c.person_id_text == psv)
                        .values(person_id=pid)
                    )
                    await session.execute(stmt)
                # Also try matching where person_id was staged as string
                stmt2 = (
                    update(table)
                    .where(cast(table.c.person_id, String()) == psv)
                    .values(person_id=pid)
                )
                await session.execute(stmt2)

            # Clear person_id_text after normalization when column exists
            if has_text:
                try:
                    await session.execute(
                        update(table)
                        .where(table.c.person_id_text.isnot(None))
                        .values(person_id_text=None)
                    )
                except Exception:
                    pass

    async def update_person_gender_concept_id(
        self, session: AsyncSession, automap: AutomapBase
    ) -> None:
        """
        Update person.gender_concept_id from person.gender_source_value using static mapping:
        - male (or 'm')   -> 8507
        - female (or 'f') -> 8532
        - anything else   -> 0 (unknown)

        Only updates rows where the computed value differs from the current value
        or where gender_concept_id is NULL.
        """
        try:
            person_cls = getattr(automap.classes, "person")
        except AttributeError:
            return

        person_table = person_cls.__table__

        # Fetch rows to evaluate. We consider all rows with a non-null gender_source_value
        res = await session.execute(
            select(
                person_table.c.person_id,
                person_table.c.gender_source_value,
                person_table.c.gender_concept_id,
            ).where(person_table.c.gender_source_value.isnot(None))
        )

        rows = res.fetchall()
        if not rows:
            return

        def map_gender(val: str | None) -> int:
            if val is None:
                return 0
            s = str(val).strip().lower()
            if s in {"male", "m"}:
                return 8507
            if s in {"female", "f"}:
                return 8532
            return 0

        for pid, gsrc, gcid in rows:
            target = map_gender(gsrc)
            # Skip if already correct
            if gcid == target:
                continue
            stmt = (
                update(person_table)
                .where(person_table.c.person_id == pid)
                .values(gender_concept_id=target)
            )
            await session.execute(stmt)

    async def backfill_person_birth_fields(
        self, session: AsyncSession, automap: AutomapBase
    ) -> None:
        """
            In the person table, replace 0 or NULL values in year_of_birth, month_of_birth,
            and day_of_birth with values derived from birth_datetime.

        This runs in Python for portability across backends.
        """
        # Resolve person table from automap
        try:
            person_cls = getattr(automap.classes, "person")
        except AttributeError:
            return

        person_table = person_cls.__table__

        # Fetch necessary columns
        res = await session.execute(
            select(
                person_table.c.person_id,
                person_table.c.birth_datetime,
                person_table.c.year_of_birth,
                person_table.c.month_of_birth,
                person_table.c.day_of_birth,
            ).where(person_table.c.birth_datetime.isnot(None))
        )

        rows = res.fetchall()
        if not rows:
            return

        for pid, birth_dt, y, m, d in rows:
            # Parse birth_dt to a datetime if needed
            bd: Optional[datetime]
            if isinstance(birth_dt, datetime):
                # Normalize timezone-aware to UTC-naive
                if birth_dt.tzinfo is not None:
                    bd = birth_dt.astimezone(timezone.utc).replace(tzinfo=None)
                else:
                    bd = birth_dt
            elif isinstance(birth_dt, date):
                bd = datetime(birth_dt.year, birth_dt.month, birth_dt.day)
            else:
                try:
                    tmp = pd.to_datetime(birth_dt, errors="coerce", utc=True)
                    if pd.isna(tmp):
                        bd = None
                    else:
                        py = tmp.to_pydatetime()
                        bd = py.astimezone(timezone.utc).replace(tzinfo=None)
                except Exception:
                    bd = None

            if bd is None:
                continue

            new_y = y if (y is not None and int(y or 0) != 0) else bd.year
            new_m = m if (m is not None and int(m or 0) != 0) else bd.month
            new_d = d if (d is not None and int(d or 0) != 0) else bd.day

            # Only update when something changes
            if new_y != y or new_m != m or new_d != d:
                stmt = (
                    update(person_table)
                    .where(person_table.c.person_id == pid)
                    .values(
                        year_of_birth=new_y,
                        month_of_birth=new_m,
                        day_of_birth=new_d,
                    )
                )
                await session.execute(stmt)

    async def apply_concept_mappings(
        self,
        session: AsyncSession,
        automap: AutomapBase,
        mapping: Dict[str, Any],
    ) -> None:
        """
            Based on the "concept" key in the mapping JSON, populate target *_concept_id columns
            by looking up concept.concept_id using codes found in the specified source column.

            Rules:
        - If the source value is a comma-separated string, use only the first element for lookup.
        - Find by equality on concept.concept_code.
        - Update the target column with the matching concept.concept_id.
        """
        if not mapping or "concept" not in mapping:
            return

        # Resolve concept table
        try:
            concept_cls = getattr(automap.classes, "concept")
        except AttributeError:
            return

        concept_table = concept_cls.__table__

        # Simple in-memory code to cid mapping
        code_to_cid: Dict[str, Optional[int]] = {}

        async def lookup_concept_id(code: str) -> Optional[int]:
            if code in code_to_cid:
                return code_to_cid[code]
            res = await session.execute(
                select(concept_table.c.concept_id).where(
                    concept_table.c.concept_code == code
                )
            )
            row = res.first()
            cid = int(row[0]) if row and row[0] is not None else None
            code_to_cid[code] = cid
            return cid

        for item in mapping.get("concept", []):
            table_name = item.get("table")
            if not table_name:
                continue
            try:
                mapper = getattr(automap.classes, table_name)
            except AttributeError:
                # Target table not found; skip
                continue

            table = mapper.__table__
            pk_cols = list(table.primary_key.columns)
            if not pk_cols:
                # Cannot safely update without a primary key
                continue

            for m in item.get("mappings", []):
                source_col = m.get("source")
                target_col = m.get("target")
                if not source_col or not target_col:
                    continue
                if source_col not in table.c or target_col not in table.c:
                    continue

                # Fetch candidate rows: target is NULL or 0, and source is not NULL/empty
                res = await session.execute(
                    select(
                        *pk_cols,
                        table.c[source_col].label("_src"),
                        table.c[target_col].label("_tgt"),
                    ).where(
                        or_(
                            table.c[target_col].is_(None),
                            table.c[target_col] == 0,
                        ),
                        table.c[source_col].isnot(None),
                    )
                )

                rows = res.fetchall()
                if not rows:
                    continue

                for row in rows:
                    # row is a tuple: (*pk_vals, _src, _tgt)
                    pk_vals = row[: len(pk_cols)]
                    src_val = row[len(pk_cols)]

                    # Only care about non-empty strings; if comma-separated, take first element
                    code: Optional[str] = None
                    if isinstance(src_val, str):
                        # Split on comma and strip whitespace
                        first = src_val.split(",")[0].strip()
                        code = first if first else None
                    elif isinstance(src_val, list) and src_val:
                        # If a list somehow made it into the DB, use first element's string
                        code = str(src_val[0])
                    else:
                        # Fallback to simple string conversion if it's a scalar
                        code = str(src_val) if src_val is not None else None

                    if not code:
                        continue

                    cid = await lookup_concept_id(code)
                    if cid is None:
                        continue

                    # Build WHERE with PK columns
                    where_clause = and_(
                        *[
                            (pk_col == pk_val)
                            for pk_col, pk_val in zip(pk_cols, pk_vals)
                        ]
                    )

                    stmt = update(table).where(where_clause).values({target_col: cid})
                    await session.execute(stmt)

__init__(cdm_engine_factory, version='cdm54')

Create a loader bound to a specific database engine.

Parameters:

Name Type Description Default
cdm_engine_factory

An initialized CdmEngineFactory.

required
version str

OMOP CDM version label ("cdm54" or "cdm6").

'cdm54'
Source code in src/pyomop/loader.py
def __init__(self, cdm_engine_factory, version: str = "cdm54") -> None:
    """Create a loader bound to a specific database engine.

    Args:
        cdm_engine_factory: An initialized ``CdmEngineFactory``.
        version: OMOP CDM version label ("cdm54" or "cdm6").
    """
    self._cdm = cdm_engine_factory
    self._engine = cdm_engine_factory.engine
    self._maker = async_sessionmaker(self._engine, class_=AsyncSession)
    self._scope = async_scoped_session(self._maker, scopefunc=asyncio.current_task)
    self._version = version
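
The constructor wires an async_sessionmaker into an async_scoped_session keyed by asyncio.current_task, so each task transparently gets its own session. A minimal standalone sketch of that SQLAlchemy pattern (generic, not pyomop-specific; the SQLite URL is only illustrative):

import asyncio

from sqlalchemy.ext.asyncio import (
    AsyncSession,
    async_scoped_session,
    async_sessionmaker,
    create_async_engine,
)

engine = create_async_engine("sqlite+aiosqlite:///cdm.sqlite")
maker = async_sessionmaker(engine, class_=AsyncSession)
scope = async_scoped_session(maker, scopefunc=asyncio.current_task)

async def do_work():
    # Each asyncio task gets its own session from the scoped registry.
    async with scope() as session:
        ...  # run queries with `session`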

apply_concept_mappings(session, automap, mapping) async

Based on the "concept" key in the mapping JSON, populate target *_concept_id columns
by looking up concept.concept_id using codes found in the specified source column.

Rules:
  • If the source value is a comma-separated string, use only the first element for lookup.
  • Find by equality on concept.concept_code.
  • Update the target column with the matching concept.concept_id.
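
The shape this method expects under the "concept" key, inferred from the source below; the table and column names are illustrative, not taken from the shipped mapping.default.json:

mapping = {
    "concept": [
        {
            "table": "condition_occurrence",  # target OMOP table (illustrative)
            "mappings": [
                {
                    "source": "condition_source_value",  # column holding the raw code
                    "target": "condition_concept_id",    # *_concept_id column to populate
                },
            ],
        },
    ],
}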
Source code in src/pyomop/loader.py
async def apply_concept_mappings(
    self,
    session: AsyncSession,
    automap: AutomapBase,
    mapping: Dict[str, Any],
) -> None:
    """
        Based on the "concept" key in the mapping JSON, populate target *_concept_id columns
        by looking up concept.concept_id using codes found in the specified source column.

        Rules:
    - If the source value is a comma-separated string, use only the first element for lookup.
    - Find by equality on concept.concept_code.
    - Update the target column with the matching concept.concept_id.
    """
    if not mapping or "concept" not in mapping:
        return

    # Resolve concept table
    try:
        concept_cls = getattr(automap.classes, "concept")
    except AttributeError:
        return

    concept_table = concept_cls.__table__

    # Simple in-memory code to cid mapping
    code_to_cid: Dict[str, Optional[int]] = {}

    async def lookup_concept_id(code: str) -> Optional[int]:
        if code in code_to_cid:
            return code_to_cid[code]
        res = await session.execute(
            select(concept_table.c.concept_id).where(
                concept_table.c.concept_code == code
            )
        )
        row = res.first()
        cid = int(row[0]) if row and row[0] is not None else None
        code_to_cid[code] = cid
        return cid

    for item in mapping.get("concept", []):
        table_name = item.get("table")
        if not table_name:
            continue
        try:
            mapper = getattr(automap.classes, table_name)
        except AttributeError:
            # Target table not found; skip
            continue

        table = mapper.__table__
        pk_cols = list(table.primary_key.columns)
        if not pk_cols:
            # Cannot safely update without a primary key
            continue

        for m in item.get("mappings", []):
            source_col = m.get("source")
            target_col = m.get("target")
            if not source_col or not target_col:
                continue
            if source_col not in table.c or target_col not in table.c:
                continue

            # Fetch candidate rows: target is NULL or 0, and source is not NULL/empty
            res = await session.execute(
                select(
                    *pk_cols,
                    table.c[source_col].label("_src"),
                    table.c[target_col].label("_tgt"),
                ).where(
                    or_(
                        table.c[target_col].is_(None),
                        table.c[target_col] == 0,
                    ),
                    table.c[source_col].isnot(None),
                )
            )

            rows = res.fetchall()
            if not rows:
                continue

            for row in rows:
                # row is a tuple: (*pk_vals, _src, _tgt)
                pk_vals = row[: len(pk_cols)]
                src_val = row[len(pk_cols)]

                # Only care about non-empty strings; if comma-separated, take first element
                code: Optional[str] = None
                if isinstance(src_val, str):
                    # Split on comma and strip whitespace
                    first = src_val.split(",")[0].strip()
                    code = first if first else None
                elif isinstance(src_val, list) and src_val:
                    # If a list somehow made it into the DB, use first element's string
                    code = str(src_val[0])
                else:
                    # Fallback to simple string conversion if it's a scalar
                    code = str(src_val) if src_val is not None else None

                if not code:
                    continue

                cid = await lookup_concept_id(code)
                if cid is None:
                    continue

                # Build WHERE with PK columns
                where_clause = and_(
                    *[
                        (pk_col == pk_val)
                        for pk_col, pk_val in zip(pk_cols, pk_vals)
                    ]
                )

                stmt = update(table).where(where_clause).values({target_col: cid})
                await session.execute(stmt)

backfill_person_birth_fields(session, automap) async

In the person table, replace 0 or NULL values in year_of_birth, month_of_birth,
and day_of_birth with values derived from birth_datetime.

This runs in Python for portability across backends.
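
For example, a person row with birth_datetime 1980-05-17 and year_of_birth = 0 is updated to year 1980, month 5, day 17. The core derivation, which keeps any non-zero values already present, is roughly:

from datetime import datetime

bd = datetime(1980, 5, 17)
year_of_birth, month_of_birth, day_of_birth = 0, None, None

new_year = year_of_birth if year_of_birth not in (None, 0) else bd.year      # 1980
new_month = month_of_birth if month_of_birth not in (None, 0) else bd.month  # 5
new_day = day_of_birth if day_of_birth not in (None, 0) else bd.day          # 17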

Source code in src/pyomop/loader.py
async def backfill_person_birth_fields(
    self, session: AsyncSession, automap: AutomapBase
) -> None:
    """
        In the person table, replace 0 or NULL values in year_of_birth, month_of_birth,
        and day_of_birth with values derived from birth_datetime.

    This runs in Python for portability across backends.
    """
    # Resolve person table from automap
    try:
        person_cls = getattr(automap.classes, "person")
    except AttributeError:
        return

    person_table = person_cls.__table__

    # Fetch necessary columns
    res = await session.execute(
        select(
            person_table.c.person_id,
            person_table.c.birth_datetime,
            person_table.c.year_of_birth,
            person_table.c.month_of_birth,
            person_table.c.day_of_birth,
        ).where(person_table.c.birth_datetime.isnot(None))
    )

    rows = res.fetchall()
    if not rows:
        return

    for pid, birth_dt, y, m, d in rows:
        # Parse birth_dt to a datetime if needed
        bd: Optional[datetime]
        if isinstance(birth_dt, datetime):
            # Normalize timezone-aware to UTC-naive
            if birth_dt.tzinfo is not None:
                bd = birth_dt.astimezone(timezone.utc).replace(tzinfo=None)
            else:
                bd = birth_dt
        elif isinstance(birth_dt, date):
            bd = datetime(birth_dt.year, birth_dt.month, birth_dt.day)
        else:
            try:
                tmp = pd.to_datetime(birth_dt, errors="coerce", utc=True)
                if pd.isna(tmp):
                    bd = None
                else:
                    py = tmp.to_pydatetime()
                    bd = py.astimezone(timezone.utc).replace(tzinfo=None)
            except Exception:
                bd = None

        if bd is None:
            continue

        new_y = y if (y is not None and int(y or 0) != 0) else bd.year
        new_m = m if (m is not None and int(m or 0) != 0) else bd.month
        new_d = d if (d is not None and int(d or 0) != 0) else bd.day

        # Only update when something changes
        if new_y != y or new_m != m or new_d != d:
            stmt = (
                update(person_table)
                .where(person_table.c.person_id == pid)
                .values(
                    year_of_birth=new_y,
                    month_of_birth=new_m,
                    day_of_birth=new_d,
                )
            )
            await session.execute(stmt)

fix_person_id(session, automap) async

Update all tables so that person_id foreign keys store the canonical person.person_id (integer), replacing any rows where person_id currently contains the person_source_value (string/UUID).

Approach:

- Build a mapping from person_source_value -> person_id from the person table.
- For each table (except person) having a person_id column, run updates: SET person_id = person.person_id WHERE CAST(person_id AS TEXT) = person_source_value.
- This is safe for SQLite (used in examples). For stricter RDBMS, ensure types are compatible or adjust as needed.

Source code in src/pyomop/loader.py
async def fix_person_id(self, session: AsyncSession, automap: AutomapBase) -> None:
    """
    Update all tables so that person_id foreign keys store the canonical
    person.person_id (integer), replacing any rows where person_id currently
    contains the person_source_value (string/UUID).

    Approach:
    - Build a mapping from person_source_value -> person_id from the person table.
    - For each table (except person) having a person_id column, run updates:
                SET person_id = person.person_id WHERE CAST(person_id AS TEXT) = person_source_value.
            - This is safe for SQLite (used in examples). For stricter RDBMS, ensure types
                are compatible or adjust as needed.
    """
    # Resolve person table from automap
    try:
        person_cls = getattr(automap.classes, "person")
    except AttributeError:
        return  # No person table; nothing to do

    person_table = person_cls.__table__

    # Build mapping of person_source_value -> person_id
    res = await session.execute(
        select(person_table.c.person_source_value, person_table.c.person_id).where(
            person_table.c.person_source_value.isnot(None)
        )
    )
    pairs = res.fetchall()
    if not pairs:
        return

    psv_to_id: Dict[str, int] = {}
    for psv, pid in pairs:
        if psv is None or pid is None:
            continue
        psv_to_id[str(psv)] = int(pid)

    if not psv_to_id:
        return

    # Iterate all tables and update person_id where it matches a known person_source_value
    # Also handle rows that staged a non-numeric value in person_id_text.
    # Avoid metadata.sorted_tables to prevent SAWarning about unresolvable cycles in vocab tables.
    for tbl_name, table in automap.metadata.tables.items():
        if tbl_name == person_table.name:
            continue
        if "person_id" not in table.c:
            continue

        # If person_id_text exists, prefer it for matching
        has_text = "person_id_text" in table.c

        # Run per-psv updates; small and explicit for clarity
        for psv, pid in psv_to_id.items():
            if has_text:
                # Match on person_id_text
                stmt = (
                    update(table)
                    .where(table.c.person_id_text == psv)
                    .values(person_id=pid)
                )
                await session.execute(stmt)
            # Also try matching where person_id was staged as string
            stmt2 = (
                update(table)
                .where(cast(table.c.person_id, String()) == psv)
                .values(person_id=pid)
            )
            await session.execute(stmt2)

        # Clear person_id_text after normalization when column exists
        if has_text:
            try:
                await session.execute(
                    update(table)
                    .where(table.c.person_id_text.isnot(None))
                    .values(person_id_text=None)
                )
            except Exception:
                pass

load(csv_path, mapping_path=None, chunk_size=1000) async

Load a CSV into multiple OMOP tables based on a mapping file.

Parameters:

Name Type Description Default
csv_path str

Path to the input CSV file.

required
mapping_path str | None

Path to the JSON mapping file. Defaults to the package's mapping.default.json when not provided.

None
chunk_size int

Batch size for INSERT statements.

1000
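
A typical end-to-end call, assuming an initialized CdmEngineFactory. CdmCsvLoader is used here only as a placeholder name for the loader class defined in src/pyomop/loader.py, and the import paths are assumptions; check the package exports for the exact names:

import asyncio

from pyomop import CdmEngineFactory          # import path assumed
from pyomop.loader import CdmCsvLoader       # placeholder class name

async def main():
    cdm = CdmEngineFactory()                 # defaults to a local SQLite file
    loader = CdmCsvLoader(cdm, version="cdm54")

    # Use the bundled mapping.default.json ...
    await loader.load("patients.csv")
    # ... or an explicit mapping and a smaller batch size.
    await loader.load("patients.csv", mapping_path="my_mapping.json", chunk_size=500)

asyncio.run(main())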
Source code in src/pyomop/loader.py
async def load(
    self, csv_path: str, mapping_path: str | None = None, chunk_size: int = 1000
) -> None:
    """Load a CSV into multiple OMOP tables based on a mapping file.

    Args:
        csv_path: Path to the input CSV file.
        mapping_path: Path to the JSON mapping file. Defaults to the
            package's ``mapping.default.json`` when not provided.
        chunk_size: Batch size for INSERT statements.
    """
    # If mapping path is None, load mapping.default.json from the current directory
    logger.info(f"Loading CSV data from {csv_path}")
    if mapping_path is None:
        mapping_path = str(Path(__file__).parent / "mapping.default.json")
    mapping = self._load_mapping(mapping_path)
    # Use low_memory=False to avoid DtypeWarning for mixed-type columns
    df = pd.read_csv(csv_path, low_memory=False)

    async with self._get_session() as session:
        # Relax constraint enforcement during bulk load on Postgres
        is_pg = False
        try:
            is_pg = str(self._engine.dialect.name).startswith("postgres")
        except Exception:
            is_pg = False
        if is_pg:
            try:
                await session.execute(
                    text("SET session_replication_role = replica")
                )
            except Exception:
                pass

        try:
            conn = await session.connection()
            # Before reflecting, add a temporary person_id_text column to accept non-numeric IDs
            await self._add_person_id_text_columns(session)
            automap = await self._prepare_automap(conn)

            for tbl in mapping.get("tables", []):
                table_name = tbl.get("name")
                if not table_name:
                    continue
                # obtain mapped class
                try:
                    mapper = getattr(automap.classes, table_name)
                except AttributeError:
                    raise ValueError(f"Table '{table_name}' not found in database.")

                # compute filtered dataframe
                df_tbl = self._apply_filters(df, tbl.get("filters"))
                if df_tbl.empty:
                    continue

                col_map: Dict[str, Any] = tbl.get("columns", {})
                # Gather target SQLA column metadata
                sa_cols = {c.name: c.type for c in mapper.__table__.columns}
                sa_col_objs = {c.name: c for c in mapper.__table__.columns}

                # Build records
                records: List[Dict[str, Any]] = []
                for _, row in df_tbl.iterrows():
                    rec: Dict[str, Any] = {}
                    for target_col, src in col_map.items():
                        if isinstance(src, dict) and "const" in src:
                            value = src["const"]
                        elif isinstance(src, str):
                            value = row.get(src)
                        else:
                            value = None

                        # IMPORTANT: keep person_id raw for staging logic below
                        if target_col != "person_id":
                            # Convert based on SA type if available
                            sa_t = sa_cols.get(target_col)
                            if sa_t is not None:
                                value = self._convert_value(sa_t, value)
                        rec[target_col] = value

                    # If person_id exists in the record, route non-numeric values into person_id_text
                    if "person_id" in rec:
                        pid = rec.get("person_id")
                        if pid is None:
                            # If NOT NULL, use placeholder to avoid constraint errors
                            col = sa_col_objs.get("person_id")
                            if col is not None and not getattr(
                                col, "nullable", True
                            ):
                                rec["person_id"] = 0
                        elif isinstance(pid, int):
                            pass
                        elif isinstance(pid, str) and pid.strip().isdigit():
                            try:
                                rec["person_id"] = int(pid.strip())
                            except Exception:
                                # If conversion unexpectedly fails, send to text column
                                if "person_id_text" in sa_cols:
                                    rec["person_id_text"] = str(pid)
                                # Respect NOT NULL with placeholder when required
                                col = sa_col_objs.get("person_id")
                                if col is not None and not getattr(
                                    col, "nullable", True
                                ):
                                    rec["person_id"] = 0
                                else:
                                    rec["person_id"] = None
                        else:
                            # Non-numeric content: place into person_id_text if available
                            if "person_id_text" in sa_cols:
                                rec["person_id_text"] = str(pid)
                            # Respect NOT NULL with placeholder when required
                            col = sa_col_objs.get("person_id")
                            if col is not None and not getattr(
                                col, "nullable", True
                            ):
                                rec["person_id"] = 0
                            else:
                                rec["person_id"] = None
                    # Finally coerce all fields to the table's schema (string lengths, forced TEXT, datetimes)
                    rec = self._coerce_record_to_table_types(
                        mapper.__table__,
                        rec,
                        set(mapping.get("force_text_fields", [])),
                    )
                    records.append(rec)

                if not records:
                    continue

                stmt = insert(mapper)
                # Chunked insert
                for i in range(0, len(records), chunk_size):
                    batch = records[i : i + chunk_size]
                    await session.execute(stmt, batch)

            # Step 2: Normalize person_id FKs using person.person_id (not person_source_value)
            logger.info("Normalizing person_id foreign keys")
            await self.fix_person_id(session, automap)

            # Drop the temporary person_id_text columns now that person_id has been normalized
            await self._drop_person_id_text_columns(session)

            # Step 3: Backfill year/month/day of birth from birth_datetime where missing or zero
            logger.info("Backfilling person birth fields")
            await self.backfill_person_birth_fields(session, automap)

            # Step 4: Set gender_concept_id from gender_source_value using standard IDs
            logger.info("Setting person.gender_concept_id from gender_source_value")
            await self.update_person_gender_concept_id(session, automap)

            # Step 5: Apply concept mappings defined in the JSON mapping
            logger.info("Applying concept mappings")
            await self.apply_concept_mappings(session, automap, mapping)

            await session.commit()
        finally:
            if is_pg:
                try:
                    await session.execute(
                        text("SET session_replication_role = origin")
                    )
                except Exception:
                    pass
            await session.close()

update_person_gender_concept_id(session, automap) async

Update person.gender_concept_id from person.gender_source_value using a static mapping:
- male (or 'm')   -> 8507
- female (or 'f') -> 8532
- anything else   -> 0 (unknown)

Only updates rows where the computed value differs from the current value or where gender_concept_id is NULL.
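
The same mapping expressed as a standalone helper, a dict-based variant of the method's internal map_gender logic:

GENDER_CONCEPTS = {"male": 8507, "m": 8507, "female": 8532, "f": 8532}

def map_gender(value):
    # Unknown, empty, or unexpected values fall back to concept_id 0.
    if value is None:
        return 0
    return GENDER_CONCEPTS.get(str(value).strip().lower(), 0)

assert map_gender("F") == 8532
assert map_gender(None) == 0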

Source code in src/pyomop/loader.py
async def update_person_gender_concept_id(
    self, session: AsyncSession, automap: AutomapBase
) -> None:
    """
        Update person.gender_concept_id from person.gender_source_value using static mapping:
        - male (or 'm')   -> 8507
        - female (or 'f') -> 8532
        - anything else   -> 0 (unknown)

    Only updates rows where the computed value differs from the current value
    or where gender_concept_id is NULL.
    """
    try:
        person_cls = getattr(automap.classes, "person")
    except AttributeError:
        return

    person_table = person_cls.__table__

    # Fetch rows to evaluate. We consider all rows with a non-null gender_source_value
    res = await session.execute(
        select(
            person_table.c.person_id,
            person_table.c.gender_source_value,
            person_table.c.gender_concept_id,
        ).where(person_table.c.gender_source_value.isnot(None))
    )

    rows = res.fetchall()
    if not rows:
        return

    def map_gender(val: str | None) -> int:
        if val is None:
            return 0
        s = str(val).strip().lower()
        if s in {"male", "m"}:
            return 8507
        if s in {"female", "f"}:
            return 8532
        return 0

    for pid, gsrc, gcid in rows:
        target = map_gender(gsrc)
        # Skip if already correct
        if gcid == target:
            continue
        stmt = (
            update(person_table)
            .where(person_table.c.person_id == pid)
            .values(gender_concept_id=target)
        )
        await session.execute(stmt)

Command-line interface for pyomop.

Provides commands to create CDM tables, load vocabulary CSVs, and import FHIR Bulk Export data into an OMOP database.

main_routine()

Top-level runner used by python -m pyomop.

Source code in src/pyomop/main.py
def main_routine():
    """Top-level runner used by ``python -m pyomop``."""
    click.echo("_________________________________________")
    click.echo("Pyomop v" + __version__ + " by Bell Eapen ( https://nuchange.ca ) ")
    cli()  # run the main function
    click.echo("Pyomop done.")

Vocabulary utilities for loading and querying OMOP vocab tables.

Provides helpers to import vocabulary CSVs into the database and to look up concepts by id or code. Uses async SQLAlchemy sessions.

CdmVocabulary

Bases: object

Helpers for OMOP Vocabulary management and lookups.

Parameters:

Name Type Description Default
cdm

An initialized CdmEngineFactory instance.

required
version

CDM version string ("cdm54" or "cdm6"). Defaults to "cdm54".

'cdm54'
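
A minimal construction sketch; the from pyomop import path is an assumption, so substitute the actual exports if they differ:

from pyomop import CdmEngineFactory, CdmVocabulary  # import paths assumed

cdm = CdmEngineFactory()                    # SQLite engine by default
vocab = CdmVocabulary(cdm, version="cdm54")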
Source code in src/pyomop/vocabulary.py
class CdmVocabulary(object):
    """Helpers for OMOP Vocabulary management and lookups.

    Args:
        cdm: An initialized ``CdmEngineFactory`` instance.
        version: CDM version string ("cdm54" or "cdm6"). Defaults to "cdm54".
    """

    def __init__(self, cdm, version="cdm54"):
        self._concept_id = 0
        self._concept_name = ""
        self._domain_id = ""
        self._vocabulary_id = ""
        self._concept_class_id = ""
        self._concept_code = ""
        self._cdm = cdm
        self._engine = cdm.engine
        self._maker = async_sessionmaker(self._engine, class_=AsyncSession)
        self._scope = async_scoped_session(self._maker, scopefunc=asyncio.current_task)
        self._version = version

    @property
    def concept_id(self):
        """Current concept_id for this helper (if set)."""
        return self._concept_id

    @property
    def concept_code(self):
        """Current concept_code for this helper (if set)."""
        return self._concept_code

    @property
    def concept_name(self):
        """Current concept_name for this helper (if set)."""
        return self._concept_name

    @property
    def vocabulary_id(self):
        """Current vocabulary_id for this helper (if set)."""
        return self._vocabulary_id

    @property
    def domain_id(self):
        """Current domain_id for this helper (if set)."""
        return self._domain_id

    @concept_id.setter
    def concept_id(self, concept_id):
        """Set the active concept context by concept_id.

        Side effects: populates concept name, domain, vocabulary, class, and code
        on this helper instance for convenience.

        Args:
            concept_id: The concept_id to fetch and set.
        """
        self._concept_id = concept_id
        _concept = asyncio.run(self.get_concept(concept_id))
        self._concept_name = _concept.concept_name
        self._domain_id = _concept.domain_id
        self._vocabulary_id = _concept.vocabulary_id
        self._concept_class_id = _concept.concept_class_id
        self._concept_code = _concept.concept_code

    async def get_concept(self, concept_id):
        """Fetch a concept row by id.

        Args:
            concept_id: Concept identifier.

        Returns:
            The ORM Concept instance.
        """
        if self._version == "cdm6":
            from .cdm6 import Concept
        else:
            from .cdm54 import Concept
        stmt = select(Concept).where(Concept.concept_id == concept_id)
        async with self._cdm.session() as session:
            _concept = await session.execute(stmt)
        return _concept.scalar_one()

    async def get_concept_by_code(self, concept_code, vocabulary_id):
        """Fetch a concept by code within a vocabulary.

        Args:
            concept_code: The vocabulary-specific code string.
            vocabulary_id: Vocabulary identifier (e.g., 'SNOMED', 'LOINC').

        Returns:
            The ORM Concept instance.
        """
        if self._version == "cdm6":
            from .cdm6 import Concept
        else:
            from .cdm54 import Concept
        stmt = (
            select(Concept)
            .where(Concept.concept_code == concept_code)
            .where(Concept.vocabulary_id == vocabulary_id)
        )
        async with self._cdm.session() as session:
            _concept = await session.execute(stmt)
        return _concept.scalar_one()

    def set_concept(self, concept_code, vocabulary_id=None):
        """Set the active concept context by code and vocabulary.

        Args:
            concept_code: The concept code string to resolve.
            vocabulary_id: Vocabulary identifier. Required.

        Notes:
            On success, populates concept fields on this instance. On failure,
            sets ``_vocabulary_id`` and ``_concept_id`` to 0.
        """
        self._concept_code = concept_code
        try:
            if vocabulary_id is not None:
                self._vocabulary_id = vocabulary_id
                _concept = asyncio.run(
                    self.get_concept_by_code(concept_code, vocabulary_id)
                )
            else:
                raise ValueError(
                    "vocabulary_id must be provided when setting concept by code."
                )

            self._concept_name = _concept.concept_name
            self._domain_id = _concept.domain_id
            self._concept_id = _concept.concept_id
            self._concept_class_id = _concept.concept_class_id
            self._concept_code = _concept.concept_code

        except Exception:
            self._vocabulary_id = 0
            self._concept_id = 0

    async def create_vocab(self, folder, sample=None):
        """Load vocabulary CSV files from a folder into the database.

        This imports the standard OMOP vocab tables (drug_strength, concept,
        concept_relationship, concept_ancestor, concept_synonym, vocabulary,
        relationship, concept_class, domain).

        Args:
            folder: Path to the folder containing OMOP vocabulary CSVs.
            sample: Optional number of rows to limit per file during import.
        """
        try:
            # Parents first (for concept FKs): DOMAIN, CONCEPT_CLASS
            df = pd.read_csv(
                folder + "/DOMAIN.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "domain", "replace")

            df = pd.read_csv(
                folder + "/CONCEPT_CLASS.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "concept_class", "replace")

            # Then CONCEPT (uses domain_id and concept_class_id)
            df = pd.read_csv(
                folder + "/CONCEPT.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            df["valid_start_date"] = pd.to_datetime(
                df["valid_start_date"], errors="coerce"
            )
            df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
            await self.write_vocab(df, "concept", "replace")

            # Then VOCABULARY (uses vocabulary_concept_id -> concept)
            df = pd.read_csv(
                folder + "/VOCABULARY.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "vocabulary", "replace")

            # Relationship depends on concept for relationship_concept_id
            df = pd.read_csv(
                folder + "/RELATIONSHIP.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "relationship", "replace")

            # Post-concept tables
            df = pd.read_csv(
                folder + "/CONCEPT_RELATIONSHIP.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            df["valid_start_date"] = pd.to_datetime(
                df["valid_start_date"], errors="coerce"
            )
            df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
            await self.write_vocab(df, "concept_relationship", "replace")

            df = pd.read_csv(
                folder + "/CONCEPT_SYNONYM.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "concept_synonym", "replace")

            df = pd.read_csv(
                folder + "/DRUG_STRENGTH.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            df["valid_start_date"] = pd.to_datetime(
                df["valid_start_date"], errors="coerce"
            )
            df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
            await self.write_vocab(df, "drug_strength", "replace")

            df = pd.read_csv(
                folder + "/CONCEPT_ANCESTOR.csv",
                sep="\t",
                nrows=sample,
                on_bad_lines="skip",
                low_memory=False,
            )
            await self.write_vocab(df, "concept_ancestor", "replace")
        except Exception as e:
            logger.error(f"An error occurred while creating the vocabulary: {e}")

    @asynccontextmanager
    async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
        """Yield an async session bound to the current engine.

        Yields:
            AsyncSession: An async SQLAlchemy session.
        """
        async with self._scope() as session:
            yield session

    async def write_vocab(self, df, table, if_exists="replace", chunk_size=1000):
        """Write a DataFrame to a vocabulary table with type-safe defaults.

        Ensures required columns exist with reasonable defaults, coerces types,
        and performs chunked inserts via SQLAlchemy core for performance.

        Args:
            df: Pandas DataFrame with data to insert.
            table: Target table name (e.g., 'concept').
            if_exists: Compatibility only. This method always inserts.
            chunk_size: Number of rows per batch insert.
        """
        async with self.get_session() as session:
            # For PostgreSQL, temporarily relax constraint enforcement during bulk loads
            is_pg = False
            try:
                is_pg = self._engine.dialect.name.startswith("postgres")
            except Exception:
                is_pg = False
            if is_pg:
                logger.info(
                    "Temporarily disabling replication role for bulk load on postgres"
                )
                try:
                    await session.execute(
                        text("SET session_replication_role = replica")
                    )
                except Exception:
                    # Ignore if not permitted or unsupported
                    logger.warning("Failed to set session_replication_role to replica")

            conn = await session.connection()
            automap: AutomapBase = automap_base()

            def prepare_automap(sync_conn):
                automap.prepare(autoload_with=sync_conn)

            await conn.run_sync(prepare_automap)
            mapper = getattr(automap.classes, table)

            # Build defaults for non-nullable columns based on SQL types
            sa_cols = {c.name: c for c in mapper.__table__.columns}

            def default_for(col):
                from sqlalchemy import (
                    BigInteger,
                    Date,
                    DateTime,
                    Integer,
                    Numeric,
                    String,
                    Text,
                )

                t = col.type
                if isinstance(t, (Integer, BigInteger)):
                    return 0
                if isinstance(t, Numeric):
                    return 0
                if isinstance(t, (String, Text)):
                    return "UNKNOWN"
                if isinstance(t, Date):
                    return date(1970, 1, 1)
                if isinstance(t, DateTime):
                    return datetime(1970, 1, 1)
                return None

            # Work on a copy so we can normalize types and fill required fields
            df2 = df.copy()

            for name, col in sa_cols.items():
                # Ensure column exists
                if name not in df2.columns:
                    # For nullable columns, start with None; for required, use default
                    df2[name] = None if col.nullable else default_for(col)
                    continue

                # Coerce types and handle missing values
                if str(df2[name].dtype) == "object":
                    # Treat empty strings as missing
                    df2[name] = df2[name].replace("", np.nan)

                from sqlalchemy import BigInteger
                from sqlalchemy import Date as SA_Date
                from sqlalchemy import DateTime as SA_DateTime
                from sqlalchemy import Integer, Numeric, String, Text

                t = col.type
                if isinstance(t, SA_Date):
                    ser = pd.to_datetime(df2[name], errors="coerce").dt.date
                    df2[name] = (
                        ser.where(pd.notna(ser), None)
                        if col.nullable
                        else ser.fillna(default_for(col))
                    )
                elif isinstance(t, SA_DateTime):
                    # Normalize to UTC-naive to avoid tz-aware vs tz-naive issues in Postgres
                    ser = pd.to_datetime(df2[name], errors="coerce", utc=True)

                    # Convert to Python datetime and drop tzinfo
                    def _to_naive(dt):
                        try:
                            if pd.isna(dt):
                                return None
                        except Exception:
                            pass
                        if hasattr(dt, "to_pydatetime"):
                            py = dt.to_pydatetime()
                        else:
                            py = dt
                        if getattr(py, "tzinfo", None) is not None:
                            py = (
                                py.tz_convert("UTC").tz_localize(None)
                                if hasattr(py, "tz_convert")
                                else py.replace(tzinfo=None)
                            )
                        return py

                    ser = ser.map(_to_naive)
                    df2[name] = (
                        ser.where(pd.notna(ser), None)
                        if col.nullable
                        else ser.fillna(default_for(col))
                    )
                elif isinstance(t, (Integer, BigInteger)):
                    ser = pd.to_numeric(df2[name], errors="coerce")
                    df2[name] = (
                        ser.where(pd.notna(ser), None)
                        if col.nullable
                        else ser.fillna(default_for(col))
                    )
                elif isinstance(t, Numeric):
                    ser = pd.to_numeric(df2[name], errors="coerce")
                    df2[name] = (
                        ser.where(pd.notna(ser), None)
                        if col.nullable
                        else ser.fillna(default_for(col))
                    )
                elif isinstance(t, (String, Text)):
                    # Only cast non-null values to str and trim; keep nulls as None
                    ser = df2[name].astype(object)
                    mask = ser.notna()
                    ser.loc[mask] = ser.loc[mask].astype(str).str.slice(0, 255)
                    if col.nullable:
                        ser = ser.where(pd.notna(ser), None)
                    else:
                        # Required string columns get a default
                        ser = ser.where(pd.notna(ser), default_for(col))
                    df2[name] = ser
                else:
                    # Fallback: ensure NaN/NaT -> None for nullable cols, else fill default
                    df2[name] = (
                        df2[name].where(pd.notna(df2[name]), None)
                        if col.nullable
                        else df2[name].fillna(default_for(col))
                    )

            # Final safety pass: replace any remaining NaN/NaT with None across all columns
            df2 = df2.where(pd.notna(df2), None)

            stmt = insert(mapper)

            try:
                for _, group in df2.groupby(
                    np.arange(df2.shape[0], dtype=int) // chunk_size
                ):
                    records = group.to_dict("records")
                    try:
                        # Fast path: batch insert
                        await session.execute(stmt, records)
                    except Exception:
                        logger.warning(
                            "Batch insert failed, falling back to row-by-row insert."
                        )
                        # Fallback: insert row-by-row, skipping bad rows
                        for row in records:
                            try:
                                await session.execute(stmt, [row])
                            except Exception:
                                # Ignore duplicates/FK issues per row
                                logger.warning(
                                    f"Failed to insert row: {row}. Skipping."
                                )
                                continue
                    # Commit after each group
                    await session.commit()
            finally:
                if is_pg:
                    try:
                        await session.execute(
                            text("SET session_replication_role = origin")
                        )
                    except Exception:
                        pass

concept_code property

Current concept_code for this helper (if set).

concept_id property writable

Current concept_id for this helper (if set).

concept_name property

Current concept_name for this helper (if set).

domain_id property

Current domain_id for this helper (if set).

vocabulary_id property

Current vocabulary_id for this helper (if set).

create_vocab(folder, sample=None) async

Load vocabulary CSV files from a folder into the database.

This imports the standard OMOP vocab tables (drug_strength, concept, concept_relationship, concept_ancestor, concept_synonym, vocabulary, relationship, concept_class, domain).

Parameters:

Name Type Description Default
folder

Path to the folder containing OMOP vocabulary CSVs.

required
sample

Optional number of rows to limit per file during import.

None
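
A usage sketch, assuming the folder holds the tab-separated Athena exports (CONCEPT.csv, DOMAIN.csv, ...) and that CdmVocabulary is importable as shown above:

import asyncio

from pyomop import CdmEngineFactory, CdmVocabulary  # import paths assumed

async def main():
    vocab = CdmVocabulary(CdmEngineFactory(), version="cdm54")
    # Limit each file to 10,000 rows while testing; omit `sample` for a full load.
    await vocab.create_vocab("/path/to/vocab_csvs", sample=10000)

asyncio.run(main())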
Source code in src/pyomop/vocabulary.py
async def create_vocab(self, folder, sample=None):
    """Load vocabulary CSV files from a folder into the database.

    This imports the standard OMOP vocab tables (drug_strength, concept,
    concept_relationship, concept_ancestor, concept_synonym, vocabulary,
    relationship, concept_class, domain).

    Args:
        folder: Path to the folder containing OMOP vocabulary CSVs.
        sample: Optional number of rows to limit per file during import.
    """
    try:
        # Parents first (for concept FKs): DOMAIN, CONCEPT_CLASS
        df = pd.read_csv(
            folder + "/DOMAIN.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "domain", "replace")

        df = pd.read_csv(
            folder + "/CONCEPT_CLASS.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "concept_class", "replace")

        # Then CONCEPT (uses domain_id and concept_class_id)
        df = pd.read_csv(
            folder + "/CONCEPT.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        df["valid_start_date"] = pd.to_datetime(
            df["valid_start_date"], errors="coerce"
        )
        df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
        await self.write_vocab(df, "concept", "replace")

        # Then VOCABULARY (uses vocabulary_concept_id -> concept)
        df = pd.read_csv(
            folder + "/VOCABULARY.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "vocabulary", "replace")

        # Relationship depends on concept for relationship_concept_id
        df = pd.read_csv(
            folder + "/RELATIONSHIP.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "relationship", "replace")

        # Post-concept tables
        df = pd.read_csv(
            folder + "/CONCEPT_RELATIONSHIP.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        df["valid_start_date"] = pd.to_datetime(
            df["valid_start_date"], errors="coerce"
        )
        df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
        await self.write_vocab(df, "concept_relationship", "replace")

        df = pd.read_csv(
            folder + "/CONCEPT_SYNONYM.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "concept_synonym", "replace")

        df = pd.read_csv(
            folder + "/DRUG_STRENGTH.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        df["valid_start_date"] = pd.to_datetime(
            df["valid_start_date"], errors="coerce"
        )
        df["valid_end_date"] = pd.to_datetime(df["valid_end_date"], errors="coerce")
        await self.write_vocab(df, "drug_strength", "replace")

        df = pd.read_csv(
            folder + "/CONCEPT_ANCESTOR.csv",
            sep="\t",
            nrows=sample,
            on_bad_lines="skip",
            low_memory=False,
        )
        await self.write_vocab(df, "concept_ancestor", "replace")
    except Exception as e:
        logger.error(f"An error occurred while creating the vocabulary: {e}")

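For orientation, here is a minimal usage sketch. The vocabulary helper is assumed to be exported as CdmVocabulary and constructed from a CdmEngineFactory, and the CDM tables are assumed to already exist in the target database; adjust names to the actual package exports.

import asyncio
from pyomop import CdmEngineFactory, CdmVocabulary  # CdmVocabulary export name assumed

async def load_vocab():
    cdm = CdmEngineFactory()      # default SQLite database (cdm.sqlite)
    vocab = CdmVocabulary(cdm)    # constructor signature assumed
    # Import only the first 1000 rows of each CSV for a quick smoke test.
    await vocab.create_vocab("/path/to/vocab_csvs", sample=1000)

asyncio.run(load_vocab())
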
get_concept(concept_id) async

Fetch a concept row by id.

Parameters:

Name         Description           Default
concept_id   Concept identifier.   required

Returns:

The ORM Concept instance.

Source code in src/pyomop/vocabulary.py
async def get_concept(self, concept_id):
    """Fetch a concept row by id.

    Args:
        concept_id: Concept identifier.

    Returns:
        The ORM Concept instance.
    """
    if self._version == "cdm6":
        from .cdm6 import Concept
    else:
        from .cdm54 import Concept
    stmt = select(Concept).where(Concept.concept_id == concept_id)
    async with self._cdm.session() as session:
        _concept = await session.execute(stmt)
    return _concept.scalar_one()

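A minimal call sketch, assuming vocab is an instance of the vocabulary helper documented in this module (the concept id below is purely illustrative):

async def show_concept(vocab, concept_id=4182210):  # illustrative id only
    # scalar_one() raises NoResultFound if the id is absent from CONCEPT.
    concept = await vocab.get_concept(concept_id)
    print(concept.concept_id, concept.concept_name, concept.domain_id)
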
get_concept_by_code(concept_code, vocabulary_id) async

Fetch a concept by code within a vocabulary.

Parameters:

Name            Description                                        Default
concept_code    The vocabulary-specific code string.               required
vocabulary_id   Vocabulary identifier (e.g., 'SNOMED', 'LOINC').   required

Returns:

The ORM Concept instance.

Source code in src/pyomop/vocabulary.py
async def get_concept_by_code(self, concept_code, vocabulary_id):
    """Fetch a concept by code within a vocabulary.

    Args:
        concept_code: The vocabulary-specific code string.
        vocabulary_id: Vocabulary identifier (e.g., 'SNOMED', 'LOINC').

    Returns:
        The ORM Concept instance.
    """
    if self._version == "cdm6":
        from .cdm6 import Concept
    else:
        from .cdm54 import Concept
    stmt = (
        select(Concept)
        .where(Concept.concept_code == concept_code)
        .where(Concept.vocabulary_id == vocabulary_id)
    )
    async with self._cdm.session() as session:
        _concept = await session.execute(stmt)
    return _concept.scalar_one()

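Because scalar_one() expects exactly one matching row, a missing (or duplicated) code raises. A defensive sketch, with names assumed as above:

from sqlalchemy.exc import NoResultFound

async def find_by_code(vocab, code, vocabulary_id):
    try:
        return await vocab.get_concept_by_code(code, vocabulary_id)
    except NoResultFound:
        # The code is not in the loaded vocabulary (e.g., a sampled import).
        return None
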
get_session() async

Yield an async session bound to the current engine.

Yields:

Name           Type                                 Description
AsyncSession   AsyncGenerator[AsyncSession, None]   An async SQLAlchemy session.

Source code in src/pyomop/vocabulary.py
@asynccontextmanager
async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
    """Yield an async session bound to the current engine.

    Yields:
        AsyncSession: An async SQLAlchemy session.
    """
    async with self._scope() as session:
        yield session

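Because get_session is an async context manager, it composes directly with ad-hoc statements. A small sketch (the concept table is assumed to exist):

from sqlalchemy import text

async def count_concepts(vocab):
    async with vocab.get_session() as session:
        result = await session.execute(text("SELECT COUNT(*) FROM concept"))
        return result.scalar_one()
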
set_concept(concept_code, vocabulary_id=None)

Set the active concept context by code and vocabulary.

Parameters:

Name            Description                           Default
concept_code    The concept code string to resolve.   required
vocabulary_id   Vocabulary identifier. Required.      None

Notes:

On success, populates concept fields on this instance. On failure, sets _vocabulary_id and _concept_id to 0.

Source code in src/pyomop/vocabulary.py
def set_concept(self, concept_code, vocabulary_id=None):
    """Set the active concept context by code and vocabulary.

    Args:
        concept_code: The concept code string to resolve.
        vocabulary_id: Vocabulary identifier. Required.

    Notes:
        On success, populates concept fields on this instance. On failure,
        sets ``_vocabulary_id`` and ``_concept_id`` to 0.
    """
    self._concept_code = concept_code
    try:
        if vocabulary_id is not None:
            self._vocabulary_id = vocabulary_id
            _concept = asyncio.run(
                self.get_concept_by_code(concept_code, vocabulary_id)
            )
        else:
            raise ValueError(
                "vocabulary_id must be provided when setting concept by code."
            )

        self._concept_name = _concept.concept_name
        self._domain_id = _concept.domain_id
        self._concept_id = _concept.concept_id
        self._concept_class_id = _concept.concept_class_id
        self._concept_code = _concept.concept_code

    except Exception:
        self._vocabulary_id = 0
        self._concept_id = 0

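Note that set_concept is synchronous and drives the lookup with asyncio.run, so it should be called from regular (non-async) code rather than from inside a running event loop. A short sketch (the SNOMED code is illustrative; the underscore attributes are shown only because the Notes above reference them):

def resolve_code(vocab):
    vocab.set_concept("44054006", vocabulary_id="SNOMED")  # illustrative code
    # On failure both _vocabulary_id and _concept_id are reset to 0.
    return vocab._concept_id or None
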
write_vocab(df, table, if_exists='replace', chunk_size=1000) async

Write a DataFrame to a vocabulary table with type-safe defaults.

Ensures required columns exist with reasonable defaults, coerces types, and performs chunked inserts via SQLAlchemy core for performance.

Parameters:

Name         Description                                       Default
df           Pandas DataFrame with data to insert.             required
table        Target table name (e.g., 'concept').              required
if_exists    Compatibility only. This method always inserts.   'replace'
chunk_size   Number of rows per batch insert.                  1000
Source code in src/pyomop/vocabulary.py
async def write_vocab(self, df, table, if_exists="replace", chunk_size=1000):
    """Write a DataFrame to a vocabulary table with type-safe defaults.

    Ensures required columns exist with reasonable defaults, coerces types,
    and performs chunked inserts via SQLAlchemy core for performance.

    Args:
        df: Pandas DataFrame with data to insert.
        table: Target table name (e.g., 'concept').
        if_exists: Compatibility only. This method always inserts.
        chunk_size: Number of rows per batch insert.
    """
    async with self.get_session() as session:
        # For PostgreSQL, temporarily relax constraint enforcement during bulk loads
        is_pg = False
        try:
            is_pg = self._engine.dialect.name.startswith("postgres")
        except Exception:
            is_pg = False
        if is_pg:
            logger.info(
                "Temporarily disabling replication role for bulk load on postgres"
            )
            try:
                await session.execute(
                    text("SET session_replication_role = replica")
                )
            except Exception:
                # Ignore if not permitted or unsupported
                logger.warning("Failed to set session_replication_role to replica")

        conn = await session.connection()
        automap: AutomapBase = automap_base()

        def prepare_automap(sync_conn):
            automap.prepare(autoload_with=sync_conn)

        await conn.run_sync(prepare_automap)
        mapper = getattr(automap.classes, table)

        # Build defaults for non-nullable columns based on SQL types
        sa_cols = {c.name: c for c in mapper.__table__.columns}

        def default_for(col):
            from sqlalchemy import (
                BigInteger,
                Date,
                DateTime,
                Integer,
                Numeric,
                String,
                Text,
            )

            t = col.type
            if isinstance(t, (Integer, BigInteger)):
                return 0
            if isinstance(t, Numeric):
                return 0
            if isinstance(t, (String, Text)):
                return "UNKNOWN"
            if isinstance(t, Date):
                return date(1970, 1, 1)
            if isinstance(t, DateTime):
                return datetime(1970, 1, 1)
            return None

        # Work on a copy so we can normalize types and fill required fields
        df2 = df.copy()

        for name, col in sa_cols.items():
            # Ensure column exists
            if name not in df2.columns:
                # For nullable columns, start with None; for required, use default
                df2[name] = None if col.nullable else default_for(col)
                continue

            # Coerce types and handle missing values
            if str(df2[name].dtype) == "object":
                # Treat empty strings as missing
                df2[name] = df2[name].replace("", np.nan)

            from sqlalchemy import BigInteger
            from sqlalchemy import Date as SA_Date
            from sqlalchemy import DateTime as SA_DateTime
            from sqlalchemy import Integer, Numeric, String, Text

            t = col.type
            if isinstance(t, SA_Date):
                ser = pd.to_datetime(df2[name], errors="coerce").dt.date
                df2[name] = (
                    ser.where(pd.notna(ser), None)
                    if col.nullable
                    else ser.fillna(default_for(col))
                )
            elif isinstance(t, SA_DateTime):
                # Normalize to UTC-naive to avoid tz-aware vs tz-naive issues in Postgres
                ser = pd.to_datetime(df2[name], errors="coerce", utc=True)

                # Convert to Python datetime and drop tzinfo
                def _to_naive(dt):
                    try:
                        if pd.isna(dt):
                            return None
                    except Exception:
                        pass
                    if hasattr(dt, "to_pydatetime"):
                        py = dt.to_pydatetime()
                    else:
                        py = dt
                    if getattr(py, "tzinfo", None) is not None:
                        py = (
                            py.tz_convert("UTC").tz_localize(None)
                            if hasattr(py, "tz_convert")
                            else py.replace(tzinfo=None)
                        )
                    return py

                ser = ser.map(_to_naive)
                df2[name] = (
                    ser.where(pd.notna(ser), None)
                    if col.nullable
                    else ser.fillna(default_for(col))
                )
            elif isinstance(t, (Integer, BigInteger)):
                ser = pd.to_numeric(df2[name], errors="coerce")
                df2[name] = (
                    ser.where(pd.notna(ser), None)
                    if col.nullable
                    else ser.fillna(default_for(col))
                )
            elif isinstance(t, Numeric):
                ser = pd.to_numeric(df2[name], errors="coerce")
                df2[name] = (
                    ser.where(pd.notna(ser), None)
                    if col.nullable
                    else ser.fillna(default_for(col))
                )
            elif isinstance(t, (String, Text)):
                # Only cast non-null values to str and trim; keep nulls as None
                ser = df2[name].astype(object)
                mask = ser.notna()
                ser.loc[mask] = ser.loc[mask].astype(str).str.slice(0, 255)
                if col.nullable:
                    ser = ser.where(pd.notna(ser), None)
                else:
                    # Required string columns get a default
                    ser = ser.where(pd.notna(ser), default_for(col))
                df2[name] = ser
            else:
                # Fallback: ensure NaN/NaT -> None for nullable cols, else fill default
                df2[name] = (
                    df2[name].where(pd.notna(df2[name]), None)
                    if col.nullable
                    else df2[name].fillna(default_for(col))
                )

        # Final safety pass: replace any remaining NaN/NaT with None across all columns
        df2 = df2.where(pd.notna(df2), None)

        stmt = insert(mapper)

        try:
            for _, group in df2.groupby(
                np.arange(df2.shape[0], dtype=int) // chunk_size
            ):
                records = group.to_dict("records")
                try:
                    # Fast path: batch insert
                    await session.execute(stmt, records)
                except Exception:
                    logger.warning(
                        "Batch insert failed, falling back to row-by-row insert."
                    )
                    # Fallback: insert row-by-row, skipping bad rows
                    for row in records:
                        try:
                            await session.execute(stmt, [row])
                        except Exception:
                            # Ignore duplicates/FK issues per row
                            logger.warning(
                                f"Failed to insert row: {row}. Skipping."
                            )
                            continue
                # Commit after each group
                await session.commit()
        finally:
            if is_pg:
                try:
                    await session.execute(
                        text("SET session_replication_role = origin")
                    )
                except Exception:
                    pass

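The method can also be used directly for small custom loads. A sketch writing a two-row DataFrame into the concept table (all column values are illustrative; missing columns are added automatically, with defaults for required ones and None for nullable ones):

import pandas as pd

async def load_two_concepts(vocab):
    df = pd.DataFrame(
        {
            "concept_id": [2000000001, 2000000002],   # illustrative local ids
            "concept_name": ["Example concept A", "Example concept B"],
            "domain_id": ["Observation", "Observation"],
            "vocabulary_id": ["None", "None"],
            "concept_class_id": ["Clinical Finding", "Clinical Finding"],
            "concept_code": ["A-001", "B-002"],
            "valid_start_date": ["1970-01-01", "1970-01-01"],
            "valid_end_date": ["2099-12-31", "2099-12-31"],
        }
    )
    # Date strings are coerced by write_vocab; inserts are batched per chunk_size.
    await vocab.write_vocab(df, "concept", chunk_size=500)
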
Utilities to execute queries and convert results to DataFrames.

Exposes a small helper class around async SQLAlchemy execution and integration with OHDSI QueryLibrary.

CdmVector

Bases: object

Query execution utility for OMOP CDM.

Methods let you run raw SQL or QueryLibrary snippets and turn results into pandas DataFrames.

Source code in src/pyomop/vector.py
class CdmVector(object):
    """Query execution utility for OMOP CDM.

    Methods let you run raw SQL or QueryLibrary snippets and turn results into
    pandas DataFrames.
    """

    async def execute(self, cdm, sqldict=None, query=None, chunksize=1000):
        """Execute a SQL query asynchronously.

        Args:
            cdm: CdmEngineFactory instance.
            sqldict: Optional key from ``CDMSQL`` to pick a canned query.
            query: Raw SQL string (used if provided).
            chunksize: Unused; kept for future streaming support.

        Returns:
            SQLAlchemy AsyncResult.
        """
        if sqldict:
            query = CDMSQL[sqldict]
        if not isinstance(query, str) or not query:
            raise ValueError("Query must be a non-empty string.")
        logger.info(f"Executing query: {query}")
        async with cdm.session() as session:
            result = await session.execute(text(query))
        await session.close()
        return result

    def result_to_df(self, result):
        """Convert a Result to a DataFrame.

        Args:
            result: SQLAlchemy Result or AsyncResult.

        Returns:
            pandas.DataFrame of result mappings.
        """
        list_of_dicts = result.mappings().all()
        """Convert a list of dictionaries to a DataFrame."""
        if not list_of_dicts:
            return pd.DataFrame()
        return pd.DataFrame(list_of_dicts)

    async def query_library(self, cdm, resource="person", query_name="PE02"):
        """Fetch a query from OHDSI QueryLibrary and execute it.

        Args:
            cdm: CdmEngineFactory instance.
            resource: Query resource subfolder (e.g., "person").
            query_name: Query markdown file name (e.g., "PE02").

        Returns:
            SQLAlchemy AsyncResult.
        """
        # Get the markdown from the query library repository: https://github.com/OHDSI/QueryLibrary/blob/master/inst/shinyApps/QueryLibrary/queries/person/PE02.md
        url = f"https://raw.githubusercontent.com/OHDSI/QueryLibrary/master/inst/shinyApps/QueryLibrary/queries/{resource}/{query_name}.md"
        markdown = requests.get(url)
        if markdown.status_code != 200:
            raise ValueError(f"Query {query_name} not found in the Query Library.")
        query = markdown.text.split("```sql")[1].split("```")[0].strip()
        # remove @cdm. and @vocab. references
        query = query.replace("@cdm.", "").replace("@vocab.", "")
        if not query:
            raise ValueError(f"Query {query_name} is empty.")
        return await self.execute(cdm, query=query)

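A minimal end-to-end sketch of CdmVector with a raw SQL query, assuming a CDM database has already been created and populated via CdmEngineFactory:

import asyncio
from pyomop import CdmEngineFactory, CdmVector  # export names assumed to match the classes documented here

async def main():
    cdm = CdmEngineFactory()    # default SQLite database
    vec = CdmVector()
    result = await vec.execute(cdm, query="SELECT COUNT(*) AS n FROM person")
    print(vec.result_to_df(result))

asyncio.run(main())
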
execute(cdm, sqldict=None, query=None, chunksize=1000) async

Execute a SQL query asynchronously.

Parameters:

Name        Description                                        Default
cdm         CdmEngineFactory instance.                         required
sqldict     Optional key from CDMSQL to pick a canned query.   None
query       Raw SQL string (used if provided).                 None
chunksize   Unused; kept for future streaming support.         1000

Returns:

SQLAlchemy AsyncResult.

Source code in src/pyomop/vector.py
async def execute(self, cdm, sqldict=None, query=None, chunksize=1000):
    """Execute a SQL query asynchronously.

    Args:
        cdm: CdmEngineFactory instance.
        sqldict: Optional key from ``CDMSQL`` to pick a canned query.
        query: Raw SQL string (used if provided).
        chunksize: Unused; kept for future streaming support.

    Returns:
        SQLAlchemy AsyncResult.
    """
    if sqldict:
        query = CDMSQL[sqldict]
    if not isinstance(query, str) or not query:
        raise ValueError("Query must be a non-empty string.")
    logger.info(f"Executing query: {query}")
    async with cdm.session() as session:
        result = await session.execute(text(query))
    await session.close()
    return result

query_library(cdm, resource='person', query_name='PE02') async

Fetch a query from OHDSI QueryLibrary and execute it.

Parameters:

Name         Description                                  Default
cdm          CdmEngineFactory instance.                   required
resource     Query resource subfolder (e.g., "person").   'person'
query_name   Query markdown file name (e.g., "PE02").     'PE02'

Returns:

SQLAlchemy AsyncResult.

Source code in src/pyomop/vector.py
async def query_library(self, cdm, resource="person", query_name="PE02"):
    """Fetch a query from OHDSI QueryLibrary and execute it.

    Args:
        cdm: CdmEngineFactory instance.
        resource: Query resource subfolder (e.g., "person").
        query_name: Query markdown file name (e.g., "PE02").

    Returns:
        SQLAlchemy AsyncResult.
    """
    # Get the markdown from the query library repository: https://github.com/OHDSI/QueryLibrary/blob/master/inst/shinyApps/QueryLibrary/queries/person/PE02.md
    url = f"https://raw.githubusercontent.com/OHDSI/QueryLibrary/master/inst/shinyApps/QueryLibrary/queries/{resource}/{query_name}.md"
    markdown = requests.get(url)
    if markdown.status_code != 200:
        raise ValueError(f"Query {query_name} not found in the Query Library.")
    query = markdown.text.split("```sql")[1].split("```")[0].strip()
    # remove @cdm. and @vocab. references
    query = query.replace("@cdm.", "").replace("@vocab.", "")
    if not query:
        raise ValueError(f"Query {query_name} is empty.")
    return await self.execute(cdm, query=query)

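Running a library query end to end requires network access to raw.githubusercontent.com. A sketch using the default person/PE02 query:

async def run_pe02(cdm):
    vec = CdmVector()
    # Downloads PE02.md from the OHDSI QueryLibrary repo and executes its SQL.
    result = await vec.query_library(cdm, resource="person", query_name="PE02")
    return vec.result_to_df(result)
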
result_to_df(result)

Convert a Result to a DataFrame.

Parameters:

Name     Description                         Default
result   SQLAlchemy Result or AsyncResult.   required

Returns:

pandas.DataFrame of result mappings.

Source code in src/pyomop/vector.py
def result_to_df(self, result):
    """Convert a Result to a DataFrame.

    Args:
        result: SQLAlchemy Result or AsyncResult.

    Returns:
        pandas.DataFrame of result mappings.
    """
    list_of_dicts = result.mappings().all()
    """Convert a list of dictionaries to a DataFrame."""
    if not list_of_dicts:
        return pd.DataFrame()
    return pd.DataFrame(list_of_dicts)

Predefined OMOP SQL snippets from the OHDSI Query Library.

This module exposes a small dictionary of named SQL queries that can be used for demos, tests, or quick analytics over a CDM instance.

Source: https://github.com/OHDSI/QueryLibrary/tree/master/inst/shinyApps/QueryLibrary/queries
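Callers pass one of the dictionary's keys as the sqldict argument of CdmVector.execute. The key below is a placeholder, so check the CDMSQL mapping for the names actually defined:

async def run_canned(cdm):
    vec = CdmVector()
    result = await vec.execute(cdm, sqldict="PE02")  # placeholder key; use a real CDMSQL key
    return vec.result_to_df(result)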