Source code for dae.gene_profile.convert_gene_profile_to_duckdb

import argparse
import os
import sqlite3
from pathlib import Path

import duckdb
import pandas as pd

from dae.gene_profile.db import GeneProfileDBWriter
from dae.gpf_instance.gpf_instance import GPFInstance


def main(
    gpf_instance: GPFInstance | None = None,
    argv: list[str] | None = None,
) -> None:
    """Convert a gene profiles database from sqlite to duckdb.

    Reads every row of the ``gene_profile`` table from the sqlite file given
    by ``--dbfile`` and inserts them into the ``gene_profile`` table of the
    duckdb file given by ``--output``.

    Both options default to files inside the directory named by the
    ``DAE_DB_DIR`` environment variable (current directory when unset):
    ``gpdb`` for the input and ``gpdb.duckdb`` for the output.

    Args:
        gpf_instance: GPF instance supplying the gene profile configuration.
            When ``None``, one is built with ``GPFInstance.build()``.
        argv: Command line arguments handed to ``argparse``; ``None`` lets
            ``argparse`` fall back to ``sys.argv``.

    Raises:
        AssertionError: If the GPF instance has no gene profile configuration.
    """
    # flake8: noqa: C901
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    description = "Gene profiles database converter from sqlite to duckdb"
    parser = argparse.ArgumentParser(description=description)
    dae_db_dir = Path(os.getenv("DAE_DB_DIR", "./"))
    default_dbfile = str(dae_db_dir / "gpdb")
    parser.add_argument("--dbfile", default=default_dbfile)
    default_output = str(dae_db_dir / "gpdb.duckdb")
    parser.add_argument("--output", default=default_output)

    args = parser.parse_args(argv)
    if gpf_instance is None:
        gpf_instance = GPFInstance.build()

    # pylint: disable=protected-access, invalid-name
    config = gpf_instance._gene_profile_config  # noqa: SLF001

    assert config is not None, "No GP configuration found."

    # NOTE(review): the writer is only used for its ``dbfile`` attribute here;
    # presumably constructing it also prepares the target ``gene_profile``
    # table that the INSERT below relies on - confirm against
    # GeneProfileDBWriter.
    gpdb = GeneProfileDBWriter(
        config.to_dict(),
        args.output,
    )

    table_name = "gene_profile"
    query = f"SELECT * from {table_name}"  # noqa: S608

    # Close the sqlite connection explicitly: it is not released
    # automatically, and ``main`` may be invoked from a long-lived process.
    conn = sqlite3.connect(database=args.dbfile)
    try:
        # The name ``df`` is referenced from the SQL text below, which is why
        # linters consider it unused; do not rename it.
        df = pd.read_sql(query, conn)  # noqa: F841
    finally:
        conn.close()

    with duckdb.connect(f"{gpdb.dbfile}") as connection:
        connection.execute("INSERT INTO gene_profile SELECT * FROM df")