GPF Instance Configuration
instance_id
- ID of the GPF instance.reference_genome
- reference genome resource ID to be used by the GPF instance. This is a required field.gene_models
- gene models resource ID to be used by the GPF instance. This is a required field.annotation
- annotation configuration. This could be a path to a configuration file or a list of annotators.grr
- genomic resources repository configuration.gene_scores_db
- list of gene scores to be used by the GPF instance.gene_sets_db
- list of gene sets to be used by the GPF instance.default_study_config
- default study configuration. This is a path to a configuration file.studies
- directory where the study configuration files are located. If not specified, the GPF instance will look instudies
subdirectory in the GPF instance directory.datasets
- directory where the dataset configuration files are located. If not specified, the GPF instance will look indatasets
subdirectory in the GPF instance directory.phenotype_data
- directory where the phenotype data configuration files are located. If not specified, the GPF instance will look inpheno
subdirectory in the GPF instance directory.gene_profiles_config
- gene profiles configuration. This is a path to a configuration file.genotype_storage
- genotype storage configurations.phenotype_storage
- phenotype storage configurations.cache_path
- path to the cache directory. This is a path to a directory where the GPF instance will store the cache files.phenotype_images
gpfjs
vars:
instance_id: data_hg38_local
instance_id: '%(instance_id)s'
reference_genome:
resource_id: hg38/genomes/GRCh38-hg38
gene_models:
resource_id: hg38/gene_models/refGene_v20170601
gene_sets_db:
gene_set_collections:
- gene_properties/gene_sets/autism
- gene_properties/gene_sets/relevant
- gene_properties/gene_sets/sfari
- gene_properties/gene_sets/spark
- gene_properties/gene_sets/MSigDB_curated
- gene_properties/gene_sets/miRNA
- gene_properties/gene_sets/GO
- gene_properties/gene_sets/PFAM_37.0_domains
- gene_properties/gene_sets/miRNA_Darnell
gene_scores_db:
gene_scores:
- gene_properties/gene_scores/LGD
- gene_properties/gene_scores/RVIS
- gene_properties/gene_scores/pLI
- gene_properties/gene_scores/pRec
- gene_properties/gene_scores/SFARI_gene_score
- gene_properties/gene_scores/Satterstrom_Buxbaum_Cell_2020
- gene_properties/gene_scores/Iossifov_Wigler_PNAS_2015
- gene_properties/gene_scores/LOEUF
annotation:
conf_file: annotation.yaml
phenotype_data:
dir: pheno
studies:
dir: studies
datasets:
dir: datasets
default_study_config:
conf_file: defaultConfiguration.conf
gene_profiles_config:
conf_file: geneProfiles.yaml
gpfjs:
visible_datasets:
- ALL_genotypes
- SSC_genotypes
- SPARK_genotypes
genotype_storage:
default: duckdb_parquet
storages:
- id: duckdb_parquet
storage_type: duckdb_parquet
base_dir: '%($DUCKDB_STORAGE)s/parquet'
memory_limit: 12GB
- id: genotype_impala
storage_type: impala
read_only: false
hdfs:
base_dir: /user/%(instance_id)s/studies
host: localhost
port: 8020
replication: 1
impala:
db: '%(instance_id)s'
hosts:
- localhost
port: 21050
pool_size: 3