"""Provides CLI for management of genomic resources repositories."""importargparseimportcopyimportloggingimportosimportpathlibimportsysfromtypingimportAny,castfromurllib.parseimporturlparseimportyamlfromcerberus.schemaimportSchemaErrorfromjinja2importTemplatefromdaeimport__version__# type: ignorefromdae.genomic_resources.cached_repositoryimportGenomicResourceCachedRepofromdae.genomic_resources.fsspec_protocolimportbuild_fsspec_protocolfromdae.genomic_resources.group_repositoryimportGenomicResourceGroupRepofromdae.genomic_resources.repositoryimport(GR_CONF_FILE_NAME,GR_CONTENTS_FILE_NAME,GenomicResource,GenomicResourceRepo,ManifestEntry,ReadOnlyRepositoryProtocol,ReadWriteRepositoryProtocol,parse_resource_id_version,version_tuple_to_string,)fromdae.genomic_resources.repository_factoryimport(DEFAULT_DEFINITION,build_genomic_resource_repository,build_resource_implementation,get_default_grr_definition,get_default_grr_definition_path,load_definition_file,)fromdae.genomic_resources.resource_implementationimport(GenomicResourceImplementation,ResourceStatistics,)fromdae.task_graph.cli_toolsimportTaskGraphClifromdae.task_graph.graphimportTaskGraphfromdae.utilsimportfs_utilsfromdae.utils.fs_utilsimportfind_directory_with_a_filefromdae.utils.helpersimportconvert_sizefromdae.utils.verbosity_configurationimportVerbosityConfigurationlogger=logging.getLogger("grr_manage")def_add_repository_resource_parameters_group(parser:argparse.ArgumentParser,*,use_resource:bool=True,)->None:group=parser.add_argument_group(title="Repository/Resource")group.add_argument("-R","--repository",type=str,default=None,help="URL to the genomic resources repository. If not specified ""the tool assumes a local file system repository and starts looking ""for .CONTENTS.json file from the current working directory up to the ""root directory. 
If found the directory is assumed for root ""repository directory; otherwise error is reported.")group.add_argument("--grr","--definition","-g",type=str,default=None,help="Path to an extra GRR definition file. This GRR will be loaded""in a group alongside the local one.")group.add_argument("--extra-args",type=str,default=None,help="comma separated list of `key=value` pairs arguments needed for ""connection to the specific repository protocol. ""Ex: if you want to connect to an S3 repository it is often ""neccessary to pass additional `endpoint-url` argument.",)ifuse_resource:group.add_argument("-r","--resource",type=str,help="Specifies the resource whose manifest we want to rebuild. ""If not specified the tool assumes local filesystem repository ""and starts looking for 'genomic_resource.yaml' file from ""current working directory up to the root directory. If found ""the directory is assumed for a resource directory; otherwise ""error is reported.")def_add_dry_run_and_force_parameters_group(parser:argparse.ArgumentParser)->None:group=parser.add_argument_group(title="Force/Dry run")group.add_argument("-n","--dry-run",default=False,action="store_true",help="only checks if the manifest update is needed whithout ""actually updating it")group.add_argument("-f","--force",default=False,action="store_true",help="ignore resource state and rebuild manifest")def_add_dvc_parameters_group(parser:argparse.ArgumentParser)->None:group=parser.add_argument_group(title="DVC params")group.add_argument("--with-dvc",default=True,action="store_true",dest="use_dvc",help="use '.dvc' files if present to get md5 sum of resource files ""(default)")group.add_argument("-D","--without-dvc",default=True,action="store_false",dest="use_dvc",help="calculate the md5 sum if necessary of resource files; ""do not use '.dvc' files to get md5 sum of resource 
files")def_add_hist_parameters_group(parser:argparse.ArgumentParser)->None:group=parser.add_argument_group(title="Statistics")group.add_argument("--region-size",type=int,default=300_000_000,help="Region size to use for splitting statistics calculation into ""tasks")def_configure_list_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("list",help="List a GR Repo")parser.add_argument("--hr",default=False,action="store_true",help="Projects the size in human-readable format.")_add_repository_resource_parameters_group(parser,use_resource=False)VerbosityConfiguration.set_arguments(parser)def_run_list_command(proto:ReadOnlyRepositoryProtocol|GenomicResourceRepo,args:argparse.Namespace)->None:repos:list=[proto]ifisinstance(proto,GenomicResourceGroupRepo):repos=proto.childrenforrepoinrepos:forresinrepo.get_all_resources():res_size=sum(fsfor_,fsinres.get_manifest().get_files())files_msg=f"{len(list(res.get_manifest().get_files())):2d}"ifisinstance(repo,GenomicResourceCachedRepo):cached_files=repo.get_resource_cached_files(res.get_id())files_msg=f"{len(cached_files):2d}/{files_msg}"res_size_msg=res_size \
ifhasattr(args,"bytes")andargs.bytesisTrue \
elseconvert_size(res_size)repo_id=repo.repo_idifisinstance(repo,GenomicResourceRepo) \
elserepo.get_id()print(f"{res.get_type():20}{res.get_version_str():7s} "f"{files_msg}{res_size_msg:12} "f"{repo_id} "f"{res.get_id()}")def_configure_repo_init_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("repo-init",help="Initialize a directory to turn it into a GRR")_add_repository_resource_parameters_group(parser,use_resource=False)_add_dry_run_and_force_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)def_run_repo_init_command(**kwargs:str)->None:repository:str|None=kwargs.get("repository")ifrepositoryisNone:repo_url=find_directory_with_a_file(GR_CONTENTS_FILE_NAME)else:assertrepositoryisnotNonerepo_url=find_directory_with_a_file(GR_CONTENTS_FILE_NAME,repository)ifrepo_urlisnotNone:logger.error("current working directory is part of a GRR at %s",repo_url)sys.exit(1)ifrepositoryisNone:cwd=pathlib.Path().absolute()else:cwd=pathlib.Path(repository).absolute()proto=_create_proto(str(cwd))proto.build_content_file()def_configure_repo_manifest_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("repo-manifest",help="Create/update manifests for whole GRR")_add_repository_resource_parameters_group(parser,use_resource=False)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)def_configure_resource_manifest_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("resource-manifest",help="Create/update manifests for a resource")_add_repository_resource_parameters_group(parser)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)def_configure_repo_stats_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("repo-stats",help="Build the statistics for a 
resource")_add_repository_resource_parameters_group(parser,use_resource=False)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)_add_hist_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)def_configure_resource_stats_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("resource-stats",help="Build the statistics for a resource")_add_repository_resource_parameters_group(parser)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)_add_hist_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)def_configure_repo_repair_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("repo-repair",help="Update/rebuild manifest and histograms whole GRR")_add_repository_resource_parameters_group(parser,use_resource=False)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)_add_hist_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)def_configure_resource_repair_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("resource-repair",help="Update/rebuild manifest and histograms for a resource")_add_repository_resource_parameters_group(parser)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)_add_hist_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)def_configure_repo_info_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("repo-info",help="Build the index.html for the whole 
GRR",)_add_repository_resource_parameters_group(parser)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)def_configure_resource_info_subparser(subparsers:argparse._SubParsersAction)->None:parser=subparsers.add_parser("resource-info",help="Build the index.html for the specific resource",)_add_repository_resource_parameters_group(parser)_add_dry_run_and_force_parameters_group(parser)_add_dvc_parameters_group(parser)VerbosityConfiguration.set_arguments(parser)TaskGraphCli.add_arguments(parser,use_commands=False,force_mode="always",never_cache=True,)
def collect_dvc_entries(
    proto: ReadWriteRepositoryProtocol, res: GenomicResource,
) -> dict[str, ManifestEntry]:
    """Collect manifest entries defined by .dvc files.

    Every resource entry whose name ends with ``.dvc`` is opened and parsed
    as YAML. For each item of its ``outs`` list whose ``path`` equals the
    base name of the tracked file, a ``ManifestEntry`` is built from the
    item's ``size`` and ``md5`` and stored under the tracked file name
    (the ``.dvc`` entry name without its suffix).
    """
    dvc_suffix = ".dvc"
    collected = {}
    resource_entries = proto.collect_resource_entries(res)

    for dvc_entry in resource_entries:
        dvc_name = dvc_entry.name
        if dvc_name.endswith(dvc_suffix):
            tracked_name = dvc_name[:-len(dvc_suffix)]
            tracked_basename = os.path.basename(tracked_name)

            if tracked_name not in resource_entries:
                logger.info(
                    "filling manifest of <%s> with entry for <%s> based on "
                    "dvc data only",
                    res.resource_id, tracked_name)

            with proto.open_raw_file(res, dvc_name, "rt") as dvc_file:
                dvc_data = yaml.safe_load(dvc_file.read())
                # When several outputs match, the last one is kept.
                for output in dvc_data["outs"]:
                    if output["path"] == tracked_basename:
                        collected[tracked_name] = ManifestEntry(
                            tracked_name, output["size"], output["md5"])

    return collected
def _do_resource_manifest_command(
    proto: ReadWriteRepositoryProtocol,
    res: GenomicResource,
    dry_run: bool,  # noqa: FBT001
    force: bool,  # noqa: FBT001
    use_dvc: bool,  # noqa: FBT001
) -> bool:
    """Check and, unless *dry_run*, bring the manifest of *res* up to date.

    With *use_dvc* the entries collected from ``.dvc`` files are handed to
    the protocol as pre-built entries. With *force* the manifest is rebuilt
    with ``build_manifest``; otherwise it is refreshed with
    ``update_manifest`` only when the protocol reports pending changes.

    Returns ``True`` only in dry-run mode when an update is pending; after
    the manifest has been saved, or when nothing had to be done, the result
    is ``False``.
    """
    prebuild_entries = {}
    if use_dvc:
        prebuild_entries = collect_dvc_entries(proto, res)

    manifest_update = proto.check_update_manifest(res, prebuild_entries)
    update_pending = bool(manifest_update)

    if not update_pending:
        logger.debug(
            "manifest of <%s> is up to date",
            res.get_genomic_resource_id_version())
    else:
        msg = (
            f"manifest of "
            f"<{res.get_genomic_resource_id_version()}> "
            f"should be updated; "
            f"entries to update in manifest "
            f"{sorted(manifest_update.entries_to_update)}"
        )
        if manifest_update.entries_to_delete:
            msg = (
                f"{msg}; "  # noqa: S608
                f"entries to delete from manifest "
                f"{sorted(manifest_update.entries_to_delete)}"
            )
        logger.warning(msg)

    if dry_run:
        return update_pending

    if force:
        logger.info(
            "building manifest for resource <%s>...", res.resource_id)
        manifest = proto.build_manifest(res, prebuild_entries)
        proto.save_manifest(res, manifest)
        return False

    if update_pending:
        logger.info(
            "updating manifest for resource <%s>...", res.resource_id)
        manifest = proto.update_manifest(res, prebuild_entries)
        proto.save_manifest(res, manifest)
        return False

    return update_pending


def _run_repo_manifest_command_internal(
    proto: ReadWriteRepositoryProtocol,
    **kwargs: bool | int | str,
) -> dict[str, Any]:
    """Run the manifest check/update for every resource of the repository.

    Returns a mapping with one item per resource: resource id to the result
    of ``_do_resource_manifest_command``. The repository contents file is
    rebuilt afterwards unless ``dry_run`` is set.
    """
    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    use_dvc = cast(bool, kwargs.get("use_dvc", True))

    updates_needed = {}
    for res in proto.get_all_resources():
        updates_needed[res.resource_id] = _do_resource_manifest_command(
            proto, res, dry_run=dry_run, force=force, use_dvc=use_dvc,
        )

    if not dry_run:
        proto.build_content_file()
    return updates_needed


def _run_repo_manifest_command(
    proto: ReadWriteRepositoryProtocol,
    **kwargs: bool | int | str,
) -> int:
    """Handle ``repo-manifest``.

    Returns 1 when both ``dry_run`` and ``force`` are requested. In dry-run
    mode the result is the number of resources whose manifest needs an
    update; otherwise 0.
    """
    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    if dry_run and force:
        logger.warning("please choose one of 'dry_run' and 'force' options")
        return 1

    updates_needed = _run_repo_manifest_command_internal(proto, **kwargs)
    if dry_run:
        # The mapping holds an item for every resource, so its length is the
        # repository size; count only the resources flagged for update.
        return sum(1 for pending in updates_needed.values() if pending)
    return 0


def _find_resource(
    proto: ReadOnlyRepositoryProtocol,
    repo_url: str,
    **kwargs: str | bool | int,
) -> GenomicResource | None:
    """Locate the resource a resource-* command should work on.

    ``kwargs["resource"]`` is used when present. Otherwise, for a repository
    URL with a ``file`` or empty scheme, the resource directory is searched
    for from the current working directory via ``GR_CONF_FILE_NAME`` and its
    path relative to *repo_url* is parsed into resource id and version.
    Returns ``None`` (after logging an error) when the lookup is impossible.
    """
    resource_id = cast(str, kwargs.get("resource"))
    if resource_id is not None:
        return proto.get_resource(resource_id)

    if urlparse(repo_url).scheme not in {"file", ""}:
        logger.error(
            "resource not specified but the repository URL %s "
            "is not local filesystem repository", repo_url)
        return None

    cwd = os.getcwd()
    resource_dir = find_directory_with_a_file(GR_CONF_FILE_NAME, cwd)
    if resource_dir is None:
        logger.error("Can't find resource starting from %s", cwd)
        return None

    rid_ver = os.path.relpath(resource_dir, repo_url)
    resource_id, version = parse_resource_id_version(rid_ver)
    return proto.get_resource(
        resource_id,
        version_constraint=f"={version_tuple_to_string(version)}")


def _run_resource_manifest_command_internal(
    proto: ReadWriteRepositoryProtocol,
    repo_url: str,
    **kwargs: bool | int | str,
) -> bool:
    """Run the manifest check/update for the selected resource.

    Returns ``False`` when the resource cannot be found; otherwise the
    result of ``_do_resource_manifest_command``.
    """
    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    use_dvc = cast(bool, kwargs.get("use_dvc", True))

    res = _find_resource(proto, repo_url, **kwargs)
    if res is None:
        logger.error("resource not found...")
        return False
    return _do_resource_manifest_command(
        proto, res, dry_run=dry_run, force=force, use_dvc=use_dvc)


def _run_resource_manifest_command(
    proto: ReadWriteRepositoryProtocol,
    repo_url: str,
    **kwargs: bool | int | str,
) -> int:
    """Handle ``resource-manifest``.

    Returns 1 when both ``dry_run`` and ``force`` are requested. In dry-run
    mode the result is 1 if the manifest needs an update and 0 otherwise;
    outside dry-run mode it is always 0.
    """
    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    if dry_run and force:
        logger.warning("please choose one of 'dry_run' and 'force' options")
        return 1

    needs_update = _run_resource_manifest_command_internal(
        proto, repo_url, **kwargs)
    if dry_run:
        return int(needs_update)
    return 0


def _read_stats_hash(
    proto: ReadWriteRepositoryProtocol,
    implementation: GenomicResourceImplementation,
) -> bytes | None:
    """Return the stored ``stats_hash`` of a resource, or ``None`` if absent."""
    res = implementation.resource
    stats_dir = ResourceStatistics.get_statistics_folder()
    hash_path = f"{stats_dir}/stats_hash"
    if not proto.file_exists(res, hash_path):
        return None
    with proto.open_raw_file(res, hash_path, mode="rb") as infile:
        return cast(bytes, infile.read())


def _store_stats_hash(
    proto: ReadWriteRepositoryProtocol,
    resource: GenomicResource,
) -> bool:
    """Write the current statistics hash of *resource* to ``stats_hash``.

    Returns ``False`` (after a warning) when the statistics folder is
    unknown, ``True`` once the hash has been written.
    """
    impl = build_resource_implementation(resource)
    stats_dir = ResourceStatistics.get_statistics_folder()
    if stats_dir is None:
        logger.warning(
            "Couldn't store stats hash for %s; unable to get stats dir",
            resource.resource_id)
        return False

    # Compute the hash before opening the file for writing, so that a
    # failure here cannot leave a truncated hash file behind.
    stats_hash = impl.calc_statistics_hash()
    with proto.open_raw_file(
        resource, f"{stats_dir}/stats_hash", mode="wb",
    ) as outfile:
        outfile.write(stats_hash)
    return True


def _collect_impl_stats_tasks(  # pylint: disable=too-many-arguments
    graph: TaskGraph,
    proto: ReadWriteRepositoryProtocol,
    impl: GenomicResourceImplementation,
    grr: GenomicResourceRepo, *,
    dry_run: bool,
    force: bool,
    use_dvc: bool,
    region_size: int,
) -> None:
    """Add to *graph* the tasks that rebuild the statistics of *impl*.

    Besides the implementation's own statistics tasks, two follow-up tasks
    are created that depend on them: one storing the statistics hash and
    one refreshing the resource manifest.
    """
    tasks = impl.add_statistics_build_tasks(
        graph, region_size=region_size, grr=grr)

    resource_full_id = impl.resource.get_full_id()
    # This is the hack to update stats_hash without recreating the
    # histograms.
    graph.create_task(
        f"{resource_full_id}_store_stats_hash",
        _store_stats_hash,
        [proto, impl.resource],
        tasks,
    )
    graph.create_task(
        f"{resource_full_id}_manifest_rebuild",
        _do_resource_manifest_command,
        [proto, impl.resource, dry_run, force, use_dvc],
        tasks,
    )


def _stats_need_rebuild(
    proto: ReadWriteRepositoryProtocol,
    impl: GenomicResourceImplementation,
) -> bool:
    """Check if an implementation's stats need rebuilding.

    A rebuild is needed when no hash is stored or when the stored hash
    differs from the one currently calculated by the implementation.
    """
    current_hash = impl.calc_statistics_hash()
    stored_hash = _read_stats_hash(proto, impl)

    if stored_hash is None:
        logger.info(
            "No hash stored for <%s>; needs update",
            impl.resource.get_full_id(),
        )
        return True

    if stored_hash != current_hash:
        logger.info(
            "Stored hash for <%s> is outdated; needs update",
            impl.resource.get_full_id(),
        )
        return True

    logger.debug(
        "<%s> statistics hash is up to date",
        impl.resource.get_full_id(),
    )
    return False


def _process_stats_graph(
    graph: TaskGraph,
    proto: ReadWriteRepositoryProtocol,
    kwargs: dict[str, Any],
) -> None:
    """Execute *graph* through ``TaskGraphCli`` using the CLI options."""
    modified_kwargs = copy.copy(kwargs)
    modified_kwargs["command"] = "run"
    # NOTE(review): the key that is checked ("tasks_log_dir") differs from
    # the key that is set ("log_dir"); kept as found - confirm against the
    # options declared by TaskGraphCli.
    if modified_kwargs.get("tasks_log_dir") is None:
        repo_url = proto.get_url()
        modified_kwargs["log_dir"] = fs_utils.join(repo_url, ".task-log")
    TaskGraphCli.process_graph(
        graph, force_mode="always", **modified_kwargs)


def _run_repo_stats_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    **kwargs: bool | int | str,
) -> int:
    """Handle ``repo-stats``.

    Manifests are checked/updated first. Resources whose manifest still
    needs an update are skipped and counted; for the rest, statistics tasks
    are collected when forced or outdated. In dry-run mode the number of
    resources needing attention is returned and nothing is executed.
    Otherwise the collected tasks are run, the contents file is rebuilt and
    0 is returned. Conflicting ``dry_run`` and ``force`` yield 1.
    """
    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    use_dvc = cast(bool, kwargs.get("use_dvc", True))
    # NOTE(review): this fallback differs from the `--region-size` default
    # (300_000_000) and is what commands without that option end up using.
    region_size = cast(int, kwargs.get("region_size", 3_000_000))

    if dry_run and force:
        logger.warning("please choose one of 'dry_run' and 'force' options")
        # Report the conflict as a failure, as all sibling commands do; the
        # caller treats a zero status as a consistent repository.
        return 1

    updates_needed = _run_repo_manifest_command_internal(proto, **kwargs)

    graph = TaskGraph()
    status = 0
    for res in proto.get_all_resources():
        if updates_needed[res.resource_id]:
            status += 1
            logger.info(
                "Manifest of <%s> needs update, cannot check statistics",
                res.resource_id,
            )
            continue

        impl = build_resource_implementation(res)
        needs_rebuild = _stats_need_rebuild(proto, impl)
        if (force or needs_rebuild) and not dry_run:
            _collect_impl_stats_tasks(
                graph, proto, impl, repo,
                dry_run=dry_run, force=force, use_dvc=use_dvc,
                region_size=region_size)
        elif dry_run and needs_rebuild:
            logger.info("Statistics of <%s> needs update", res.resource_id)
            status += 1

    if dry_run:
        return status

    if len(graph.tasks) > 0:
        _process_stats_graph(graph, proto, kwargs)

    proto.build_content_file()
    return 0


def _run_resource_stats_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    repo_url: str,
    **kwargs: bool | int | str,
) -> int:
    """Handle ``resource-stats``.

    The manifest of the selected resource is checked/updated first. In
    dry-run mode 1 is returned when the manifest or the statistics need an
    update. Otherwise the statistics tasks are run when forced or outdated
    and 0 is returned. Conflicting ``dry_run`` and ``force`` yield 1.

    Raises:
        ValueError: if the resource cannot be found.
    """
    needs_update = _run_resource_manifest_command_internal(
        proto, repo_url, **kwargs)

    dry_run = cast(bool, kwargs.get("dry_run", False))
    force = cast(bool, kwargs.get("force", False))
    use_dvc = cast(bool, kwargs.get("use_dvc", True))
    region_size = cast(int, kwargs.get("region_size", 3_000_000))

    res = _find_resource(proto, repo_url, **kwargs)
    if res is None:
        raise ValueError("can't find resource")

    if dry_run and force:
        logger.warning("please choose one of 'dry_run' and 'force' options")
        return 1

    if dry_run and needs_update:
        logger.info(
            "Manifest of <%s> needs update, cannot check statistics",
            res.resource_id,
        )
        return 1

    impl = build_resource_implementation(res)
    needs_rebuild = _stats_need_rebuild(proto, impl)

    if dry_run and needs_rebuild:
        logger.info("Statistics of <%s> needs update", res.resource_id)
        return 1

    if (force or needs_rebuild) and not dry_run:
        graph = TaskGraph()
        _collect_impl_stats_tasks(
            graph, proto, impl, repo,
            dry_run=dry_run, force=force, use_dvc=use_dvc,
            region_size=region_size)
        if len(graph.tasks) == 0:
            return 0
        _process_stats_graph(graph, proto, kwargs)

    return 0


def _run_repo_repair_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    **kwargs: str | bool | int,
) -> int:
    """Handle ``repo-repair``; delegates to the ``repo-info`` handler."""
    return _run_repo_info_command(repo, proto, **kwargs)


def _run_resource_repair_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    repo_url: str,
    **kwargs: str | bool | int,
) -> int:
    """Handle ``resource-repair``; delegates to the ``resource-info`` handler."""
    return _run_resource_info_command(repo, proto, repo_url, **kwargs)


def _run_repo_info_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    **kwargs: str | bool | int,
) -> int:
    """Handle ``repo-info``.

    Runs the ``repo-stats`` step first and, in dry-run mode, returns its
    status. Otherwise the repository index is built and the info pages of
    every resource are generated; a failure for one resource is logged and
    does not stop the others. Returns 0 in that case.
    """
    status = _run_repo_stats_command(repo, proto, **kwargs)
    dry_run = cast(bool, kwargs.get("dry_run", False))
    if dry_run:
        return status

    # NOTE(review): `repository_template` is not defined in the visible
    # part of this module - confirm where it comes from.
    proto.build_index_info(repository_template)  # type: ignore

    for res in proto.get_all_resources():
        try:
            _do_resource_info_command(repo, proto, res)
        except ValueError:
            logger.exception(
                "Failed to generate repo index for %s",
                res.resource_id,
            )
        except SchemaError:
            logger.exception(
                "Resource %s has an invalid configuration",
                res.resource_id,
            )
        # `Exception` rather than `BaseException`, so that Ctrl-C and
        # `sys.exit` still abort the loop.
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Failed to load %s",
                res.resource_id,
            )
    return 0


def _do_resource_info_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    res: GenomicResource,
) -> None:
    """Write ``index.html`` and ``statistics/index.html`` for *res*."""
    implementation = build_resource_implementation(res)

    # Each page is rendered before its file is opened for writing, so a
    # rendering failure does not leave an empty page behind.
    content = implementation.get_info(repo=repo)
    with proto.open_raw_file(res, "index.html", mode="wt") as outfile:
        outfile.write(content)

    content = implementation.get_statistics_info(repo=repo)
    with proto.open_raw_file(
        res, "statistics/index.html", mode="wt",
    ) as outfile:
        outfile.write(content)


def _run_resource_info_command(
    repo: GenomicResourceRepo,
    proto: ReadWriteRepositoryProtocol,
    repo_url: str,
    **kwargs: str | int | bool,
) -> int:
    """Handle ``resource-info``.

    Runs the ``resource-stats`` step first and, in dry-run mode, returns
    its status. Otherwise the info pages of the selected resource are
    generated; returns 1 when the resource cannot be found and 0 on
    success.
    """
    status = _run_resource_stats_command(repo, proto, repo_url, **kwargs)
    dry_run = cast(bool, kwargs.get("dry_run", False))
    if dry_run:
        return status

    res = _find_resource(proto, repo_url, **kwargs)
    if res is None:
        logger.error("resource not found...")
        return 1

    _do_resource_info_command(repo, proto, res)
    return 0
def cli_manage(cli_args: list[str] | None = None) -> None:
    """Provide CLI for repository management.

    Parses *cli_args* (``sys.argv[1:]`` when ``None``), locates the
    repository and dispatches to the handler of the chosen sub-command.

    The function returns normally when the command reports status 0 (and
    for ``repo-init`` and ``list``). Otherwise it terminates the process
    through ``sys.exit`` with the command status; a ``ValueError`` raised
    by a handler is logged and turned into exit status 1.

    Raises:
        FileNotFoundError: if the ``--grr`` definition file does not exist.
        TypeError: if the repository protocol is not read-write.
    """
    # flake8: noqa: C901
    # pylint: disable=too-many-branches,too-many-statements
    if cli_args is None:
        cli_args = sys.argv[1:]

    desc = "Genomic Resource Repository Management Tool"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--version", action="store_true", default=False,
        help="Prints GPF version and exits.")
    VerbosityConfiguration.set_arguments(parser)

    commands_parser: argparse._SubParsersAction = parser.add_subparsers(
        dest="command", help="Command to execute")

    _configure_list_subparser(commands_parser)
    _configure_repo_init_subparser(commands_parser)
    _configure_repo_manifest_subparser(commands_parser)
    _configure_resource_manifest_subparser(commands_parser)
    _configure_repo_stats_subparser(commands_parser)
    _configure_resource_stats_subparser(commands_parser)
    _configure_repo_info_subparser(commands_parser)
    _configure_resource_info_subparser(commands_parser)
    _configure_repo_repair_subparser(commands_parser)
    _configure_resource_repair_subparser(commands_parser)

    args = parser.parse_args(cli_args)
    VerbosityConfiguration.set(args)
    # Forwarded to the handlers together with the parsed options.
    args.no_cache = True

    if args.version:
        print(f"GPF version: {__version__}")
        sys.exit(0)

    command = args.command
    if command is None:
        logger.error("missing grr_manage subcommand")
        parser.print_help()
        sys.exit(1)

    if command == "repo-init":
        _run_repo_init_command(**vars(args))
        return

    repo_url = args.repository
    if repo_url is None:
        repo_url = find_directory_with_a_file(GR_CONTENTS_FILE_NAME)
        if repo_url is None:
            # Second attempt with the last five characters of the contents
            # file name removed (presumably its ".json" extension).
            repo_url = find_directory_with_a_file(GR_CONTENTS_FILE_NAME[:-5])
        if repo_url is None:
            logger.error(
                "Can't find repository starting from: %s", os.getcwd())
            sys.exit(1)
        repo_url = str(repo_url)
        print(f"working with repository: {repo_url}")

    extra_definition_path = args.grr
    if extra_definition_path:
        if not os.path.exists(extra_definition_path):
            raise FileNotFoundError(
                f"Definition {extra_definition_path} not found!",
            )
        extra_definition = load_definition_file(extra_definition_path)
    else:
        extra_definition = get_default_grr_definition()

    # The managed repository is grouped with the extra/default definition.
    grr_definition = {
        "id": "cli_grr",
        "type": "group",
        "children": [
            {
                "id": "local",
                "type": "dir",
                "directory": repo_url,
            },
            extra_definition,
        ],
    }
    repo = build_genomic_resource_repository(definition=grr_definition)
    proto = _create_proto(repo_url, args.extra_args)

    if command == "list":
        _run_list_command(proto, args)
        return

    if not isinstance(proto, ReadWriteRepositoryProtocol):
        raise TypeError(
            f"resource management works with RW protocols; "
            f"{proto.proto_id} ({proto.scheme}) is read only")

    if command in {"repo-manifest", "repo-stats", "repo-info", "repo-repair"}:
        status = 0
        try:
            if command == "repo-manifest":
                status = _run_repo_manifest_command(proto, **vars(args))
            elif command == "repo-stats":
                status = _run_repo_stats_command(repo, proto, **vars(args))
            elif command == "repo-info":
                status = _run_repo_info_command(repo, proto, **vars(args))
            else:  # "repo-repair" - the only name left in the set above
                status = _run_repo_repair_command(repo, proto, **vars(args))

            if status == 0:
                logger.info("GRR <%s> is consistent", repo_url)
                return
        except ValueError as ex:
            logger.error(  # noqa: TRY400
                "Misconfigured repository %s; %s", repo_url, ex)
            status = 1

        logger.warning("inconsistent GRR <%s> state", repo_url)
        sys.exit(status)

    elif command in {
            "resource-manifest", "resource-stats",
            "resource-info", "resource-repair"}:
        status = 0
        try:
            if command == "resource-manifest":
                status = _run_resource_manifest_command(
                    proto, repo_url, **vars(args))
            elif command == "resource-stats":
                status = _run_resource_stats_command(
                    repo, proto, repo_url, **vars(args))
            elif command == "resource-info":
                status = _run_resource_info_command(
                    repo, proto, repo_url, **vars(args))
            else:  # "resource-repair" - the only name left in the set above
                status = _run_resource_repair_command(
                    repo, proto, repo_url, **vars(args))

            if status == 0:
                logger.info("GRR <%s> is consistent", repo_url)
                return
        except ValueError:
            logger.exception("unexpected exception")
            status = 1

        logger.warning("inconsistent GRR <%s> state", repo_url)
        sys.exit(status)

    else:
        # The previous message named commands that this tool does not have.
        logger.error(
            "Unknown command %s. Run with --help to see the known commands",
            command)
        sys.exit(1)
def _create_proto(
    repo_url: str, extra_args: str | None = "",
) -> ReadWriteRepositoryProtocol:
    """Build the read-write fsspec protocol used to manage *repo_url*.

    A relative path without URL scheme is made absolute first. *extra_args*
    is a comma separated list of ``key=value`` pairs that are forwarded to
    ``build_fsspec_protocol`` as keyword arguments; each pair is split on
    its first ``=`` only, so values may themselves contain ``=``.

    Raises:
        ValueError: if an item of *extra_args* has no ``=`` or an empty key.
        TypeError: if the protocol built for *repo_url* is not read-write.
    """
    url = urlparse(repo_url)
    # Only plain paths are absolutized. Applying `os.path.abspath` to a
    # "file://..." URL would prepend the working directory to the URL text.
    if url.scheme == "" and not os.path.isabs(repo_url):
        repo_url = os.path.abspath(repo_url)

    kwargs: dict[str, str] = {}
    if extra_args:
        for item in extra_args.split(","):
            if not item:
                # Tolerate a trailing or doubled comma.
                continue
            key, separator, value = item.partition("=")
            if not separator or not key:
                raise ValueError(
                    f"bad extra argument <{item}>; expected `key=value`")
            kwargs[key] = value

    proto = build_fsspec_protocol(
        proto_id="manage", root_url=repo_url, **kwargs)
    if not isinstance(proto, ReadWriteRepositoryProtocol):
        raise TypeError(f"repository protocol is not writable: {repo_url}")
    return proto
def cli_browse(cli_args: list[str] | None = None) -> None:
    """Provide CLI for repository browsing.

    Parses *cli_args* (``sys.argv[1:]`` when ``None``), loads the GRR
    definition given with ``-g/--grr`` or the default one, prints the
    definition as YAML and lists the resources of the repository built
    from it. With ``--version`` the GPF version is printed and the process
    exits with status 0.
    """
    desc = "Genomic Resource Repository Browse Tool"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--version", action="store_true", default=False,
        help="Prints GPF version and exits.")
    VerbosityConfiguration.set_arguments(parser)

    group = parser.add_argument_group(title="Repository/Resource")
    group.add_argument(
        "-g", "--grr", type=str,
        default=None,
        help="path to GRR definition file.")
    parser.add_argument(
        "--bytes", default=False,
        action="store_true", help="Print the resource size in bytes",
    )

    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    VerbosityConfiguration.set(args)

    if args.version:
        print(f"GPF version: {__version__}")
        sys.exit(0)

    definition_path = args.grr
    if definition_path is None:
        definition_path = get_default_grr_definition_path()

    if definition_path is not None:
        definition = load_definition_file(definition_path)
        print("Working with GRR definition:", definition_path)
    else:
        definition = DEFAULT_DEFINITION
        print("No GRR definition found, using the DEFAULT_DEFINITION")
    print(yaml.safe_dump(definition, sort_keys=False))

    repo = build_genomic_resource_repository(definition=definition)
    _run_list_command(repo, args)