Commit 63901538 authored by Pradat Yoann's avatar Pradat Yoann

[dev] succesfully run annotator one

parent 6e9a67fd
[run]
omit = */__init__.py
# Project specific # Project specific
/data/ /examples/
/results/ /htmlcov/
# Logs # Logs
log/ log/
......
...@@ -12,7 +12,7 @@ see [VEP's options page](https://www.ensembl.org/info/docs/tools/vep/script/vep_ ...@@ -12,7 +12,7 @@ see [VEP's options page](https://www.ensembl.org/info/docs/tools/vep/script/vep_
### 1.1. Manual parsing ### 1.1. Manual parsing
Relies on tags specified by the user to extract relevant info like genotype (GT), somatic status (SS), quality and filter info (QUAL, INFO) and most importantly reads information (AD, DP, FA, DP4, TAR, TIR). The parser has been tested on TCGA VCF files as produced by Relies on tags specified by the user to extract relevant info like genotype (GT), somatic status (SS), quality and filter info (QUAL, INFO) and most importantly reads information (AD, DP, FA, DP4, TAR, TIR). The parser has been tested on VCF files as produced by
- Mutect v.1 (TCGA GA SNV) and Strelka (TCGA GA Indel) - Mutect v.1 (TCGA GA SNV) and Strelka (TCGA GA Indel)
- sets of callers VarScanSomatic-Strelka-Sniper-Samtools (TCGA HS SNP), GatkSomaticIndel-Pindel-Strelka-VarScanSomatic (TCGA HS Indel). - sets of callers VarScanSomatic-Strelka-Sniper-Samtools (TCGA HS SNP), GatkSomaticIndel-Pindel-Strelka-VarScanSomatic (TCGA HS Indel).
- Mutect v.1.1.7 with no header - Mutect v.1.1.7 with no header
......
...@@ -10,18 +10,38 @@ Created on Thu Aug 13 2020 ...@@ -10,18 +10,38 @@ Created on Thu Aug 13 2020
Main functions for running each step and the assembling step. Main functions for running each step and the assembling step.
""" """
import os
import numpy as np
import pandas as pd
def run_annotator(i_split: int, n_split: int, vcf2maf: str, vep_folder: str, vep_data: str, fasta: str): from .manual import run_manual_annotator
""" from .vcf2maf import run_vcf2maf_annotator
Run the manual, vcf2maf and vep annotations and assemble. from .vep import run_vep_annotator
DataFrame = pd.core.frame.DataFrame
#### # FUNCTION FOR ONE VCF
#### # #######################################################################################################
Paramters def run_annotator_one(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: str, tumor_id: str, normal_id: str,
infos_n_reads: list, infos_other: list, vcf2maf: str, vep_folder: str, vep_data: str, fasta: str, dt_folders: dict):
"""
Run the manual, vcf2maf and vep annotations on one VCF file and assemble.
Parameters
-------- --------
i_split: int vcf_file: str
the split processed. name of the vcf file
n_split: int vcf_folder: str
the number of splits for processing a set of vcf files path to the folder where the vcf is
col_normal: str
name of the column in the vcf for the normal sample
col_tumor: str
name of the column in the vcf for the tumor sample
infos_n_reads: list
list of sigles that contain read info
infos_other: list
list of sigles that need extraction
vcf2maf: str vcf2maf: str
path to the vcf2maf perl script path to the vcf2maf perl script
vep_folder: str vep_folder: str
...@@ -30,47 +50,243 @@ def run_annotator(i_split: int, n_split: int, vcf2maf: str, vep_folder: str, vep ...@@ -30,47 +50,243 @@ def run_annotator(i_split: int, n_split: int, vcf2maf: str, vep_folder: str, vep
path to the .vep data where the reference genome is located path to the .vep data where the reference genome is located
fasta: str fasta: str
relative path to fasta file from vep_folder relative path to fasta file from vep_folder
vcf_folder: str
path to the folder where the vcf files are
dt_folders: dict
dict with the following keys:
* manual_out_folder
* vcf2maf_tmp_folder
* vcf2maf_out_folder
* vep_out_folder
* maf_folder
""" """
vcf_path = os.path.join(vcf_folder, vcf_file)
out_file = vcf_file.replace(".vcf", ".txt")
manual_out_path = os.path.join(dt_folders["manual_out_folder"], out_file)
vcf2maf_out_path = os.path.join(dt_folders["vcf2maf_out_folder"], out_file)
vep_out_path = os.path.join(dt_folders["vep_out_folder"], out_file)
#### # 1. RUN EACH ANNOTATOR
#### # ###################################################################################################
run_manual_annotator(
vcf_path = vcf_path,
out_path = manual_out_path,
col_normal = col_normal,
col_tumor = col_tumor,
infos_n_reads = infos_n_reads,
infos_other = infos_other
)
run_vcf2maf_annotator(
vcf2maf = vcf2maf,
vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = vcf_path,
out_path = vcf2maf_out_path,
tmp_folder = dt_folders["vcf2maf_tmp_folder"],
tumor_id = tumor_id,
normal_id = normal_id,
fasta = fasta
)
#### # SCRIPT PARAMETERS run_vep_annotator(
#### ##################################################################################################### vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = vcf_path,
out_path = vep_out_path,
fasta = fasta
)
parser = argparse.ArgumentParser() #### # 2. ASSEMBLE ANNOTATIONS
parser.add_argument('--i_split' , type=int , default=1 , help='the split processed') #### ######################################################################################################
parser.add_argument('--n_split' , type=int , default=1 , help='total number of splits')
parser.add_argument('--vcf2maf' , type=str , default="" , help='path to the vcf2maf perl script')
parser.add_argument('--vep_folder' , type=str , default="" , help='path to the folder of the vep command')
parser.add_argument('--vep_data' , type=str , default="" , help='path to the .vep data folder')
parser.add_argument('--fasta' , type=str , default="" , help='path to reference genome FASTA file')
args = parser.parse_args()
print("Parameters", flush=True) ddf_vcf = {}
for arg in vars(args):
print("%s: %s" % (arg, getattr(args, arg)), flush=True)
#### vep manual
ddf_vcf["manual"] = pd.read_csv(
filepath_or_buffer = manual_out_path,
sep = "\t"
)
#### vep alone
skipsymbol = "##"
with open(vep_out_path) as file:
skiprows = sum(line.startswith(skipsymbol) for line in file.readlines())
df_alone = pd.read_csv(
filepath_or_buffer = vep_out_path,
sep = "\t",
skiprows = skiprows
)
df_alone_extra = df_alone.Extra.apply(lambda x: dict(item.split("=") for item in x.split(";")))
df_alone_extra = df_alone_extra.apply(pd.Series)
df_alone = pd.concat((df_alone, df_alone_extra), axis=1)
if __name__ == "__main__": ddf_vcf["alone"] = df_alone
#### change working dir #### maf vcf2maf
current_wd = setwd_to_data() skipsymbol = "#"
with open(vcf2maf_out_path) as file:
skiprows = sum(line.startswith(skipsymbol) for line in file.readlines())
#### Parameters ddf_vcf["vcf2maf"] = pd.read_csv(
dt_paths = { filepath_or_buffer = vcf2maf_out_path,
'vcf_meta' : './raw/vcf/mbc/info/mbc_vcf_identifiers.txt', sep = "\t",
'vcf_dir' : './raw/vcf/mbc/files_filtered/', skiprows = skiprows
'vcf_list' : './raw/vcf/mbc/info/list_vcf_mbc.txt', )
'vep_manual_dir' : './raw/vep/vep_manual/mbc',
'vep_alone_dir' : './raw/vep/vep_alone/mbc', #### checked that all scripts ran ok the vcf file
'vep_vcf2maf_dir' : './raw/vep/vep_vcf2maf/mbc/vep', n_manual = ddf_vcf["manual"].shape[0]
'maf_vcf2maf_dir' : './raw/vep/vep_vcf2maf/mbc/maf', n_vcf2maf = ddf_vcf["vcf2maf"].shape[0]
'maf_one_dir' : './raw/maf/mbc', n_alone = ddf_vcf["alone"].shape[0]
if n_manual != n_vcf2maf:
print("SHAPE error: manual %d vs vcf2maf %d " % (n_manual, n_vcf2maf), flush=True)
if n_manual != n_alone:
print("SHAPE error: manual %d vs alone %d " % (n_manual, n_alone), flush=True)
#### choose column per source
choice_columns = {
"Hugo_Symbol" : "vcf2maf",
"Entrez_Gene_Id" : "vcf2maf",
"NCBI_Build" : "vcf2maf",
"Chromosome" : "vcf2maf",
"Start_Position" : "vcf2maf",
"End_Position" : "vcf2maf",
"Variant_Quality" : "manual",
"Filter" : "manual",
"Variant_Classification" : "vcf2maf",
"Variant_Type" : "vcf2maf",
"Reference_Allele" : "vcf2maf",
"Tumor_Seq_Allele1" : "vcf2maf",
"Tumor_Seq_Allele2" : "vcf2maf",
"dbSNP_RS" : "vcf2maf",
"Tumor_Sample" : "manual",
"Tumor_Sample_Barcode" : "manual",
"Tumor_Sample_Site" : "manual",
"Matched_Norm_Sample_Barcode" : "manual",
"Match_Norm_Seq_Allele1" : "vcf2maf",
"Match_Norm_Seq_Allele2" : "vcf2maf",
"Tumor_Sample_UUID" : "manual",
"Matched_Norm_Sample_UUID" : "manual",
"HGVSc" : "vcf2maf",
"HGVSp" : "vcf2maf",
"HGVSp_Short" : "vcf2maf",
"all_effects" : "vcf2maf",
"Location" : "alone",
"Gene" : "alone",
"Feature" : "alone",
"Feature_type" : "alone",
"cDNA_position" : "alone",
"CDS_position" : "alone",
"Protein_position" : "alone",
"Amino_acids" : "alone",
"Codons" : "alone",
"Existing_variation" : "alone",
"Consequence" : "alone",
"IMPACT" : "alone",
"SIFT" : "alone",
"PolyPhen" : "alone",
"CLIN_SIG" : "alone",
"STRAND" : "alone",
"SYMBOL_SOURCE" : "alone",
"HGNC_ID" : "alone",
"BIOTYPE" : "alone",
"CCDS" : "alone",
"ENSP" : "alone",
"SWISSPROT" : "alone",
"TREMBL" : "alone",
"UNIPARC" : "alone",
"EXON" : "alone",
"INTRON" : "alone",
"AF" : "alone", #### AF from 1000 Genomes Phase 3
"AA_AF" : "alone", #### NHLBI-ESP populations
"EA_AF" : "alone", #### NHLBI-ESP populations
"gnomAD_AF" : "alone", #### gnomAD exome populations
"MAX_AF" : "alone", #### highest AF from any of 1000 genomes, ESP or gnomAD
"MAX_AF_POPS" : "alone", #### highest AF from any of 1000 genomes, ESP or gnomAD
} }
#### make dir if does not exist already maf_columns = []
for k, v in dt_paths.items(): cols_reads = [x for x in ddf_vcf["manual"].columns if x.startswith(("n_", "t_"))]
if "dir" in k:
os.makedirs(v, exist_ok=True) #### add column from dict
for column,source in choice_columns.items():
if column not in ddf_vcf[source].columns:
print("WARNING: %s is not in %s" % (column, source), flush=True)
else:
maf_columns.append(ddf_vcf[source][column])
#### add read columns from manual extraction of reads
for column in cols_reads:
if column not in ddf_vcf["manual"].columns:
print("WARNING: %s is not in %s" % (column, source), flush=True)
else:
maf_columns.append(ddf_vcf["manual"][column])
#### # 3. SAVE
#### ######################################################################################################
maf_out_path = os.path.join(dt_folders["maf_folder"], out_file)
df_maf = pd.concat(maf_columns, axis=1)
df_maf.to_csv(
path_or_buf = maf_out_path,
sep = "\t",
header = True,
index = False
)
print("maf file saved at %s" % maf_out_path, flush=True)
def run_annotator(i_split: int, n_split: int, vcf2maf: str, vep_folder: str, vep_data: str, fasta: str, vcf_folder: str,
out_folder: str, vcf_list_path: str=None, vcf_meta_path: str=None):
"""
Run the manual, vcf2maf and vep annotations and assemble.
Parameters
--------
i_split: int
the split processed.
n_split: int
the number of splits for processing a set of vcf files
vcf2maf: str
path to the vcf2maf perl script
vep_folder: str
path to the folder where the vep command is
vep_data: str
path to the .vep data where the reference genome is located
fasta: str
relative path to fasta file from vep_folder
vcf_folder: str
path to the folder where the vcf files are
out_folder: str
path to the folder where subfolders with output results will be saved
vcf_list_path: str, optional
path to the file containing the list of vcf files to be processed. If not specified, all vcf files found in the
vcf_folder are processed.
vcf_meta_path: str, optional
path to the file that contain the names of the tumor and normal columns for each vcf file. The file must be a
.txt file containing 3 columns: "vcf_name", "normal_column", "tumor_column". If not specified,
all vcf files must have columns with the names NORMAL and PRIMARY.
"""
#### paths to results folders
dt_folders = {
'manual_out_folder' : os.path.join(out_folder, "tmp/manual/out"),
'vcf2maf_tmp_folder' : os.path.join(out_folder, "tmp/vcf2maf/tmp"),
'vcf2maf_out_folder' : os.path.join(out_folder, "tmp/vcf2maf/out"),
'vep_out_folder' : os.path.join(out_folder, "tmp/vep/out"),
'maf_folder' : os.path.join(out_folder, "tmp/maf"),
}
#### make folders if they do not exist already
for k, v in dt_folders.items():
os.makedirs(v, exist_ok=True)
#### load meta data #### load meta data
df_meta = pd.read_csv( df_meta = pd.read_csv(
...@@ -80,76 +296,44 @@ if __name__ == "__main__": ...@@ -80,76 +296,44 @@ if __name__ == "__main__":
if os.path.exists(dt_paths["vcf_list"]): if os.path.exists(dt_paths["vcf_list"]):
with open(dt_paths["vcf_list"]) as file: with open(dt_paths["vcf_list"]) as file:
vcf_names_orig = file.read().splitlines() vcf_files = file.read().splitlines()
else: else:
vcf_names_orig = df_meta.loc[df_meta.keep == True, "vcf"].tolist() #### what if compressed ?
vcf_files = [x for x in os.listdir(vcf_folder) if x.endswith(".vcf")]
#### get list of vcfs for the split #### get list of vcfs for the split
count_one_split = len(vcf_names_orig)//args.n_split count_one_split = len(vcf_files)//args.n_split
if args.i_split == args.n_split: if args.i_split == args.n_split:
vcf_names_orig = vcf_names_orig[(args.i_split-1)*count_one_split:] vcf_files = vcf_files[(args.i_split-1)*count_one_split:]
else: else:
vcf_names_orig = vcf_names_orig[(args.i_split-1)*count_one_split:args.i_split*count_one_split] vcf_files = vcf_files[(args.i_split-1)*count_one_split:args.i_split*count_one_split]
count = 0 count = 0
count_total = len(vcf_names_orig) count_total = len(vcf_files)
#### loop over the list #### loop over the list
for vcf_name_orig in vcf_names_orig: for vcf_file in vcf_files:
vcf_name = vcf_name_orig
vcf_path = os.path.join(dt_paths['vcf_dir'], vcf_name) col_normal = "NORMAL"
col_tumor = "PRIMARY"
count += 1 normal_id = "TCGA-A1-A0SD-10A-01D-A110-09"
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
print("="*80, flush=True) infos_n_reads = ["AD", "DP4", "DP", "TAR", "TIR"]
print("vcf %d/%d" % (count, count_total), flush=True) infos_other = ["SS", "GT"]
print("processing %s\n" % vcf_name, flush=True)
run_annotator_one(
#### get vcf type vcf_folder = vcf_folder,
if "mutect" in vcf_name_orig: vcf_file = vcf_file,
vcf_type = "snv" col_normal = col_normal,
elif "indel" in vcf_name_orig: col_tumor = col_tumor,
vcf_type = "indel" normal_id = normal_id,
tumor_id = tumor_id,
#### get vcf identifiers infos_n_reads = infos_n_reads,
mask_vcf_name = df_meta.vcf == vcf_name_orig infos_other = infos_other,
index_vcf_name = mask_vcf_name[mask_vcf_name].index[0] vcf2maf = vcf2maf,
vep_folder = vep_folder,
vcf_identifiers = { vep_data = vep_data,
"Tumor_Sample" : df_meta.loc[index_vcf_name, "Subject.ID"], fasta = fasta,
"Tumor_Sample_Barcode" : df_meta.loc[index_vcf_name, "tumor_sample"], dt_folders = dt_folders
"Matched_Norm_Sample_Barcode" : df_meta.loc[index_vcf_name, "normal_sample"],
}
if vcf_type == "snv":
infos_n_reads = ["RC", "AC"]
infos_other = ["RS", "AS", "GT"]
col_tumor = vcf_identifiers["Tumor_Sample_Barcode"]
col_normal = vcf_identifiers["Matched_Norm_Sample_Barcode"]
elif vcf_type == "indel":
infos_n_reads = ["DP", "TAR", "TIR"]
infos_other = ["SGT"]
col_tumor = "TUMOR"
col_normal = "NORMAL"
process_vcf(
vcf_identifiers = vcf_identifiers,
vcf_path = vcf_path,
vcf_name = vcf_name,
vcf2maf = args.vcf2maf,
vep_folder = args.vep_folder,
vep_data = args.vep_data,
fasta = args.fasta,
dt_paths = dt_paths,
col_tumor = col_tumor,
col_normal = col_normal,
infos_n_reads = infos_n_reads,
infos_other = infos_other
) )
#### revert to previous work dir
os.chdir(current_wd)
...@@ -291,8 +291,7 @@ def process_assemble(df_vcf: DataFrame, df_vcf_info: DataFrame, df_vcf_reads: Da ...@@ -291,8 +291,7 @@ def process_assemble(df_vcf: DataFrame, df_vcf_info: DataFrame, df_vcf_reads: Da
#### # MAIN FUNCTION #### # MAIN FUNCTION
#### ##################################################################################################### #### #####################################################################################################
def run_manual_annotator(vcf_path: str, out_path:str, col_normal: str, col_tumor: str, infos_n_reads: list, def run_manual_annotator(vcf_path: str, out_path:str, col_normal: str, col_tumor: str, infos_n_reads: list, infos_other: list):
infos_other: list):
""" """
Manually parse VCF file and save at the path specified. Manually parse VCF file and save at the path specified.
......
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 13 2020
@author: Yoann Pradat
CentraleSupelec
MICS laboratory
9 rue Juliot Curie, Gif-Sur-Yvette, 91190 France
Test functions from vep module.
"""
import os
from ..main import run_annotator_one
def test_main():
vcf2maf = "~/Documents/biotools/informatics/VCF/mskcc-vcf2maf-5453f80/vcf2maf.pl"
vep_folder = "~/Documents/biotools/informatics/VCF/ensembl-vep"
vep_data = "~/.vep"
fasta = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"
#### # 1. TCGA GA
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/"
#### paths to results folders
dt_folders = {
'manual_out_folder' : os.path.join(out_folder, "tmp/out_manual"),
'vcf2maf_tmp_folder' : os.path.join(out_folder, "tmp/tmp_vcf2maf"),
'vcf2maf_out_folder' : os.path.join(out_folder, "tmp/out_vcf2maf"),
'vep_out_folder' : os.path.join(out_folder, "tmp/out_vep"),
'maf_folder' : os.path.join(out_folder, "maf"),
}
#### make folders if they do not exist already
for k, v in dt_folders.items():
os.makedirs(v, exist_ok=True)
#### Indel TCGA_GA
vcf_file = "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.indel.capture.tcga.vcf"
col_normal = "NORMAL"
col_tumor = "PRIMARY"
normal_id = "TCGA-A1-A0SD-10A-01D-A110-09"
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
infos_n_reads = ["AD", "DP4", "DP", "TAR", "TIR"]
infos_other = ["SS", "GT"]
run_annotator_one(
vcf_folder = vcf_folder,
vcf_file = vcf_file,
col_normal = col_normal,
col_tumor = col_tumor,
normal_id = normal_id,
tumor_id = tumor_id,
infos_n_reads = infos_n_reads,
infos_other = infos_other,
vcf2maf = vcf2maf,
vep_folder = vep_folder,
vep_data = vep_data,
fasta = fasta,
dt_folders = dt_folders
)
#### SNP TCGA_GA
vcf_file = "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.oxoG.snp.capture.tcga.vcf"
#### # 2. TCGA HS
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/"
#### Indel TCGA_HS
vcf_file = "genome.wustl.edu.TCGA-A1-A0SD.indel.0e81f9c986154ce89e59240c3f09534f.vcf"
#### SNP TCGA_HS
vcf_file = "genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.vcf"
...@@ -20,7 +20,7 @@ def test_manual(): ...@@ -20,7 +20,7 @@ def test_manual():
#### # ######################################################################################################## #### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/" vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/out_manual" out_folder = "./examples/results/TCGA_GA/tmp/out_manual"
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_GA #### Indel TCGA_GA
...@@ -61,7 +61,7 @@ def test_manual(): ...@@ -61,7 +61,7 @@ def test_manual():
#### # ######################################################################################################## #### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/" vcf_folder = "./examples/data/TCGA_HS/"
out_folder = "./examples/results/TCGA_HS/out_manual" out_folder = "./examples/results/TCGA_HS/tmp/out_manual"
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_HS #### Indel TCGA_HS
......
...@@ -24,8 +24,8 @@ def test_vcf2maf(): ...@@ -24,8 +24,8 @@ def test_vcf2maf():
#### # ######################################################################################################## #### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/" vcf_folder = "./examples/data/TCGA_GA/"
tmp_folder = "./examples/results/TCGA_GA/tmp_vcf2maf" tmp_folder = "./examples/results/TCGA_GA/tmp/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_GA/out_vcf2maf" out_folder = "./examples/results/TCGA_GA/tmp/out_vcf2maf"
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_GA #### Indel TCGA_GA
...@@ -68,8 +68,8 @@ def test_vcf2maf(): ...@@ -68,8 +68,8 @@ def test_vcf2maf():
#### # ######################################################################################################## #### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/" vcf_folder = "./examples/data/TCGA_HS/"
tmp_folder = "./examples/results/TCGA_HS/tmp_vcf2maf" tmp_folder = "./examples/results/TCGA_HS/tmp/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_HS/out_vcf2maf" out_folder = "./examples/results/TCGA_HS/tmp/out_vcf2maf"
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_HS #### Indel TCGA_HS
......
...@@ -23,7 +23,7 @@ def test_vep(): ...@@ -23,7 +23,7 @@ def test_vep():
#### # ######################################################################################################## #### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/" vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/out_vep" out_folder = "./examples/results/TCGA_GA/tmp/out_vep"
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)