Commit 5a5941bb authored by Pradat Yoann's avatar Pradat Yoann

clean main

parent 97a45912
......@@ -252,100 +252,3 @@ dt_identifiers: dict=None):
index = False
print("maf file saved at %s" % maf_out_path, flush=True)
# Annotate one split of a set of VCF files (see the parameter description below).
# NOTE(review): this block has lost its indentation and several lines: the docstring
# delimiters, the closing brackets of the dict literal and of the read_csv call, the
# `else:` branches, and the callee of the keyword-argument list at the end are all
# missing. Restore the function from version control before changing its logic.
def run_annotator_all(i_split: int, n_split: int, vcf2maf: str, vep_folder: str, vep_data: str, fasta: str, vcf_folder: str,
out_folder: str, vcf_list_path: str=None, vcf_meta_path: str=None):
Run the manual, vcf2maf and vep annotations and assemble.
i_split: int
the split processed.
n_split: int
the number of splits for processing a set of vcf files
vcf2maf: str
path to the vcf2maf perl script
vep_folder: str
path to the folder where the vep command is
vep_data: str
path to the .vep data where the reference genome is located
fasta: str
relative path to fasta file from vep_folder
vcf_folder: str
path to the folder where the vcf files are
out_folder: str
path to the folder where subfolders with output results will be saved
vcf_list_path: str, optional
path to the file containing the list of vcf files to be processed. If not specified, all vcf files found in the
vcf_folder are processed.
vcf_meta_path: str, optional
path to the file that contain the names of the tumor and normal columns for each vcf file. The file must be a
.txt file containing 3 columns: "vcf_name", "normal_column", "tumor_column". If not specified,
all vcf files must have columns with the names NORMAL and PRIMARY.
#### paths to results folders
# Every output sub-folder lives under <out_folder>/tmp.
dt_folders = {
'manual_out_folder' : os.path.join(out_folder, "tmp/manual/out"),
'vcf2maf_tmp_folder' : os.path.join(out_folder, "tmp/vcf2maf/tmp"),
'vcf2maf_out_folder' : os.path.join(out_folder, "tmp/vcf2maf/out"),
'vep_out_folder' : os.path.join(out_folder, "tmp/vep/out"),
'maf_folder' : os.path.join(out_folder, "tmp/maf"),
#### make folders if they do not exist already
# exist_ok=True: no error is raised for folders that are already present.
for k, v in dt_folders.items():
os.makedirs(v, exist_ok=True)
#### load meta data
# NOTE(review): df_meta is not referenced again in the visible lines, and col_normal /
# col_tumor are hard-coded in the loop below - confirm whether the per-file lookup was lost.
if vcf_meta_path is not None:
df_meta = pd.read_csv(
filepath_or_buffer = vcf_meta_path,
sep = "\t"
if vcf_list_path is not None:
with open(vcf_list_path) as file:
# NOTE(review): the right-hand side of this assignment is missing.
vcf_files =
# Presumably the fallback (missing `else:`) when no list file is given: every file in
# vcf_folder whose name ends with ".vcf", in os.listdir order.
vcf_files = [x for x in os.listdir(vcf_folder) if x.endswith(".vcf")]
#### get list of vcfs for the split
# NOTE(review): this reads args.n_split / args.i_split rather than the n_split / i_split
# parameters; `args` is not defined inside this function, so it must exist at module level.
# The slicing treats i_split as 1-based; the slice taken when i_split == n_split runs to the
# end of the list and therefore also receives the remainder of the integer division.
count_one_split = len(vcf_files)//args.n_split
if args.i_split == args.n_split:
vcf_files = vcf_files[(args.i_split-1)*count_one_split:]
vcf_files = vcf_files[(args.i_split-1)*count_one_split:args.i_split*count_one_split]
# NOTE(review): count is initialised here but never updated in the visible lines.
count = 0
count_total = len(vcf_files)
#### loop over the list
for vcf_file in vcf_files:
col_normal = "NORMAL"
col_tumor = "PRIMARY"
# NOTE(review): hard-coded sample identifiers, identical for every file in the loop.
normal_id = "TCGA-A1-A0SD-10A-01D-A110-09"
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
infos_n_reads = ["AD", "DP4", "DP", "TAR", "TIR"]
infos_other = ["SS", "GT"]
# NOTE(review): keyword arguments of a call whose opening line (the callee) is missing.
vcf_folder = vcf_folder,
vcf_file = vcf_file,
col_normal = col_normal,
col_tumor = col_tumor,
normal_id = normal_id,
tumor_id = tumor_id,
infos_n_reads = infos_n_reads,
infos_other = infos_other,
vcf2maf = vcf2maf,
vep_folder = vep_folder,
vep_data = vep_data,
fasta = fasta,
dt_folders = dt_folders
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment