Commit e734d1b2 authored by Pradat Yoann's avatar Pradat Yoann
Browse files

add --cache to VEP and vep_n_fork argument to run_annotator; examples ran succesfully

parent d5d0c44d
Hugo_Symbol Entrez_Gene_Id NCBI_Build Chromosome Start_Position End_Position Variant_Quality Filter Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 HGVSc HGVSp HGVSp_Short all_effects Location Gene Feature Feature_type CANONICAL cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Consequence IMPACT STRAND SYMBOL_SOURCE HGNC_ID BIOTYPE CCDS ENSP SWISSPROT TREMBL UNIPARC EXON INTRON AF gnomAD_AF MAX_AF MAX_AF_POPS n_GT n_SS n_FA n_DP n_AD t_GT t_SS t_FA t_DP t_AD Tumor_Sample Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Tumor_Sample_Site
FAM131C 348487 GRCh37 1 16386305 16386306 50 PASS Intron INS - - C rs542191066 - - c.451+58dup FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; 1:16386305-16386306 ENSG00000185519 ENST00000375662.4 Transcript YES - - - - - rs542191066 intron_variant MODIFIER -1 HGNC 26717 protein_coding CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 5/6 0.3678 0.5051 SAS 0/0 2 0.0 11 11,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
ZIC4 84107 GRCh37 3 147121630 147121631 50 PASS Intron DEL TC TC - rs142316820 TC TC c.135+120_135+121del ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; 3:147121630-147121631 ENSG00000174963 ENST00000525172.2 Transcript YES - - - - - rs142316820 intron_variant MODIFIER -1 HGNC 20393 protein_coding CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 1/4 0/0 2 0.0 6 6,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
EIF4G1 1981 GRCh37 3 184043926 184043927 50 PASS Intron DEL AC AC - rs34901174 AC AC c.3243+217_3243+218del EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; 3:184043926-184043927 ENSG00000114867 ENST00000424196.1 Transcript YES - - - - - rs34901174 intron_variant MODIFIER 1 HGNC 3296 protein_coding CCDS54687.1 ENSP00000416255 Q04637 Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 0/0 2 
0.0 7 7,0 0/1 2 0.667 6 2,4 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
STEAP1B 256227 GRCh37 7 22533452 22533453 50 PASS Frame_Shift_Del DEL CA CA - novel CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; 7:22533452-22533453 ENSG00000105889 ENST00000404369.4 Transcript YES 503-504 87-88 29-30 HE/QX caTGag/caag - frameshift_variant,splice_region_variant HIGH -1 HGNC 41907 protein_coding CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 3/5 0/0 2 0.0 22 22,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
TEX12 56158 GRCh37 11 112042480 112042480 50 PASS Intron DEL T T - rs1225064086 T T c.228-9del TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; 11:112042480 ENSG00000150783 ENST00000280358.4 Transcript YES - - - - - rs1225064086 intron_variant MODIFIER 1 HGNC 11734 protein_coding CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 4/4 1.711e-05 8.319e-05 gnomAD_AFR 0/0 2 0.0 31 31,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
KMT2D 8085 GRCh37 12 49431403 49431404 50 PASS Frame_Shift_Ins INS - - T novel - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; 12:49431403-49431404 ENSG00000167548 ENST00000301067.7 Transcript YES 9735-9736 9735-9736 3245-3246 -/X -/A - frameshift_variant HIGH -1 HGNC 7133 protein_coding CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 34/54 0/0 2 0.0 48 48,0 0/1 2 0.212 33 26,7 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
PDS5B 23047 GRCh37 13 33332314 33332314 50 PASS Frame_Shift_Del DEL A A - novel A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; 13:33332314 ENSG00000083642 ENST00000315596.10 Transcript YES 3332 3146 1049 Q/X cAa/ca - frameshift_variant HIGH 1 HGNC 20418 protein_coding CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 27/35 0/0 2 0.0 47 47,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
CCR7 1236 GRCh37 17 38712161 38712161 50 PASS Intron DEL T T - rs532551852 T T c.61-91del CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; 17:38712161 ENSG00000126353 ENST00000246657.2 Transcript YES - - - - - rs532551852 intron_variant MODIFIER -1 HGNC 1608 protein_coding CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F 2/2 0.0008 0.004 EAS 0/0 2 0.0 5 5,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
ATP9A 10079 GRCh37 20 50342307 50342308 50 PASS Intron DEL TC TC - novel TC TC c.327+50_327+51del ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001225305,; 20:50342307-50342308 ENSG00000054793 ENST00000338821.5 Transcript YES - - - - - - intron_variant MODIFIER -1 HGNC 13540 protein_coding CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 3/27 0/0 2 0.0 22 22,0 0/1 2 0.4 15 9,6 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
Hugo_Symbol Entrez_Gene_Id NCBI_Build Chromosome Start_Position End_Position Variant_Quality Filter Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 HGVSc HGVSp HGVSp_Short Transcript_ID all_effects Location Gene Feature Feature_type CANONICAL cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Consequence IMPACT STRAND SYMBOL_SOURCE HGNC_ID BIOTYPE CCDS ENSP SWISSPROT TREMBL UNIPARC EXON INTRON gnomAD_AF MAX_AF MAX_AF_POPS n_GT n_SS n_FA n_DP n_AD t_GT t_SS t_FA t_DP t_AD Tumor_Sample Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Tumor_Sample_Site
FAM131C 348487 GRCh37 1 16386305 16386306 50 PASS Intron INS - - C rs372070031 - - c.451+58dup ENST00000375662 FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; 1:16386305-16386306 ENSG00000185519 ENST00000375662.4 Transcript YES - - - - - rs372070031 intron_variant MODIFIER -1 HGNC 26717 protein_coding CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 5/6 0.5051 SAS 0/0 2 0.0 11 11,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
ZIC4 84107 GRCh37 3 147121630 147121631 50 PASS Intron DEL TC TC - rs142316820 TC TC c.135+120_135+121del ENST00000525172 ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; 3:147121630-147121631 ENSG00000174963 ENST00000525172.2 Transcript YES - - - - - rs142316820 intron_variant MODIFIER -1 HGNC 20393 protein_coding CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 1/4 0/0 2 0.0 6 6,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
EIF4G1 1981 GRCh37 3 184043926 184043927 50 PASS Intron DEL AC AC - rs34901174 AC AC c.3243+217_3243+218del ENST00000424196 EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; 3:184043926-184043927 ENSG00000114867 ENST00000424196.1 Transcript YES - - - - - rs34901174 intron_variant MODIFIER 1 HGNC 3296 protein_coding CCDS54687.1 ENSP00000416255 Q04637 Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 
UPI00015E0966 20/31 0/0 2 0.0 7 7,0 0/1 2 0.667 6 2,4 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
STEAP1B 256227 GRCh37 7 22533452 22533453 50 PASS Frame_Shift_Del DEL CA CA - novel CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 ENST00000404369 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; 7:22533452-22533453 ENSG00000105889 ENST00000404369.4 Transcript YES 503-504 87-88 29-30 HE/QX caTGag/caag - frameshift_variant,splice_region_variant HIGH -1 HGNC 41907 protein_coding CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 3/5 0/0 2 0.0 22 22,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
TEX12 56158 GRCh37 11 112042480 112042480 50 PASS Intron DEL T T - rs1225064086 T T c.228-9del ENST00000280358 TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; 11:112042480 ENSG00000150783 ENST00000280358.4 Transcript YES - - - - - rs1225064086 intron_variant MODIFIER 1 HGNC 11734 protein_coding CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 4/4 1.711e-05 8.319e-05 gnomAD_AFR 0/0 2 0.0 31 31,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
KMT2D 8085 GRCh37 12 49431403 49431404 50 PASS Frame_Shift_Ins INS - - T novel - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 ENST00000301067 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; 12:49431403-49431404 ENSG00000167548 ENST00000301067.7 Transcript YES 9735-9736 9735-9736 3245-3246 -/X -/A - frameshift_variant HIGH -1 HGNC 7133 protein_coding CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 34/54 0/0 2 0.0 48 48,0 0/1 2 0.212 33 26,7 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
PDS5B 23047 GRCh37 13 33332314 33332314 50 PASS Frame_Shift_Del DEL A A - novel A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 ENST00000315596 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; 13:33332314 ENSG00000083642 ENST00000315596.10 Transcript YES 3332 3146 1049 Q/X cAa/ca - frameshift_variant HIGH 1 HGNC 20418 protein_coding CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 27/35 0/0 2 0.0 47 47,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
CCR7 1236 GRCh37 17 38712161 38712161 50 PASS Intron DEL T T - rs372297045 T T c.61-91del ENST00000246657 CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; 17:38712161 ENSG00000126353 ENST00000246657.2 Transcript YES - - - - - rs372297045 intron_variant MODIFIER -1 HGNC 1608 protein_coding CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F 2/2 0.004 EAS 0/0 2 0.0 5 5,0 0/1 2 0.33299999999999996 6 4,2 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
ATP9A 10079 GRCh37 20 50342307 50342308 50 PASS Intron DEL TC TC - novel TC TC c.327+50_327+51del ENST00000338821 ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001644001,; 20:50342307-50342308 ENSG00000054793 ENST00000338821.5 Transcript YES - - - - - - intron_variant MODIFIER -1 HGNC 13540 protein_coding CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 3/27 0/0 2 0.0 22 22,0 0/1 2 0.4 15 9,6 TCGA-A1-A0SB TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 01
#version 2.4
Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps vcf_id vcf_qual ExAC_AF_Adj ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos
FAM131C 348487 . GRCh37 1 16386305 16386306 + Intron INS - - C rs542191066 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 - - c.451+58dup ENST00000375662 FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; C ENSG00000185519 ENST00000375662 Transcript intron_variant rs542191066 1 -1 FAM131C HGNC 26717 protein_coding YES CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 NM_182623.2 5/6 0.3678 0.2731 0.3573 0.3581 0.3757 0.5051 MODIFIER 1 insertion PASS GGC rs143272992 50 16386305
ZIC4 84107 . GRCh37 3 147121630 147121631 + Intron DEL TC TC - rs142316820 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 TC TC c.135+120_135+121del ENST00000525172 ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; - ENSG00000174963 ENST00000525172 Transcript intron_variant rs142316820 1 -1 ZIC4 HGNC 20393 protein_coding YES CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 NM_001168378.1 1/4 MODIFIER 1 deletion 1 PASS GATCT . 50 147121629
EIF4G1 1981 . GRCh37 3 184043926 184043927 + Intron DEL AC AC - rs34901174 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 AC AC c.3243+217_3243+218del ENST00000424196 EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; - ENSG00000114867 ENST00000424196 Transcript intron_variant rs34901174 1 1 EIF4G1 HGNC 3296 protein_coding YES CCDS54687.1 ENSP00000416255 Q04637 
Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 MODIFIER 1 deletion 30 1 PASS AAACA rs112208190 50 184043925
STEAP1B 256227 . GRCh37 7 22533452 22533453 + Frame_Shift_Del DEL CA CA - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 ENST00000404369 3/5 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; - ENSG00000105889 ENST00000404369 Transcript frameshift_variant,splice_region_variant 503-504/1515 87-88/1029 29-30/342 HE/QX caTGag/caag 1 -1 STEAP1B HGNC 41907 protein_coding YES CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 NM_001164460.1 3/5 hmmpanther:PTHR14239:SF3,hmmpanther:PTHR14239 HIGH 1 deletion PASS CTCAT rs116873396 50 22533451
TEX12 56158 . GRCh37 11 112042480 112042480 + Intron DEL T T - rs1225064086 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 T T c.228-9del ENST00000280358 TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; - ENSG00000150783 ENST00000280358 Transcript intron_variant rs1225064086 1 1 TEX12 HGNC 11734 protein_coding YES CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 NM_031275.4 4/4 MODIFIER 1 deletion 6 PASS ACTT . 50 1.711e-05 8.319e-05 8.285e-05 1.133e-05 112042479
KMT2D 8085 . GRCh37 12 49431403 49431404 + Frame_Shift_Ins INS - - T novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 ENST00000301067 34/54 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; T ENSG00000167548 ENST00000301067 Transcript frameshift_variant 9735-9736/19419 9735-9736/16614 3245-3246/5537 -/X -/A 1 -1 KMT2D HGNC 7133 protein_coding YES CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 NM_003482.3 34/54 hmmpanther:PTHR22884,hmmpanther:PTHR22884:SF324 HIGH 1 insertion 1 PASS GGT . 50 49431403
PDS5B 23047 . GRCh37 13 33332314 33332314 + Frame_Shift_Del DEL A A - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 ENST00000315596 27/35 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; - ENSG00000083642 ENST00000315596 Transcript frameshift_variant 3332/7497 3146/4344 1049/1447 Q/X cAa/ca 1 1 PDS5B HGNC 20418 protein_coding YES CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 NM_015032.3 27/35 hmmpanther:PTHR12663,hmmpanther:PTHR12663:SF1 HIGH 1 deletion 2 PASS ACAA . 50 33332313
CCR7 1236 . GRCh37 17 38712161 38712161 + Intron DEL T T - rs532551852 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 T T c.61-91del ENST00000246657 CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; - ENSG00000126353 ENST00000246657 Transcript intron_variant rs532551852 1 -1 CCR7 HGNC 1608 protein_coding YES CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F NM_001838.3 2/2 0.0008 0.004 MODIFIER 1 deletion 1 PASS TCTT . 50 38712160
ATP9A 10079 . GRCh37 20 50342307 50342308 + Intron DEL TC TC - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 TC TC c.327+50_327+51del ENST00000338821 ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001225305,; - ENSG00000054793 ENST00000338821 Transcript intron_variant 1 -1 ATP9A HGNC 13540 protein_coding YES CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 NM_006045.1 3/27 MODIFIER 1 deletion PASS TTTCT . 50 50342306
FAM131C 348487 . GRCh37 1 16386305 16386306 + Intron INS - - C rs372070031 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 - - c.451+58dup ENST00000375662 FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; C ENSG00000185519 ENST00000375662 Transcript intron_variant rs372070031 1 -1 FAM131C HGNC 26717 protein_coding YES CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 NM_182623.2 5/6 0.2731 0.3573 0.3581 0.3757 0.5051 MODIFIER 1 insertion PASS GGC rs143272992 50 16386305
ZIC4 84107 . GRCh37 3 147121630 147121631 + Intron DEL TC TC - rs142316820 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 TC TC c.135+120_135+121del ENST00000525172 ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; - ENSG00000174963 ENST00000525172 Transcript intron_variant rs142316820 1 -1 ZIC4 HGNC 20393 protein_coding YES CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 NM_001168378.1 1/4 MODIFIER 1 deletion 1 PASS GATCT . 50 147121629
EIF4G1 1981 . GRCh37 3 184043926 184043927 + Intron DEL AC AC - rs34901174 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 AC AC c.3243+217_3243+218del ENST00000424196 EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; - ENSG00000114867 ENST00000424196 Transcript intron_variant rs34901174 1 1 EIF4G1 HGNC 3296 protein_coding YES CCDS54687.1 ENSP00000416255 Q04637 
Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 MODIFIER 1 deletion 1 PASS AAACA rs112208190 50 184043925
STEAP1B 256227 . GRCh37 7 22533452 22533453 + Frame_Shift_Del DEL CA CA - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 ENST00000404369 3/5 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; - ENSG00000105889 ENST00000404369 Transcript frameshift_variant,splice_region_variant 503-504/1515 87-88/1029 29-30/342 HE/QX caTGag/caag 1 -1 STEAP1B HGNC 41907 protein_coding YES CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 NM_001164460.1 3/5 PANTHER:PTHR14239:SF3,PANTHER:PTHR14239 HIGH 1 deletion PASS CTCAT rs116873396 50 22533451
TEX12 56158 . GRCh37 11 112042480 112042480 + Intron DEL T T - rs1225064086 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 T T c.228-9del ENST00000280358 TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; - ENSG00000150783 ENST00000280358 Transcript intron_variant rs1225064086 1 1 TEX12 HGNC 11734 protein_coding YES CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 NM_031275.4 4/4 MODIFIER 1 deletion PASS ACTT . 50 1.711e-05 8.319e-05 8.285e-05 1.133e-05 112042479
KMT2D 8085 . GRCh37 12 49431403 49431404 + Frame_Shift_Ins INS - - T novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 ENST00000301067 34/54 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; T ENSG00000167548 ENST00000301067 Transcript frameshift_variant 9735-9736/19419 9735-9736/16614 3245-3246/5537 -/X -/A 1 -1 KMT2D HGNC 7133 protein_coding YES CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 NM_003482.3 34/54 PANTHER:PTHR22884,PANTHER:PTHR22884:SF324 HIGH 1 insertion 1 PASS GGT . 50 49431403
PDS5B 23047 . GRCh37 13 33332314 33332314 + Frame_Shift_Del DEL A A - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 ENST00000315596 27/35 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; - ENSG00000083642 ENST00000315596 Transcript frameshift_variant 3332/7497 3146/4344 1049/1447 Q/X cAa/ca 1 1 PDS5B HGNC 20418 protein_coding YES CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 NM_015032.3 27/35 PANTHER:PTHR12663,PANTHER:PTHR12663:SF1 HIGH 1 deletion 2 PASS ACAA . 50 33332313
CCR7 1236 . GRCh37 17 38712161 38712161 + Intron DEL T T - rs372297045 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 T T c.61-91del ENST00000246657 CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; - ENSG00000126353 ENST00000246657 Transcript intron_variant rs372297045 1 -1 CCR7 HGNC 1608 protein_coding YES CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F NM_001838.3 2/2 0.004 MODIFIER 1 deletion 1 PASS TCTT . 50 38712160
ATP9A 10079 . GRCh37 20 50342307 50342308 + Intron DEL TC TC - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 TC TC c.327+50_327+51del ENST00000338821 ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001644001,; - ENSG00000054793 ENST00000338821 Transcript intron_variant 1 -1 ATP9A HGNC 13540 protein_coding YES CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 NM_006045.1 3/27 MODIFIER 1 deletion PASS TTTCT . 50 50342306
## ENSEMBL VARIANT EFFECT PREDICTOR v101.0
## Output produced at 2020-08-28 09:53:06
## Using cache in /Users/ypradat/.vep/homo_sapiens/101_GRCh37
## Using API version 101, DB version ?
## ensembl-variation version 101.50e7372
## ensembl-funcgen version 101.b918a49
## ensembl version 101.856c8e8
## ensembl-io version 101.943b6c2
## HGMD-PUBLIC version 20194
## sift version sift5.2.2
## gencode version GENCODE 19
## dbSNP version 153
## regbuild version 1.0
## 1000genomes version phase3
## COSMIC version 90
## ESP version 20141103
## genebuild version 2011-04
## gnomAD version r2.1
## ClinVar version 201912
## polyphen version 2.2.2
## assembly version GRCh37.p13
## Column descriptions:
## Uploaded_variation : Identifier of uploaded variant
## Location : Location of variant in standard coordinate format (chr:start or chr:start-end)
## Allele : The variant allele used to calculate the consequence
## Gene : Stable ID of affected gene
## Feature : Stable ID of feature
## Feature_type : Type of feature - Transcript, RegulatoryFeature or MotifFeature
## Consequence : Consequence type
## cDNA_position : Relative position of base pair in cDNA sequence
## CDS_position : Relative position of base pair in coding sequence
## Protein_position : Relative position of amino acid in protein
## Amino_acids : Reference and variant amino acids
## Codons : Reference and variant codon sequence
## Existing_variation : Identifier(s) of co-located known variants
## Extra column keys:
## IMPACT : Subjective impact classification of consequence type
## DISTANCE : Shortest distance from variant to transcript
## STRAND : Strand of the feature (1/-1)
## FLAGS : Transcript quality flags
## SYMBOL : Gene symbol (e.g. HGNC)
## SYMBOL_SOURCE : Source of gene symbol
## HGNC_ID : Stable identifier of HGNC gene symbol
## BIOTYPE : Biotype of transcript or regulatory feature
## CANONICAL : Indicates if transcript is canonical for this gene
## MANE : MANE (Matched Annotation by NCBI and EMBL-EBI) Transcript
## TSL : Transcript support level
## APPRIS : Annotates alternatively spliced transcripts as primary or alternate based on a range of computational methods
## CCDS : Indicates if transcript is a CCDS transcript
## ENSP : Protein identifier
## SWISSPROT : UniProtKB/Swiss-Prot accession
## TREMBL : UniProtKB/TrEMBL accession
## UNIPARC : UniParc accession
## SIFT : SIFT prediction and/or score
## PolyPhen : PolyPhen prediction and/or score
## EXON : Exon number(s) / total
## INTRON : Intron number(s) / total
## HGVSc : HGVS coding sequence name
## HGVSp : HGVS protein sequence name
## HGVS_OFFSET : Indicates by how many bases the HGVS notations for this variant have been shifted
## AF : Frequency of existing variant in 1000 Genomes combined population
## AFR_AF : Frequency of existing variant in 1000 Genomes combined African population
## AMR_AF : Frequency of existing variant in 1000 Genomes combined American population
## EAS_AF : Frequency of existing variant in 1000 Genomes combined East Asian population
## EUR_AF : Frequency of existing variant in 1000 Genomes combined European population
## SAS_AF : Frequency of existing variant in 1000 Genomes combined South Asian population
## AA_AF : Frequency of existing variant in NHLBI-ESP African American population
## EA_AF : Frequency of existing variant in NHLBI-ESP European American population
## gnomAD_AF : Frequency of existing variant in gnomAD exomes combined population
## gnomAD_AFR_AF : Frequency of existing variant in gnomAD exomes African/American population
## gnomAD_AMR_AF : Frequency of existing variant in gnomAD exomes American population
## gnomAD_ASJ_AF : Frequency of existing variant in gnomAD exomes Ashkenazi Jewish population
## gnomAD_EAS_AF : Frequency of existing variant in gnomAD exomes East Asian population
## gnomAD_FIN_AF : Frequency of existing variant in gnomAD exomes Finnish population
## gnomAD_NFE_AF : Frequency of existing variant in gnomAD exomes Non-Finnish European population
## gnomAD_OTH_AF : Frequency of existing variant in gnomAD exomes other combined populations
## gnomAD_SAS_AF : Frequency of existing variant in gnomAD exomes South Asian population
## MAX_AF : Maximum observed allele frequency in 1000 Genomes, ESP and ExAC/gnomAD
## MAX_AF_POPS : Populations in which maximum allele frequency was observed
## CLIN_SIG : ClinVar clinical significance of the dbSNP variant
## SOMATIC : Somatic status of existing variant
## PHENO : Indicates if existing variant(s) is associated with a phenotype, disease or trait; multiple values correspond to multiple variants
## PUBMED : Pubmed ID(s) of publications that cite existing variant
## MOTIF_NAME : The stable identifier of a transcription factor binding profile (TFBP) aligned at this position
## MOTIF_POS : The relative position of the variation in the aligned TFBP
## HIGH_INF_POS : A flag indicating if the variant falls in a high information position of the TFBP
## MOTIF_SCORE_CHANGE : The difference in motif score of the reference and variant sequences for the TFBP
## TRANSCRIPTION_FACTORS : List of transcription factors which bind to the transcription factor binding profile
#Uploaded_variation Location Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Extra
rs143272992 1:16386305-16386306 C ENSG00000185519 ENST00000375662.4 Transcript intron_variant - - - - - rs372070031 IMPACT=MODIFIER;STRAND=-1;SYMBOL=FAM131C;SYMBOL_SOURCE=HGNC;HGNC_ID=26717;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS41270.1;ENSP=ENSP00000364814;SWISSPROT=Q96AQ9;UNIPARC=UPI000022B016;INTRON=5/6;HGVSc=ENST00000375662.4:c.451+58dup;AFR_AF=0.2731;AMR_AF=0.3573;EAS_AF=0.3581;EUR_AF=0.3757;SAS_AF=0.5051;MAX_AF=0.5051;MAX_AF_POPS=SAS
3_147121630_TC/- 3:147121630-147121631 - ENSG00000174963 ENST00000525172.2 Transcript intron_variant - - - - - rs142316820 IMPACT=MODIFIER;STRAND=-1;SYMBOL=ZIC4;SYMBOL_SOURCE=HGNC;HGNC_ID=20393;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS54652.1;ENSP=ENSP00000435509;SWISSPROT=Q8N9L1;TREMBL=C9JZU7,C9JD04,C9J6T3,B3KPI4;UNIPARC=UPI0001914D88;INTRON=1/4;HGVSc=ENST00000525172.2:c.135+120_135+121del
rs112208190 3:184043926-184043927 - ENSG00000114867 ENST00000424196.1 Transcript intron_variant - - - - - rs34901174 IMPACT=MODIFIER;STRAND=1;SYMBOL=EIF4G1;SYMBOL_SOURCE=HGNC;HGNC_ID=3296;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS54687.1;ENSP=ENSP00000416255;SWISSPROT=Q04637;TREMBL=Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556;UNIPARC=UPI00015E0966;INTRON=20/31;HGVSc=ENST00000424196.1:c.3243+217_3243+218del
rs116873396 7:22533452-22533453 - ENSG00000105889 ENST00000404369.4 Transcript frameshift_variant,splice_region_variant 503-504 87-88 29-30 HE/QX caTGag/caag - IMPACT=HIGH;STRAND=-1;SYMBOL=STEAP1B;SYMBOL_SOURCE=HGNC;HGNC_ID=41907;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS56469.1;ENSP=ENSP00000384370;TREMBL=C9JL51,C9JE84,B5MCI2;UNIPARC=UPI000173A267;EXON=3/5;HGVSc=ENST00000404369.4:c.87_88del;HGVSp=ENSP00000384370.4:p.His29GlnfsTer24
11_112042480_T/- 11:112042480 - ENSG00000150783 ENST00000280358.4 Transcript intron_variant - - - - - rs1225064086 IMPACT=MODIFIER;STRAND=1;SYMBOL=TEX12;SYMBOL_SOURCE=HGNC;HGNC_ID=11734;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS31679.1;ENSP=ENSP00000280358;SWISSPROT=Q9BXU0;UNIPARC=UPI00001377E3;INTRON=4/4;HGVSc=ENST00000280358.4:c.228-9del;gnomAD_AF=1.711e-05;gnomAD_AFR_AF=8.319e-05;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=8.285e-05;gnomAD_FIN_AF=0;gnomAD_NFE_AF=1.133e-05;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=8.319e-05;MAX_AF_POPS=gnomAD_AFR
12_49431404_-/T 12:49431403-49431404 T ENSG00000167548 ENST00000301067.7 Transcript frameshift_variant 9735-9736 9735-9736 3245-3246 -/X -/A - IMPACT=HIGH;STRAND=-1;SYMBOL=KMT2D;SYMBOL_SOURCE=HGNC;HGNC_ID=7133;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS44873.1;ENSP=ENSP00000301067;SWISSPROT=O14686;TREMBL=Q6PIA1,Q59FG6,F8VWW4;UNIPARC=UPI0000EE84D6;EXON=34/54;HGVSc=ENST00000301067.7:c.9735dup;HGVSp=ENSP00000301067.7:p.Pro3246ThrfsTer5
13_33332314_A/- 13:33332314 - ENSG00000083642 ENST00000315596.10 Transcript frameshift_variant 3332 3146 1049 Q/X cAa/ca - IMPACT=HIGH;STRAND=1;SYMBOL=PDS5B;SYMBOL_SOURCE=HGNC;HGNC_ID=20418;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS41878.1;ENSP=ENSP00000313851;SWISSPROT=Q9NTI5;UNIPARC=UPI000006D4A9;EXON=27/35;HGVSc=ENST00000315596.10:c.3148del;HGVSp=ENSP00000313851.10:p.Thr1050GlnfsTer12;HGVS_OFFSET=2
17_38712161_T/- 17:38712161 - ENSG00000126353 ENST00000246657.2 Transcript intron_variant - - - - - rs372297045 IMPACT=MODIFIER;STRAND=-1;SYMBOL=CCR7;SYMBOL_SOURCE=HGNC;HGNC_ID=1608;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS11369.1;ENSP=ENSP00000246657;SWISSPROT=P32248;TREMBL=J3KTN5,J3KSS9,A0N0Q0;UNIPARC=UPI0000001C2F;INTRON=2/2;HGVSc=ENST00000246657.2:c.61-91del;AFR_AF=0;AMR_AF=0;EAS_AF=0.004;EUR_AF=0;SAS_AF=0;MAX_AF=0.004;MAX_AF_POPS=EAS
20_50342307_TC/- 20:50342307-50342308 - ENSG00000054793 ENST00000338821.5 Transcript intron_variant - - - - - - IMPACT=MODIFIER;STRAND=-1;SYMBOL=ATP9A;SYMBOL_SOURCE=HGNC;HGNC_ID=13540;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS33489.1;ENSP=ENSP00000342481;SWISSPROT=O75110;TREMBL=Q2NLD0,B4DR18;UNIPARC=UPI000004D334;INTRON=3/27;HGVSc=ENST00000338821.5:c.327+50_327+51del
......@@ -15,10 +15,11 @@ Example
python examples/run_example_tcga_GA.py \
--i_split 1 \
--n_split 1 \
--vcf2maf ~/Documents/biotools/informatics/VCF/mskcc-vcf2maf-5453f80/vcf2maf.pl \
--vcf2maf ~/Documents/biotools/informatics/VCF/vcf2maf/vcf2maf.pl \
--vep_folder ~/Documents/biotools/informatics/VCF/ensembl-vep \
--vep_data ~/.vep \
--fasta ~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa
--vep_n_fork 4 \
--fasta ~/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa
"""
import argparse
import os
......@@ -39,6 +40,7 @@ parser.add_argument('--n_split' , type=int , default=1 , help='total nu
parser.add_argument('--vcf2maf' , type=str , default="" , help='path to the vcf2maf perl script')
parser.add_argument('--vep_folder' , type=str , default="" , help='path to the folder of the vep command')
parser.add_argument('--vep_data' , type=str , default="" , help='path to the .vep data folder')
parser.add_argument('--vep_n_fork' , type=int , default=4 , help='number of forks to be used by VEP')
parser.add_argument('--fasta' , type=str , default="" , help='path to reference genome FASTA file')
args = parser.parse_args()
......@@ -119,26 +121,28 @@ if __name__ == "__main__":
col_tumor = "PRIMARY"
else:
col_tumor = "METASTATIC"
normal_id = dt_identifiers["Matched_Norm_Sample_Barcode"],
tumor_id = dt_identifiers["Tumor_Sample_Barcode"],
normal_id = dt_identifiers["Matched_Norm_Sample_Barcode"]
tumor_id = dt_identifiers["Tumor_Sample_Barcode"]
infos_n_reads = ["AD", "DP", "FA"]
infos_other = ["SS", "GT"]
run_annotator(
vcf_folder = vcf_folder,
vcf_file = vcf_file,
col_normal = col_normal,
col_tumor = col_tumor,
normal_id = normal_id,
tumor_id = tumor_id,
infos_n_reads = infos_n_reads,
infos_other = infos_other,
vcf2maf = args.vcf2maf,
vep_folder = args.vep_folder,
vep_data = args.vep_data,
vep_custom = "~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN",
vep_overwrite = True,
fasta = args.fasta,
dt_folders = dt_folders,
dt_identifiers = dt_identifiers
vcf_folder = vcf_folder,
vcf_file = vcf_file,
col_normal = col_normal,
col_tumor = col_tumor,
normal_id = normal_id,
tumor_id = tumor_id,
infos_n_reads = infos_n_reads,
infos_other = infos_other,
vcf2maf = args.vcf2maf,
vep_folder = args.vep_folder,
vep_data = args.vep_data,
# vep_custom = "~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN",
vep_n_fork = args.vep_n_fork,
vep_overwrite = True,
vcf2maf_overwrite = True,
fasta = args.fasta,
dt_folders = dt_folders,
dt_identifiers = dt_identifiers
)
......@@ -27,7 +27,7 @@ DataFrame = pd.core.frame.DataFrame
def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: str, tumor_id: str, normal_id: str,
infos_n_reads: list, infos_other: list, vcf2maf: str, vep_folder: str, vep_data: str, fasta: str,
dt_folders: dict, dt_identifiers: dict=None, vep_custom: Union[str,list]=None,
vep_overwrite: bool=False, vcf2maf_overwrite: bool=False):
vep_overwrite: bool=False, vep_n_fork: int=4, vcf2maf_overwrite: bool=False):
"""
Run the manual, vcf2maf and vep annotations on one VCF file and assemble.
......@@ -58,6 +58,8 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
'--custom ~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN'
vep_overwrite: bool, optional.
set to True to overwrite any existing previous run of VEP.
vep_n_fork: int, optional.
number of forks to be used when running VEP.
fasta: str
relative path to fasta file from vep_folder
vcf_folder: str
......@@ -112,6 +114,7 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
fasta = fasta,
vep_custom = vep_custom,
overwrite = vep_overwrite,
vep_n_fork = vep_n_fork
)
#### # 2. ASSEMBLE ANNOTATIONS
......
......@@ -42,13 +42,24 @@ tumor_id: str, normal_id: str, fasta: str, overwrite: bool=False):
if the output file already exists (from previous run), should it be overwritten?
"""
need_run = True
vcf_file = out_path.split("/")[-1]
tmp_file = vcf_file.replace(".txt", ".vep.vcf")
tmp_path = os.path.join(tmp_folder, tmp_file)
if os.path.exists(out_path) and not overwrite:
need_run = False
elif os.path.exists(out_path):
os.remove(out_path)
if need_run:
print("STATUS: RUNNING VCF2MAF")
if os.path.exists(tmp_path):
os.remove(tmp_path)
print("removed existing file: %s" % tmp_path)
if os.path.exists(out_path):
os.remove(out_path)
print("removed existing file: %s" % out_path)
os.system('perl %s \
--input-vcf %s \
--output-maf %s \
......
......@@ -14,7 +14,7 @@ Python wrapper around VEP command.
import os
from typing import Union
def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: str, fasta: str, vep_custom: Union[str,list]=None, overwrite: bool=False):
def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: str, fasta: str, vep_custom: Union[str,list]=None, overwrite: bool=False, vep_n_fork: int=4):
"""
Run the Ensembl Variant Effect Predictor (VEP) alone with custom options. See options details at
https://www.ensembl.org/info/docs/tools/vep/script/vep_options.html#opt_af
......@@ -34,18 +34,24 @@ def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: s
vep_custom: str or list
additional options to add to the vep cmd. For instance
'~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN'
overwrite: bool
overwrite: bool, optional.
if the output file already exists (from previous run), should it be overwritten?
vep_n_fork: int, optional.
number of forks to be used when running VEP.
"""
vep = os.path.join(vep_folder, "vep")
need_run = True
if os.path.exists(out_path) and not overwrite:
need_run = False
elif os.path.exists(out_path):
os.remove(out_path)
if need_run:
print("STATUS: RUNNING VEP")
if os.path.exists(out_path):
os.remove(out_path)
print("removed existing file: %s" % out_path)
cmd = """%s \
--dir %s \
--af \
......@@ -64,7 +70,7 @@ def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: s
--check_existing \
--distance 5000 \
--hgvs \
--fork 4 \
--fork %s \
--numbers \
--mane \
--pick \
......@@ -81,7 +87,8 @@ def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: s
--input_file %s \
--output_file %s \
--fasta %s \
--offline """ % (vep, vep_data, vcf_path, out_path, fasta)
--cache \
--offline """ % (vep, vep_data, vep_n_fork, vcf_path, out_path, fasta)
if vep_custom is not None:
if type(vep_custom) == list:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment