Commit 4f734f19 authored by Pradat Yoann's avatar Pradat Yoann
Browse files

set vcf2maf and ensembl-vep as submodules

parent 1be993bf
#version 2.4
Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps vcf_id vcf_qual ExAC_AF_Adj ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos
FAM131C 348487 . GRCh37 1 16386305 16386306 + Intron INS - - C rs372070031 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 - - c.451+58dup ENST00000375662 FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; C ENSG00000185519 ENST00000375662 Transcript intron_variant rs372070031 1 -1 FAM131C HGNC 26717 protein_coding YES CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 NM_182623.2 5/6 0.2731 0.3573 0.3581 0.3757 0.5051 MODIFIER 1 insertion PASS GGC rs143272992 50 16386305
ZIC4 84107 . GRCh37 3 147121630 147121631 + Intron DEL TC TC - rs142316820 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 TC TC c.135+120_135+121del ENST00000525172 ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; - ENSG00000174963 ENST00000525172 Transcript intron_variant rs142316820 1 -1 ZIC4 HGNC 20393 protein_coding YES CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 NM_001168378.1 1/4 MODIFIER 1 deletion 1 PASS GATCT . 50 147121629
EIF4G1 1981 . GRCh37 3 184043926 184043927 + Intron DEL AC AC - rs34901174 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 AC AC c.3243+217_3243+218del ENST00000424196 EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; - ENSG00000114867 ENST00000424196 Transcript intron_variant rs34901174 1 1 EIF4G1 HGNC 3296 protein_coding YES CCDS54687.1 ENSP00000416255 Q04637 
Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 MODIFIER 1 deletion 1 PASS AAACA rs112208190 50 184043925
STEAP1B 256227 . GRCh37 7 22533452 22533453 + Frame_Shift_Del DEL CA CA - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 ENST00000404369 3/5 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; - ENSG00000105889 ENST00000404369 Transcript frameshift_variant,splice_region_variant 503-504/1515 87-88/1029 29-30/342 HE/QX caTGag/caag 1 -1 STEAP1B HGNC 41907 protein_coding YES CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 NM_001164460.1 3/5 PANTHER:PTHR14239:SF3,PANTHER:PTHR14239 HIGH 1 deletion PASS CTCAT rs116873396 50 22533451
TEX12 56158 . GRCh37 11 112042480 112042480 + Intron DEL T T - rs1225064086 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 T T c.228-9del ENST00000280358 TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; - ENSG00000150783 ENST00000280358 Transcript intron_variant rs1225064086 1 1 TEX12 HGNC 11734 protein_coding YES CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 NM_031275.4 4/4 MODIFIER 1 deletion PASS ACTT . 50 1.711e-05 8.319e-05 8.285e-05 1.133e-05 112042479
KMT2D 8085 . GRCh37 12 49431403 49431404 + Frame_Shift_Ins INS - - T novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 ENST00000301067 34/54 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; T ENSG00000167548 ENST00000301067 Transcript frameshift_variant 9735-9736/19419 9735-9736/16614 3245-3246/5537 -/X -/A 1 -1 KMT2D HGNC 7133 protein_coding YES CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 NM_003482.3 34/54 PANTHER:PTHR22884,PANTHER:PTHR22884:SF324 HIGH 1 insertion 1 PASS GGT . 50 49431403
PDS5B 23047 . GRCh37 13 33332314 33332314 + Frame_Shift_Del DEL A A - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 ENST00000315596 27/35 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; - ENSG00000083642 ENST00000315596 Transcript frameshift_variant 3332/7497 3146/4344 1049/1447 Q/X cAa/ca 1 1 PDS5B HGNC 20418 protein_coding YES CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 NM_015032.3 27/35 PANTHER:PTHR12663,PANTHER:PTHR12663:SF1 HIGH 1 deletion 2 PASS ACAA . 50 33332313
CCR7 1236 . GRCh37 17 38712161 38712161 + Intron DEL T T - rs372297045 TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 T T c.61-91del ENST00000246657 CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; - ENSG00000126353 ENST00000246657 Transcript intron_variant rs372297045 1 -1 CCR7 HGNC 1608 protein_coding YES CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F NM_001838.3 2/2 0.004 MODIFIER 1 deletion 1 PASS TCTT . 50 38712160
ATP9A 10079 . GRCh37 20 50342307 50342308 + Intron DEL TC TC - novel TCGA-A1-A0SD-01A-11D-A10Y-09 TCGA-A1-A0SD-10A-01D-A110-09 TC TC c.327+50_327+51del ENST00000338821 ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001644001,; - ENSG00000054793 ENST00000338821 Transcript intron_variant 1 -1 ATP9A HGNC 13540 protein_coding YES CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 NM_006045.1 3/27 MODIFIER 1 deletion PASS TTTCT . 50 50342306
FAM131C 348487 . GRCh37 1 16386305 16386306 + Intron INS - - C rs372070031 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 - - c.451+58dup ENST00000375662 FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; C ENSG00000185519 ENST00000375662 Transcript intron_variant rs372070031 1 -1 FAM131C HGNC 26717 protein_coding YES CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 NM_182623.2 5/6 0.2731 0.3573 0.3581 0.3757 0.5051 MODIFIER 1 insertion PASS GGC rs143272992 50 16386305
ZIC4 84107 . GRCh37 3 147121630 147121631 + Intron DEL TC TC - rs142316820 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 TC TC c.135+120_135+121del ENST00000525172 ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; - ENSG00000174963 ENST00000525172 Transcript intron_variant rs142316820 1 -1 ZIC4 HGNC 20393 protein_coding YES CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 NM_001168378.1 1/4 MODIFIER 1 deletion 1 PASS GATCT . 50 147121629
EIF4G1 1981 . GRCh37 3 184043926 184043927 + Intron DEL AC AC - rs34901174 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 AC AC c.3243+217_3243+218del ENST00000424196 EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; - ENSG00000114867 ENST00000424196 Transcript intron_variant rs34901174 1 1 EIF4G1 HGNC 3296 protein_coding YES CCDS54687.1 ENSP00000416255 Q04637 
Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 MODIFIER 1 deletion 1 PASS AAACA rs112208190 50 184043925
STEAP1B 256227 . GRCh37 7 22533452 22533453 + Frame_Shift_Del DEL CA CA - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 ENST00000404369 3/5 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; - ENSG00000105889 ENST00000404369 Transcript frameshift_variant,splice_region_variant 503-504/1515 87-88/1029 29-30/342 HE/QX caTGag/caag 1 -1 STEAP1B HGNC 41907 protein_coding YES CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 NM_001164460.1 3/5 PANTHER:PTHR14239:SF3,PANTHER:PTHR14239 HIGH 1 deletion PASS CTCAT rs116873396 50 22533451
TEX12 56158 . GRCh37 11 112042480 112042480 + Intron DEL T T - rs1225064086 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 T T c.228-9del ENST00000280358 TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; - ENSG00000150783 ENST00000280358 Transcript intron_variant rs1225064086 1 1 TEX12 HGNC 11734 protein_coding YES CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 NM_031275.4 4/4 MODIFIER 1 deletion PASS ACTT . 50 1.711e-05 8.319e-05 8.285e-05 1.133e-05 112042479
KMT2D 8085 . GRCh37 12 49431403 49431404 + Frame_Shift_Ins INS - - T novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 ENST00000301067 34/54 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; T ENSG00000167548 ENST00000301067 Transcript frameshift_variant 9735-9736/19419 9735-9736/16614 3245-3246/5537 -/X -/A 1 -1 KMT2D HGNC 7133 protein_coding YES CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 NM_003482.3 34/54 PANTHER:PTHR22884,PANTHER:PTHR22884:SF324 HIGH 1 insertion 1 PASS GGT . 50 49431403
PDS5B 23047 . GRCh37 13 33332314 33332314 + Frame_Shift_Del DEL A A - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 ENST00000315596 27/35 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; - ENSG00000083642 ENST00000315596 Transcript frameshift_variant 3332/7497 3146/4344 1049/1447 Q/X cAa/ca 1 1 PDS5B HGNC 20418 protein_coding YES CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 NM_015032.3 27/35 PANTHER:PTHR12663,PANTHER:PTHR12663:SF1 HIGH 1 deletion 2 PASS ACAA . 50 33332313
CCR7 1236 . GRCh37 17 38712161 38712161 + Intron DEL T T - rs372297045 TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 T T c.61-91del ENST00000246657 CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; - ENSG00000126353 ENST00000246657 Transcript intron_variant rs372297045 1 -1 CCR7 HGNC 1608 protein_coding YES CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F NM_001838.3 2/2 0.004 MODIFIER 1 deletion 1 PASS TCTT . 50 38712160
ATP9A 10079 . GRCh37 20 50342307 50342308 + Intron DEL TC TC - novel TCGA-A1-A0SB-01A-11D-A142-09 TCGA-A1-A0SB-10B-01D-A142-09 TC TC c.327+50_327+51del ENST00000338821 ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001644001,; - ENSG00000054793 ENST00000338821 Transcript intron_variant 1 -1 ATP9A HGNC 13540 protein_coding YES CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 NM_006045.1 3/27 MODIFIER 1 deletion PASS TTTCT . 50 50342306
## ENSEMBL VARIANT EFFECT PREDICTOR v101.0
## Output produced at 2020-08-28 13:00:15
## Output produced at 2020-09-02 10:28:24
## Using cache in /Users/ypradat/.vep/homo_sapiens/101_GRCh37
## Using API version 101, DB version ?
## ensembl-funcgen version 101.b918a49
## ensembl version 101.856c8e8
## ensembl-io version 101.943b6c2
## ensembl-variation version 101.50e7372
## ESP version 20141103
## 1000genomes version phase3
## genebuild version 2011-04
## ensembl version 101.856c8e8
## ensembl-variation version 101.851c7e0
## polyphen version 2.2.2
## ClinVar version 201912
## regbuild version 1.0
## COSMIC version 90
## gencode version GENCODE 19
## assembly version GRCh37.p13
## sift version sift5.2.2
## 1000genomes version phase3
## ESP version 20141103
## HGMD-PUBLIC version 20194
## gnomAD version r2.1
## polyphen version 2.2.2
## genebuild version 2011-04
## dbSNP version 153
## COSMIC version 90
## gencode version GENCODE 19
## regbuild version 1.0
## ClinVar version 201912
## Column descriptions:
## Uploaded_variation : Identifier of uploaded variant
## Location : Location of variant in standard coordinate format (chr:start or chr:start-end)
......
......@@ -30,7 +30,7 @@
##INFO=<ID=VLSC,Number=1,Type=Integer,Description="Final somatic score between 0 and 255 when multiple lines of evidence are available">
##FILTER=<ID=mf1,Description="Filtered out by MuTect v.1">
##FILTER=<ID=oxoG3,Description="Filtered out by OxoG Artifact Filter v3">
##VEP="v101" time="2020-08-28 13:00:12" cache="/Users/ypradat/.vep/homo_sapiens/101_GRCh37" ensembl-variation=101.50e7372 ensembl-funcgen=101.b918a49 ensembl=101.856c8e8 ensembl-io=101.943b6c2 1000genomes="phase3" COSMIC="90" ClinVar="201912" ESP="20141103" HGMD-PUBLIC="20194" assembly="GRCh37.p13" dbSNP="153" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"
##VEP="v101" time="2020-09-02 10:32:28" cache="/Users/ypradat/.vep/homo_sapiens/101_GRCh37" ensembl-variation=101.851c7e0 ensembl=101.856c8e8 ensembl-io=101.943b6c2 ensembl-funcgen=101.b918a49 1000genomes="phase3" COSMIC="90" ClinVar="201912" ESP="20141103" HGMD-PUBLIC="20194" assembly="GRCh37.p13" dbSNP="153" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|DISTANCE|STRAND|FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|TSL|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|RefSeq|GENE_PHENO|SIFT|PolyPhen|DOMAINS|HGVS_OFFSET|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL PRIMARY
1 16386305 rs143272992 G GC 50 PASS DB;DP=17;Gene=FAM131C;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=INS;TID=ENST00000375662.4;VLSC=255;CSQ=C|intron_variant|MODIFIER|FAM131C|ENSG00000185519|Transcript|ENST00000375662|protein_coding||5/6|ENST00000375662.4:c.451+58dup|||||||rs372070031|1||-1||1|insertion|HGNC|26717|YES||CCDS41270.1|ENSP00000364814|Q96AQ9||UPI000022B016|NM_182623.2|||||||0.2731|0.3573|0.3581|0.3757|0.5051||||||||||||||||||||,C|downstream_gene_variant|MODIFIER|CLCNKB|ENSG00000184908|Transcript|ENST00000375667|protein_coding||||||||||rs372070031|1|2502|1|||insertion|HGNC|2027|||CCDS57974.1|ENSP00000364819|P51801||UPI000046FF10|NM_001165945.2|1||||||0.2731|0.3573|0.3581|0.3757|0.5051||||||||||||||||||||,C|downstream_gene_variant|MODIFIER|CLCNKB|ENSG00000184908|Transcript|ENST00000375679|protein_coding||||||||||rs372070031|1|2502|1|||insertion|HGNC|2027|YES||CCDS168.1|ENSP00000364831|P51801||UPI000040E261|NM_000085.4|1||||||0.2731|0.3573|0.3581|0.3757|0.5051||||||||||||||||||||,C|downstream_gene_variant|MODIFIER|CLCNKB|ENSG00000184908|Transcript|ENST00000431772|protein_coding||||||||||rs372070031|1|2502|1|cds_start_NF||insertion|HGNC|2027||||ENSP00000389344||Q5T5Q6|UPI000046FF11||1||||||0.2731|0.3573|0.3581|0.3757|0.5051||||||||||||||||||||,C|intron_variant&non_coding_transcript_variant|MODIFIER|FAM131C|ENSG00000185519|Transcript|ENST00000494078|processed_transcript||4/5|ENST00000494078.1:n.525+58dup|||||||rs372070031|1||-1|||insertion|HGNC|26717|||||||||||||||0.2731|0.3573|0.3581|0.3757|0.5051|||||||||||||||||||| GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:11,0:11:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
......
......@@ -30,7 +30,7 @@
##INFO=<ID=VLSC,Number=1,Type=Integer,Description="Final somatic score between 0 and 255 when multiple lines of evidence are available">
##FILTER=<ID=mf1,Description="Filtered out by MuTect v.1">
##FILTER=<ID=oxoG3,Description="Filtered out by OxoG Artifact Filter v3">
##VEP="v101" time="2020-08-28 09:53:10" cache="/Users/ypradat/.vep/homo_sapiens/101_GRCh37" ensembl-funcgen=101.b918a49 ensembl-io=101.943b6c2 ensembl=101.856c8e8 ensembl-variation=101.50e7372 1000genomes="phase3" COSMIC="90" ClinVar="201912" ESP="20141103" HGMD-PUBLIC="20194" assembly="GRCh37.p13" dbSNP="153" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"
##VEP="v101" time="2020-09-02 10:32:33" cache="/Users/ypradat/.vep/homo_sapiens/101_GRCh37" ensembl-funcgen=101.b918a49 ensembl=101.856c8e8 ensembl-variation=101.851c7e0 ensembl-io=101.943b6c2 1000genomes="phase3" COSMIC="90" ClinVar="201912" ESP="20141103" HGMD-PUBLIC="20194" assembly="GRCh37.p13" dbSNP="153" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|DISTANCE|STRAND|FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|TSL|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|RefSeq|GENE_PHENO|SIFT|PolyPhen|DOMAINS|HGVS_OFFSET|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL PRIMARY
1 44476442 . C T 43 PASS DP=127;Gene=SLC6A9;MQ0=0;SOMATIC;SS=Somatic;VC=5'UTR;VT=SNP;TID=ENST00000372307.3;VLSC=255;CSQ=T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000357730|protein_coding|2/13||ENST00000357730.2:c.200G>A|ENSP00000350362.2:p.Gly67Asp|392/2168|200/1959|67/652|G/D|gGc/gAc|COSV62211565|1||-1|||SNV|HGNC|11056|||CCDS41316.1|ENSP00000350362|P48067|E9PJ65&B7Z589&B7Z3W8|UPI000053030A|NM_006934.3&NM_001261380.1|1|deleterious(0)|probably_damaging(0.999)|Transmembrane_helices:TMhelix&PROSITE_profiles:PS50267&PANTHER:PTHR11616&PANTHER:PTHR11616:SF110&Pfam:PF00209&Superfamily:0053687&Prints:PR00176||||||||||||||||||||1|1||||||,T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000360584|protein_coding|3/14||ENST00000360584.2:c.362G>A|ENSP00000353791.2:p.Gly121Asp|554/2330|362/2121|121/706|G/D|gGc/gAc|COSV62211565|1||-1||1|SNV|HGNC|11056|YES||CCDS41317.1|ENSP00000353791|P48067|B7Z589|UPI000053030B|NM_201649.3|1|deleterious(0)|probably_damaging(1)|Transmembrane_helices:TMhelix&PROSITE_profiles:PS50267&PANTHER:PTHR11616:SF110&PANTHER:PTHR11616&Pfam:PF00209&Superfamily:0053687&Prints:PR00176||||||||||||||||||||1|1||||||,T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000372306|protein_coding|3/12||ENST00000372306.3:c.143G>A|ENSP00000361380.3:p.Gly48Asp|283/1926|143/1740|48/579|G/D|gGc/gAc|COSV62211565|1||-1|||SNV|HGNC|11056||||ENSP00000361380||J3KPA5|UPI0001F7803E||1|deleterious(0)|probably_damaging(0.993)|Transmembrane_helices:TMhelix&PROSITE_profiles:PS50267&PANTHER:PTHR11616&PANTHER:PTHR11616:SF110&Pfam:PF00209&Superfamily:0053687&Prints:PR00176||||||||||||||||||||1|1||||||,T|5_prime_UTR_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000372307|protein_coding|2/12||ENST00000372307.3:c.-53G>A||276/2162|||||COSV62211565|1||-1|||SNV|HGNC|11056||||ENSP00000361381||B7Z3A9|UPI0001914B58||1|||||||||||||||||||||||1|1||||||,T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000372310|pr
otein_coding|3/14||ENST00000372310.3:c.143G>A|ENSP00000361384.3:p.Gly48Asp|309/3130|143/1902|48/633|G/D|gGc/gAc|COSV62211565|1||-1|||SNV|HGNC|11056|||CCDS30695.1|ENSP00000361384|P48067|B7Z589|UPI0000204F05|NM_001024845.2|1|deleterious(0)|probably_damaging(0.999)|Transmembrane_helices:TMhelix&PROSITE_profiles:PS50267&PANTHER:PTHR11616:SF110&PANTHER:PTHR11616&Pfam:PF00209&Superfamily:0053687&Prints:PR00176||||||||||||||||||||1|1||||||,T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000466926|protein_coding|4/4||ENST00000466926.1:c.305G>A|ENSP00000433241.1:p.Gly102Asp|547/591|305/349|102/116|G/D|gGc/gAc|COSV62211565|1||-1|cds_end_NF||SNV|HGNC|11056||||ENSP00000433241||E9PLM5|UPI0001F78040||1|deleterious(0)|probably_damaging(1)|Superfamily:0053687&Pfam:PF00209&PANTHER:PTHR11616&PANTHER:PTHR11616:SF110&PROSITE_profiles:PS50267||||||||||||||||||||1|1||||||,T|intron_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000475075|protein_coding||2/11|ENST00000475075.2:c.-14-2147G>A|||||||COSV62211565|1||-1|||SNV|HGNC|11056||||ENSP00000434460||B7Z589|UPI0001914EDD||1|||||||||||||||||||||||1|1||||||,T|non_coding_transcript_exon_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000489764|retained_intron|3/3||ENST00000489764.1:n.352G>A||352/930|||||COSV62211565|1||-1|||SNV|HGNC|11056|||||||||1|||||||||||||||||||||||1|1||||||,T|intron_variant&non_coding_transcript_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000492434|processed_transcript||3/4|ENST00000492434.2:n.351+39G>A|||||||COSV62211565|1||-1|||SNV|HGNC|11056|||||||||1|||||||||||||||||||||||1|1||||||,T|missense_variant|MODERATE|SLC6A9|ENSG00000196517|Transcript|ENST00000528803|protein_coding|3/5||ENST00000528803.1:c.200G>A|ENSP00000435652.1:p.Gly67Asp|321/524|200/403|67/134|G/D|gGc/gAc|COSV62211565|1||-1|cds_end_NF||SNV|HGNC|11056||||ENSP00000435652||E9PJ65|UPI0001F7803F||1|deleterious(0)|probably_damaging(0.999)|PROSITE_profiles:PS50267&PANTHER:PTHR11616:SF110&PANTHER:PTHR11616&Pfa
m:PF00209&Superfamily:0053687&Prints:PR00176||||||||||||||||||||1|1||||||,T|downstream_gene_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000533007|processed_transcript||||||||||COSV62211565|1|3860|-1|||SNV|HGNC|11056|||||||||1|||||||||||||||||||||||1|1||||||,T|intron_variant|MODIFIER|SLC6A9|ENSG00000196517|Transcript|ENST00000537678|protein_coding||2/10|ENST00000537678.1:c.-8-674G>A|||||||COSV62211565|1||-1|||SNV|HGNC|11056||||ENSP00000442523||B7Z9G8|UPI000191543C||1|||||||||||||||||||||||1|1|||||| GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:69,0:69:0.000:0:.:2:. 0/1:42,16:58:0.276:0:31:2:.
......
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter n_GT n_SS n_FA n_DP n_DP4 n_AD n_depth n_ref_count n_alt_count t_GT t_SS t_FA t_DP t_DP4 t_AD t_depth t_ref_count t_alt_count
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter_VCF n_GT n_SS n_FA n_DP n_DP4 n_AD n_depth n_ref_count n_alt_count t_GT t_SS t_FA t_DP t_DP4 t_AD t_depth t_ref_count t_alt_count
1 2520302 T C 0/0 0 5 5.0 5.0 0.0 0/1 2 0.5556 9 2,2,3,2 4,5 9.0 4.0 5.0
1 8421092 C T 0/0 0 0 6 6,0,0,0 6.0 6.0 0.0 0/1 2 0.636 11 3,0,7,1 3,7 11.0 3.0 7.0
1 16386416 G A 0/0 0 0 8 8.0 8.0 0.0 0/1 2 0.6207 29 7,4,11,7 11,19 29.0 11.0 19.0
......
......@@ -15,8 +15,6 @@ Example
python examples/run_example_tcga_GA.py \
--i_split 1 \
--n_split 1 \
--vcf2maf_path ~/Documents/biotools/informatics/VCF/vcf2maf/vcf2maf.pl \
--vep_folder ~/Documents/biotools/informatics/VCF/ensembl-vep \
--vep_data ~/.vep \
--vep_n_fork 4 \
--fasta ~/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa
......@@ -36,14 +34,15 @@ from src.main import VepConfig
#### # SCRIPT PARAMETERS
#### #####################################################################################################
default_vep_data = os.path.expanduser("~/.vep")
default_fasta = os.path.expanduser("~/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa")
parser = argparse.ArgumentParser()
parser.add_argument('--i_split' , type=int , default=1 , help='the split processed')
parser.add_argument('--n_split' , type=int , default=1 , help='total number of splits')
parser.add_argument('--vcf2maf_path' , type=str , default="" , help='path to the vcf2maf perl script')
parser.add_argument('--vep_folder' , type=str , default="" , help='path to the folder of the vep command')
parser.add_argument('--vep_data' , type=str , default="" , help='path to the .vep data folder')
parser.add_argument('--vep_n_fork' , type=int , default=4 , help='number of forks to be used by VEP')
parser.add_argument('--fasta' , type=str , default="" , help='path to reference genome FASTA file')
parser.add_argument('--i_split' , type=int , default=1 , help='the split processed')
parser.add_argument('--n_split' , type=int , default=1 , help='total number of splits')
parser.add_argument('--vep_data' , type=str , default=default_vep_data , help='path to the .vep data folder')
parser.add_argument('--vep_n_fork' , type=int , default=4 , help='number of forks to be used by VEP')
parser.add_argument('--fasta' , type=str , default=default_fasta , help='path to reference genome FASTA file')
args = parser.parse_args()
print("Parameters", flush=True)
......@@ -101,20 +100,18 @@ if __name__ == "__main__":
#### configure vep (for inside vcf2maf and for custom if set to use custom vep commands)
vep_config = VepConfig(
folder = args.vep_folder,
data = args.vep_data,
n_fork = args.vep_n_fork,
fasta = args.fasta,
custom_run = False,
# custom_opt = "~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN",
custom_overwrite = False,
custom_overwrite = True,
)
#### configure vcf2maf
vcf2maf_config = Vcf2mafConfig(
path = args.vcf2maf_path,
run = True,
overwrite = False
overwrite = True
)
#### # 4. ANNOTATE
......
......@@ -33,12 +33,11 @@ class VepConfig:
Parameters
--------
folder: str
path to the folder where the vep command is
data: str
path to the .vep data where the reference genome is located
path to the .vep data where the reference genome is located. Default: $HOME/.vep
fasta: str
relative path to fasta file from folder
relative path to fasta file from folder. Default
"$HOME/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"
n_fork: int, optional.
number of forks to be used when running VEP. Use at least 2.
custom_run: bool, optional
......@@ -49,9 +48,8 @@ class VepConfig:
custom_overwrite: bool, optional.
set to True to overwrite any existing previous custom run of VEP.
"""
folder: str
data: str
fasta: str
data: str=os.path.expanduser("~/.vep")
fasta: str=os.path.expanduser("~/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa")
n_fork: int=4
custom_run: bool=False
custom_opt: Union[str, list]=None
......@@ -64,14 +62,11 @@ class Vcf2mafConfig:
Parameters
--------
path: str
path to the vcf2maf perl script
run: bool, optional
set to False to not use vcf2maf.
overwrite: bool, optional.
set to True to overwrite any existing previous run of vcf2maf.
"""
path: str
run: bool=True
overwrite: bool=False
......@@ -128,8 +123,6 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
if vcf2maf_config.run:
vcf2maf_out_path = os.path.join(dt_folders["vcf2maf_out_folder"], out_file)
run_vcf2maf_annotator(
vcf2maf_path = vcf2maf_config.path,
vep_folder = vep_config.folder,
vep_data = vep_config.data,
vep_n_fork = vep_config.n_fork,
vcf_path = vcf_path,
......@@ -144,7 +137,6 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
if vep_config.custom_run:
vep_out_path = os.path.join(dt_folders["vep_out_folder"], out_file)
run_vep_annotator(
vep_folder = vep_config.folder,
vep_data = vep_config.data,
vep_n_fork = vep_config.n_fork,
vcf_path = vcf_path,
......
......@@ -18,7 +18,6 @@ from ..main import Vcf2mafConfig
def test_main():
vep_config = VepConfig(
folder = "~/Documents/biotools/informatics/VCF/ensembl-vep",
data = "~/.vep",
n_fork = 4,
fasta = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa",
......@@ -26,7 +25,6 @@ def test_main():
custom_overwrite = True
)
vcf2maf_config = Vcf2mafConfig(
path = "~/Documents/biotools/informatics/VCF/vcf2maf/vcf2maf.pl",
run = True,
overwrite = True
)
......
......@@ -15,8 +15,6 @@ import os
from ..vcf2maf import run_vcf2maf_annotator
def test_vcf2maf():
vcf2maf_path = "~/Documents/biotools/informatics/VCF/mskcc-vcf2maf-5453f80/vcf2maf.pl"
vep_folder = "~/Documents/biotools/informatics/VCF/ensembl-vep"
vep_data = "~/.vep"
vep_n_fork = 4
fasta = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"
......@@ -36,8 +34,6 @@ def test_vcf2maf():
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
run_vcf2maf_annotator(
vcf2maf_path = vcf2maf_path,
vep_folder = vep_folder,
vep_data = vep_data,
vep_n_fork = vep_n_fork,
vcf_path = os.path.join(vcf_folder, vcf_file),
......@@ -55,8 +51,6 @@ def test_vcf2maf():
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
run_vcf2maf_annotator(
vcf2maf_path = vcf2maf_path,
vep_folder = vep_folder,
vep_data = vep_data,
vep_n_fork = vep_n_fork,
vcf_path = os.path.join(vcf_folder, vcf_file),
......@@ -82,8 +76,6 @@ def test_vcf2maf():
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
run_vcf2maf_annotator(
vcf2maf_path = vcf2maf_path,
vep_folder = vep_folder,
vep_data = vep_data,
vep_n_fork = vep_n_fork,
vcf_path = os.path.join(vcf_folder, vcf_file),
......@@ -101,8 +93,6 @@ def test_vcf2maf():
tumor_id = "TCGA-A1-A0SD-01A-11D-A10Y-09"
run_vcf2maf_annotator(
vcf2maf_path = vcf2maf_path,
vep_folder = vep_folder,
vep_data = vep_data,
vep_n_fork = vep_n_fork,
vcf_path = os.path.join(vcf_folder, vcf_file),
......
......@@ -15,7 +15,6 @@ import os
from ..vep import run_vep_annotator
def test_vep():
vep_folder = "~/Documents/biotools/informatics/VCF/ensembl-vep"
vep_data = "~/.vep"
vep_n_fork = 4
fasta = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"
......@@ -32,7 +31,6 @@ def test_vep():
out_file = vcf_file.replace(".vcf", ".txt")
run_vep_annotator(
vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = os.path.join(vcf_folder, vcf_file),
out_path = os.path.join(out_folder, out_file),
......@@ -45,7 +43,6 @@ def test_vep():
out_file = vcf_file.replace(".vcf", ".txt")
run_vep_annotator(
vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = os.path.join(vcf_folder, vcf_file),
out_path = os.path.join(out_folder, out_file),
......@@ -65,7 +62,6 @@ def test_vep():
out_file = vcf_file.replace(".vcf", ".txt")
run_vep_annotator(
vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = os.path.join(vcf_folder, vcf_file),
out_path = os.path.join(out_folder, out_file),
......@@ -78,7 +74,6 @@ def test_vep():
out_file = vcf_file.replace(".vcf", ".txt")
run_vep_annotator(
vep_folder = vep_folder,
vep_data = vep_data,
vcf_path = os.path.join(vcf_folder, vcf_file),
out_path = os.path.join(out_folder, out_file),
......
......@@ -19,6 +19,30 @@ import re
DataFrame = pd.core.frame.DataFrame
#### modify if the repository was cloned under a different name
REPO_FOLDER = "BT_variant_annotator"


def _find_repo_root(start: str) -> str:
    """
    Return the closest ancestor of ``start`` (or ``start`` itself) whose path ends with REPO_FOLDER.

    The search is done on the path string only; the working directory of the process is never touched.

    Parameters
    ----------
    start: str
        absolute path from which the upward search begins

    Raises
    ------
    ValueError
        if the filesystem root is reached without finding a matching folder
    """
    path = start
    while not path.endswith(REPO_FOLDER):
        parent = os.path.dirname(path)
        # dirname() of the filesystem root is the root itself: stop here instead of looping forever
        # (this happens when REPO_FOLDER is only a substring of a folder name, e.g. "<REPO_FOLDER>_copy")
        if parent == path:
            raise ValueError("Please set the working directory to a location in the repository %s" % REPO_FOLDER)
        path = parent
    return path


def set_wd_to_repo() -> str:
    """
    Change the working directory to the root folder of the repository.

    Returns
    -------
    current_wd: str
        the working directory as it was before the call, so that the caller may restore it

    Raises
    ------
    ValueError
        if the working directory is not located inside the repository. The working directory is left
        unchanged in that case.
    """
    current_wd = os.getcwd()
    # resolve the root first so that a failed lookup never leaves the process in another directory
    os.chdir(_find_repo_root(current_wd))
    return current_wd


def get_path_to_repo() -> str:
    """
    Return the path to the root folder of the repository without changing the working directory.

    Raises
    ------
    ValueError
        if the working directory is not located inside the repository
    """
    return _find_repo_root(os.getcwd())
def load_vcf(filepath: str, no_header: bool=False) -> DataFrame:
"""
Load VCF file from the specified filepath into a pandas DataFrame.
......
......@@ -12,20 +12,17 @@ Python wrapper around vcf2maf perl script.
"""
import os
from .util import get_path_to_repo
def run_vcf2maf_annotator(vcf2maf_path: str, vep_folder: str, vep_data: str, vep_n_fork: int, vcf_path: str, out_path: str, tmp_folder: str, tumor_id: str, normal_id: str, fasta: str, overwrite: bool=False):
def run_vcf2maf_annotator(vep_data: str, vep_n_fork: int, vcf_path: str, out_path: str, tmp_folder: str, tumor_id: str, normal_id: str, fasta: str, overwrite: bool=False):
"""
Run vcf2maf reannotator. Details may be found at https://github.com/mskcc/vcf2maf.
Parameters
----------
vcf2maf_path: str
path to the vcf2maf perl script
vep_folder: str
path to the folder where the vep command is
vep_data: str
path to the .vep data where the reference genome is located
vep_data: int
vep_n_fork: int
number of forks to be used by VEP.
vcf_path: str
path to the vcf file
......@@ -42,6 +39,10 @@ def run_vcf2maf_annotator(vcf2maf_path: str, vep_folder: str, vep_data: str, vep
overwrite: bool
if the output file already exists (from previous run), should it be overwritten?
"""
repo_path = get_path_to_repo()
vcf2maf_path = os.path.join(repo_path, "tools/vcf2maf/vcf2maf.pl")
vep_path = os.path.join(repo_path, "tools/ensembl-vep")
need_run = True
vcf_file = out_path.split("/")[-1]
tmp_file = vcf_file.replace(".txt", ".vep.vcf")
......@@ -74,7 +75,7 @@ def run_vcf2maf_annotator(vcf2maf_path: str, vep_folder: str, vep_data: str, vep
--ncbi-build GRCh37 \
--ref-fasta %s \
--filter-vcf 0' % \
(vcf2maf_path, vcf_path, out_path, tmp_folder, tumor_id, normal_id, vep_folder, vep_data, vep_n_fork, fasta)
(vcf2maf_path, vcf_path, out_path, tmp_folder, tumor_id, normal_id, vep_path, vep_data, vep_n_fork, fasta)
)
else:
print("output file %s already exists and overwrite is set to False" % out_path)
......@@ -13,16 +13,15 @@ Python wrapper around VEP command.
import os
from typing import Union
from .util import get_path_to_repo
def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: str, fasta: str, vep_custom: Union[str,list]=None, overwrite: bool=False, vep_n_fork: int=4):
def run_vep_annotator(vep_data: str, vcf_path: str, out_path: str, fasta: str, vep_custom: Union[str,list]=None, overwrite: bool=False, vep_n_fork: int=4):
"""
Run variant ensembl predictor alone with custom options. See options details at
https://www.ensembl.org/info/docs/tools/vep/script/vep_options.html#opt_af
Parameters
---------
vep_folder: str
path to the folder where the vep command is
vep_data: str
path to the .vep data where the reference genome is located
vcf_path: str
......@@ -39,7 +38,9 @@ def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: s
vep_n_fork: int, optional.
number of forks to be used when running VEP.
"""
vep = os.path.join(vep_folder, "vep")
repo_path = get_path_to_repo()
vep_path = os.path.join(repo_path, "tools/ensembl-vep/vep")
need_run = True
if os.path.exists(out_path) and not overwrite:
......@@ -88,7 +89,7 @@ def run_vep_annotator(vep_folder: str, vep_data: str, vcf_path: str, out_path: s
--output_file %s \
--fasta %s \
--cache \
--offline """ % (vep, vep_data, vep_n_fork, vcf_path, out_path, fasta)
--offline """ % (vep_path, vep_data, vep_n_fork, vcf_path, out_path, fasta)
if vep_custom is not None:
if type(vep_custom) == list:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment