Commit 77942e1a authored by Pradat Yoann's avatar Pradat Yoann

[dev] successful run example and tag v.0.99

parent e8ab514f
# Project specific
/examples/
/htmlcov/
# Logs
......
name: 3.8_bt_variant
channels:
- anaconda
- defaults
dependencies:
- appnope=0.1.0=py38_1001
- attrs=19.3.0=py_0
- backcall=0.2.0=py_0
- blas=1.0=mkl
- ca-certificates=2020.6.24=0
- certifi=2020.6.20=py38_0
- coverage=5.0=py38h1de35cc_0
- decorator=4.4.2=py_0
- iniconfig=1.0.1=py_0
- intel-openmp=2019.4=233
- ipython=7.17.0=py38h39e3cac_0
- ipython_genutils=0.2.0=py38_0
- jedi=0.17.2=py38_0
- libcxx=10.0.0=1
- libedit=3.1.20191231=h1de35cc_1
- libffi=3.3=hb1e8313_2
- mkl=2019.4=233
- mkl-service=2.3.0=py38hfbe908c_0
- mkl_fft=1.1.0=py38hc64f4ea_0
- mkl_random=1.1.1=py38h959d312_0
- more-itertools=8.4.0=py_0
- ncurses=6.2=h0a44026_1
- numpy=1.19.1=py38h3b9f5b6_0
- numpy-base=1.19.1=py38hcfb5961_0
- openssl=1.1.1g=h1de35cc_0
- packaging=20.4=py_0
- pandas=1.1.0=py38hb1e8313_0
- parso=0.7.0=py_0
- pexpect=4.8.0=py38_1
- pickleshare=0.7.5=py38_1001
- pip=20.2.2=py38_0
- pluggy=0.13.1=py38_0
- prompt-toolkit=3.0.5=py_0
- ptyprocess=0.6.0=py38_0
- py=1.9.0=py_0
- pygments=2.6.1=py_0
- pyparsing=2.4.7=py_0
- pytest=6.0.1=py38_0
- pytest-cov=2.10.0=py_0
- python=3.8.5=h26836e1_0
- python-dateutil=2.8.1=py_0
- pytz=2020.1=py_0
- readline=8.0=h1de35cc_0
- setuptools=49.4.0=py38_0
- six=1.15.0=py_0
- sqlite=3.32.3=hffcf06c_0
- tk=8.6.10=hb0a8c7a_0
- toml=0.10.1=py_0
- traitlets=4.3.3=py38_0
- wcwidth=0.2.5=py_0
- wheel=0.34.2=py38_0
- xz=5.2.5=h1de35cc_0
- zlib=1.2.11=h1de35cc_3
prefix: /usr/local/anaconda3/envs/3.8_bt_variant
##fileformat=VCFv4.1
##fileDate=20160402
##tcgaversion=1.1
##reference=<ID=hg19,source="http://www.broadinstitute.org/ftp/pub//seq/references/Homo_sapiens_assembly19.fasta">
##assembly=.
##center="broad.mit.edu"
##phasing=none
##geneAnno=http://www.gencodegenes.org/
##vcfProcessLog=<InputVCF=<.>,InputVCFSource=<.>,InputVCFVer=<.>,InputVCFParam=<.>,InputVCFgeneAnno=<https://tcga-data.nci.nih.gov/docs/GAF/GAF3.0/>>
##INDIVIDUAL=TCGA-A1-A0SB
##SAMPLE=<ID=NORMAL,SampleTCGABarcode=TCGA-A1-A0SB-10B-01D-A142-09,SampleName=TCGA-A1-A0SB-10B-01D-A142-09,Individual=TCGA-A1-A0SB,Description="Normal sample",Platform=Illumina,Source=dbGaP,Accession=.,softwareName=<muTect,CallIndelsPipeline>,softwareVer=<119,65>,softwareParam=<.>,File=TCGA-A1-A0SB-10B-01D-A142-09,SampleUUID=a3254f8e-3bbd-42fc-abea-a5f25b7648b3,MetadataResource=https://tcga-data.nci.nih.gov/uuid/uuidws/mapping/xml/uuid/a3254f8e-3bbd-42fc-abea-a5f25b7648b3>
##SAMPLE=<ID=PRIMARY,SampleTCGABarcode=TCGA-A1-A0SB-01A-11D-A142-09,SampleName=TCGA-A1-A0SB-01A-11D-A142-09,Individual=TCGA-A1-A0SB,Description="Primary Tumor",Platform=Illumina,Source=dbGaP,Accession=.,softwareName=<muTect,CallIndelsPipeline>,softwareVer=<119,65>,softwareParam=<.>,File=TCGA-A1-A0SB-01A-11D-A142-09,SampleUUID=db9d40fb-bfce-4c3b-a6c2-41c5c88982f1,MetadataResource=https://tcga-data.nci.nih.gov/uuid/uuidws/mapping/xml/uuid/db9d40fb-bfce-4c3b-a6c2-41c5c88982f1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Depth of reads supporting alleles 0/1/2/3...">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth at this position in the sample">
##FORMAT=<ID=FA,Number=.,Type=Float,Description="Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample">
##FORMAT=<ID=MQ0,Number=1,Type=Integer,Description="Number of Mapping Quality Zero Reads per sample">
##FORMAT=<ID=SS,Number=1,Type=Integer,Description="Variant status relative to non-adjacent Normal,0=wildtype,1=germline,2=somatic,3=LOH,4=post-transcriptional modification,5=unknown">
##FORMAT=<ID=BQ,Number=.,Type=Integer,Description="Average base quality for reads supporting alleles">
##FORMAT=<ID=SSC,Number=1,Type=Integer,Description="Somatic score between 0 and 255">
##INFO=<ID=Gene,Number=1,Type=String,Description="Hugo Gene Symbol">
##INFO=<ID=VT,Number=1,Type=String,Description="Variant type, can be SNP, INS or DEL">
##INFO=<ID=VC,Number=1,Type=String,Description="Somatic variant classification">
##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of sample">
##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth across samples">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=TID,Number=1,Type=String,Description="Transcript ID">
##INFO=<ID=VLSC,Number=1,Type=Integer,Description="Final somatic score between 0 and 255 when multiple lines of evidence are available">
##FILTER=<ID=mf1,Description="Filtered out by MuTect v.1">
##FILTER=<ID=oxoG3,Description="Filtered out by OxoG Artifact Filter v3">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL PRIMARY
1 16386305 rs143272992 G GC 50 PASS DB;DP=17;Gene=FAM131C;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=INS;TID=ENST00000375662.4;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:11,0:11:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
3 147121629 . ATC A 50 PASS DP=12;Gene=ZIC4;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=DEL;TID=ENST00000491672.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:6,0:6:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
3 184043925 rs112208190 AAC A 50 PASS DB;DP=13;Gene=EIF4G1;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=DEL;TID=ENST00000392537.2;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:7,0:7:0.000:0:.:2:. 0/1:2,4:6:0.667:0:.:2:.
7 22533451 rs116873396 TCA T 50 PASS DB;DP=28;Gene=STEAP1B;MQ0=0;SOMATIC;SS=Somatic;VC=Frame_Shift_Del;VT=DEL;TID=ENST00000404369.4;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:22,0:22:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
11 112042479 . CT C 50 PASS DP=37;Gene=TEX12;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=DEL;TID=ENST00000280358.4;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:31,0:31:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
12 49431403 . G GT 50 PASS DP=81;Gene=KMT2D;MQ0=0;SOMATIC;SS=Somatic;VC=Frame_Shift_Ins;VT=INS;TID=ENST00000301067.7;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:48,0:48:0.000:0:.:2:. 0/1:26,7:33:0.212:0:.:2:.
13 33332313 . CA C 50 PASS DP=53;Gene=PDS5B;MQ0=0;SOMATIC;SS=Somatic;VC=Frame_Shift_Del;VT=DEL;TID=ENST00000315596.10;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:47,0:47:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
17 38712160 . CT C 50 PASS DP=11;Gene=CCR7;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=DEL;TID=ENST00000246657.2;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:5,0:5:0.000:0:.:2:. 0/1:4,2:6:0.333:0:.:2:.
20 50342306 . TTC T 50 PASS DP=37;Gene=ATP9A;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=DEL;TID=ENST00000338821.5;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:22,0:22:0.000:0:.:2:. 0/1:9,6:15:0.400:0:.:2:.
##fileformat=VCFv4.1
##fileDate=20160402
##tcgaversion=1.1
##reference=<ID=hg19,source="http://www.broadinstitute.org/ftp/pub//seq/references/Homo_sapiens_assembly19.fasta">
##assembly=.
##center="broad.mit.edu"
##phasing=none
##geneAnno=http://www.gencodegenes.org/
##vcfProcessLog=<InputVCF=<.>,InputVCFSource=<.>,InputVCFVer=<.>,InputVCFParam=<.>,InputVCFgeneAnno=<https://tcga-data.nci.nih.gov/docs/GAF/GAF3.0/>>
##INDIVIDUAL=TCGA-A1-A0SB
##SAMPLE=<ID=NORMAL,SampleTCGABarcode=TCGA-A1-A0SB-10B-01D-A142-09,SampleName=TCGA-A1-A0SB-10B-01D-A142-09,Individual=TCGA-A1-A0SB,Description="Normal sample",Platform=Illumina,Source=dbGaP,Accession=.,softwareName=<muTect,CallIndelsPipeline>,softwareVer=<119,65>,softwareParam=<.>,File=TCGA-A1-A0SB-10B-01D-A142-09,SampleUUID=a3254f8e-3bbd-42fc-abea-a5f25b7648b3,MetadataResource=https://tcga-data.nci.nih.gov/uuid/uuidws/mapping/xml/uuid/a3254f8e-3bbd-42fc-abea-a5f25b7648b3>
##SAMPLE=<ID=PRIMARY,SampleTCGABarcode=TCGA-A1-A0SB-01A-11D-A142-09,SampleName=TCGA-A1-A0SB-01A-11D-A142-09,Individual=TCGA-A1-A0SB,Description="Primary Tumor",Platform=Illumina,Source=dbGaP,Accession=.,softwareName=<muTect,CallIndelsPipeline>,softwareVer=<119,65>,softwareParam=<.>,File=TCGA-A1-A0SB-01A-11D-A142-09,SampleUUID=db9d40fb-bfce-4c3b-a6c2-41c5c88982f1,MetadataResource=https://tcga-data.nci.nih.gov/uuid/uuidws/mapping/xml/uuid/db9d40fb-bfce-4c3b-a6c2-41c5c88982f1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Depth of reads supporting alleles 0/1/2/3...">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth at this position in the sample">
##FORMAT=<ID=FA,Number=.,Type=Float,Description="Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample">
##FORMAT=<ID=MQ0,Number=1,Type=Integer,Description="Number of Mapping Quality Zero Reads per sample">
##FORMAT=<ID=SS,Number=1,Type=Integer,Description="Variant status relative to non-adjacent Normal,0=wildtype,1=germline,2=somatic,3=LOH,4=post-transcriptional modification,5=unknown">
##FORMAT=<ID=BQ,Number=.,Type=Integer,Description="Average base quality for reads supporting alleles">
##FORMAT=<ID=SSC,Number=1,Type=Integer,Description="Somatic score between 0 and 255">
##INFO=<ID=Gene,Number=1,Type=String,Description="Hugo Gene Symbol">
##INFO=<ID=VT,Number=1,Type=String,Description="Variant type, can be SNP, INS or DEL">
##INFO=<ID=VC,Number=1,Type=String,Description="Somatic variant classification">
##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of sample">
##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth across samples">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=TID,Number=1,Type=String,Description="Transcript ID">
##INFO=<ID=VLSC,Number=1,Type=Integer,Description="Final somatic score between 0 and 255 when multiple lines of evidence are available">
##FILTER=<ID=mf1,Description="Filtered out by MuTect v.1">
##FILTER=<ID=oxoG3,Description="Filtered out by OxoG Artifact Filter v3">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL PRIMARY
1 44476442 . C T 43 PASS DP=127;Gene=SLC6A9;MQ0=0;SOMATIC;SS=Somatic;VC=5'UTR;VT=SNP;TID=ENST00000372307.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:69,0:69:0.000:0:.:2:. 0/1:42,16:58:0.276:0:31:2:.
1 244583577 . G T 6 PASS DP=113;Gene=ADSS;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000366535.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:77,0:77:0.000:0:.:2:. 0/1:33,3:36:0.083:0:32:2:.
2 25678299 . C T 24 PASS DP=50;Gene=DTNB;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000406818.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:33,0:33:0.000:0:.:2:. 0/1:9,8:17:0.471:0:32:2:.
3 85932472 . C T 56 PASS DP=87;Gene=CADM2;MQ0=0;SOMATIC;SS=Somatic;VC=Silent;VT=SNP;TID=ENST00000383699.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:50,0:50:0.000:0:.:2:. 0/1:18,19:37:0.514:0:30:2:.
6 7986778 . G A 27 PASS DP=46;Gene=BLOC1S5-TXNDC5;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=SNP;TID=ENST00000539054.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:23,0:23:0.000:0:.:2:. 0/1:13,10:23:0.435:0:29:2:.
7 75609837 . C G 13 PASS DP=19;Gene=POR;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=SNP;TID=ENST00000394893.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:8,0:8:0.000:0:.:2:. 0/1:6,5:11:0.455:0:28:2:.
7 149129243 . G A 16 PASS DP=45;Gene=ZNF777;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000247930.4;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:18,0:18:0.000:0:.:2:. 0/1:21,6:27:0.222:0:32:2:.
7 150840441 . C T 26 PASS DP=52;Gene=AGAP3;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000463381.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:28,0:28:0.000:0:.:2:. 0/1:15,9:24:0.375:0:32:2:.
10 116247760 . T C 26 PASS DP=117;Gene=ABLIM1;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000392952.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:72,0:72:0.000:0:.:2:. 0/1:34,11:45:0.244:0:28:2:.
12 43944926 . T C 44 PASS DP=82;Gene=ADAMTS20;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000389420.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:50,0:50:0.000:0:.:2:. 0/1:17,15:32:0.469:0:31:2:.
13 50464902 . T C 8 PASS DP=140;Gene=.;MQ0=0;SOMATIC;SS=Somatic;VC=IGR;VT=SNP;TID=.;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:75,0:75:0.000:0:.:2:. 0/1:60,5:65:0.077:0:26:2:.
14 65266493 . T C 20 PASS DP=41;Gene=SPTB;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000556626.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:21,0:21:0.000:0:.:2:. 0/1:12,8:20:0.400:0:27:2:.
15 91043489 . C T 9 PASS DP=24;Gene=IQGAP1;MQ0=0;SOMATIC;SS=Somatic;VC=3'UTR;VT=SNP;TID=ENST00000268182.5;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:18,0:18:0.000:0:.:2:. 0/1:3,3:6:0.500:0:32:2:.
16 88790292 . T C 20 PASS DP=59;Gene=PIEZO1;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000301015.9;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:34,0:34:0.000:0:.:2:. 0/1:18,7:25:0.280:0:33:2:.
17 40272381 . G A 99 PASS DP=96;Gene=KAT2A;MQ0=0;SOMATIC;SS=Somatic;VC=Silent;VT=SNP;TID=ENST00000225916.5;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:35,0:35:0.000:0:.:2:. 0/1:29,32:61:0.525:0:32:2:.
19 42585066 . G A 17 PASS DP=44;Gene=ZNF574;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000600245.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:19,0:19:0.000:0:.:2:. 0/1:18,7:25:0.280:0:29:2:.
20 16730581 . G A 21 PASS DP=95;Gene=OTOR;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000246081.2;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:52,0:52:0.000:0:.:2:. 0/1:35,8:43:0.186:0:33:2:.
22 23040479 . C G 41 PASS DP=48;Gene=IGLV2-23;MQ0=0;SOMATIC;SS=Somatic;VC=RNA;VT=SNP;TID=ENST00000390306.2;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:23,0:23:0.000:0:.:2:. 0/1:12,13:25:0.520:0:33:2:.
X 51076024 rs143435240 G A 6 PASS DB;DP=122;Gene=NUDT10;MQ0=0;SOMATIC;SS=Somatic;VC=Silent;VT=SNP;TID=ENST00000376006.3;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:77,1:78:0.013:0:35:2:. 0/1:41,3:44:0.068:0:32:2:.
X 77160816 . A G 7 PASS DP=74;Gene=COX7B;MQ0=0;SOMATIC;SS=Somatic;VC=3'UTR;VT=SNP;TID=ENST00000481445.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:51,0:51:0.000:0:.:2:. 0/1:20,3:23:0.130:0:32:2:.
X 77160852 . T A 7 PASS DP=52;Gene=COX7B;MQ0=0;SOMATIC;SS=Somatic;VC=3'UTR;VT=SNP;TID=ENST00000481445.1;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:35,0:35:0.000:0:.:2:. 0/1:14,3:17:0.176:0:30:2:.
X 78216689 . C T 26 PASS DP=117;Gene=P2RY10;MQ0=0;SOMATIC;SS=Somatic;VC=Silent;VT=SNP;TID=ENST00000171757.2;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:63,0:63:0.000:0:.:2:. 0/1:44,10:54:0.185:0:33:2:.
X 122757148 . A T 6 PASS DP=61;Gene=THOC2;MQ0=0;SOMATIC;SS=Somatic;VC=Intron;VT=SNP;TID=ENST00000245838.8;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:48,0:48:0.000:0:.:2:. 0/1:10,3:13:0.231:0:28:2:.
X 152684244 . T G 6 PASS DP=113;Gene=ZFP92;MQ0=0;SOMATIC;SS=Somatic;VC=Missense_Mutation;VT=SNP;TID=ENST00000338647.5;VLSC=255 GT:AD:DP:FA:MQ0:BQ:SS:SSC 0/0:62,0:62:0.000:0:.:2:. 0/1:48,3:51:0.059:0:34:2:.
tumor_sample normal_sample_barcode tumor_sample_barcode vcf_type present_GA file_id_GA file_name_GA file_size_GA
TCGA-A1-A0SB TCGA-A1-A0SB-10B-01D-A142-09 TCGA-A1-A0SB-01A-11D-A142-09 snv X 36a044b2-f8da-420f-b8a3-e61046755266 TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.oxoG.snp.capture.tcga.vcf 6.53
TCGA-A1-A0SB TCGA-A1-A0SB-10B-01D-A142-09 TCGA-A1-A0SB-01A-11D-A142-09 indel X 5164f318-f4d0-410e-bc44-083e58735c04 TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.indel.capture.tcga.vcf 0.0129
This source diff could not be displayed because it is too large. You can view the blob instead.
tumor_sample normal_sample_barcode tumor_sample_barcode vcf_type present_HiSeq file_id_HiSeq file_name_HiSeq file_size_HiSeq
TCGA-A1-A0SD TCGA-A1-A0SD-10A-01D-A110-09 TCGA-A1-A0SD-01A-11D-A10Y-09 snv X d23e3a12-207c-4b47-9d36-0977c427ba84 genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.vcf.gz 19.1
TCGA-A1-A0SD TCGA-A1-A0SD-10A-01D-A110-09 TCGA-A1-A0SD-01A-11D-A10Y-09 exome
TCGA-A1-A0SD TCGA-A1-A0SD-10A-01D-A110-09 TCGA-A1-A0SD-01A-11D-A10Y-09 indel X 6637f6bc-91a8-4dd8-a315-55d95209afd4 genome.wustl.edu.TCGA-A1-A0SD.indel.0e81f9c986154ce89e59240c3f09534f.vcf.gz 0.94
Hugo_Symbol Entrez_Gene_Id NCBI_Build Chromosome Start_Position End_Position Variant_Quality Filter Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 HGVSc HGVSp HGVSp_Short all_effects Location Gene Feature Feature_type cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Consequence IMPACT STRAND SYMBOL_SOURCE HGNC_ID BIOTYPE CCDS ENSP SWISSPROT TREMBL UNIPARC EXON INTRON AF gnomAD_AF MAX_AF MAX_AF_POPS n_GT n_SS n_FA n_DP n_AD t_GT t_SS t_FA t_DP t_AD
FAM131C 348487 GRCh37 1 16386305 16386306 50 PASS Intron INS - - C rs542191066 - - c.451+58dup FAM131C,intron_variant,,ENST00000375662,NM_182623.2;CLCNKB,downstream_gene_variant,,ENST00000375667,NM_001165945.2;CLCNKB,downstream_gene_variant,,ENST00000375679,NM_000085.4;CLCNKB,downstream_gene_variant,,ENST00000431772,;FAM131C,intron_variant,,ENST00000494078,; 1:16386305-16386306 ENSG00000185519 ENST00000375662.4 Transcript - - - - - rs542191066 intron_variant MODIFIER -1 HGNC 26717 protein_coding CCDS41270.1 ENSP00000364814 Q96AQ9 UPI000022B016 5/6 0.3678 0.5051 SAS 0/0 2 0.0 11 11,0 0/1 2 0.33299999999999996 6 4,2
ZIC4 84107 GRCh37 3 147121630 147121631 50 PASS Intron DEL TC TC - rs142316820 TC TC c.135+120_135+121del ZIC4,intron_variant,,ENST00000383075,NM_032153.5;ZIC4,intron_variant,,ENST00000425731,NM_001168379.1;ZIC4,intron_variant,,ENST00000462748,;ZIC4,intron_variant,,ENST00000463250,;ZIC4,intron_variant,,ENST00000473123,;ZIC4,intron_variant,,ENST00000484399,;ZIC1,intron_variant,,ENST00000488404,;ZIC4,intron_variant,,ENST00000491672,NM_001243256.1;ZIC4,intron_variant,,ENST00000525172,NM_001168378.1;ZIC4,upstream_gene_variant,,ENST00000484586,;ZIC1,intron_variant,,ENST00000472523,;ZIC4,downstream_gene_variant,,ENST00000464144,; 3:147121630-147121631 ENSG00000174963 ENST00000525172.2 Transcript - - - - - rs142316820 intron_variant MODIFIER -1 HGNC 20393 protein_coding CCDS54652.1 ENSP00000435509 Q8N9L1 C9JZU7,C9JD04,C9J6T3,B3KPI4 UPI0001914D88 1/4 0/0 2 0.0 6 6,0 0/1 2 0.33299999999999996 6 4,2
EIF4G1 1981 GRCh37 3 184043926 184043927 50 PASS Intron DEL AC AC - rs34901174 AC AC c.3243+217_3243+218del EIF4G1,intron_variant,,ENST00000319274,;EIF4G1,intron_variant,,ENST00000342981,NM_182917.4;EIF4G1,intron_variant,,ENST00000346169,NM_198241.2;EIF4G1,intron_variant,,ENST00000350481,NM_198242.2;EIF4G1,intron_variant,,ENST00000352767,NM_001194947.1;EIF4G1,intron_variant,,ENST00000382330,NM_001194946.1;EIF4G1,intron_variant,,ENST00000392537,NM_198244.2;EIF4G1,intron_variant,,ENST00000411531,;EIF4G1,intron_variant,,ENST00000414031,;EIF4G1,intron_variant,,ENST00000424196,;EIF4G1,intron_variant,,ENST00000427845,;EIF4G1,intron_variant,,ENST00000434061,NM_004953.4;EIF4G1,intron_variant,,ENST00000435046,;EIF4G1,intron_variant,,ENST00000441154,;EIF2B5,intron_variant,,ENST00000444495,;EIF4G1,intron_variant,,ENST00000448284,;EIF4G1,downstream_gene_variant,,ENST00000421110,;EIF4G1,downstream_gene_variant,,ENST00000426123,;EIF4G1,downstream_gene_variant,,ENST00000427607,;EIF4G1,downstream_gene_variant,,ENST00000428387,;EIF4G1,downstream_gene_variant,,ENST00000444134,;EIF4G1,downstream_gene_variant,,ENST00000444861,;EIF4G1,downstream_gene_variant,,ENST00000450424,;EIF4G1,downstream_gene_variant,,ENST00000457456,;SNORD66,downstream_gene_variant,,ENST00000390856,NR_003055.1;EIF4G1,intron_variant,,ENST00000442406,;EIF4G1,intron_variant,,ENST00000466311,;EIF4G1,downstream_gene_variant,,ENST00000413967,;EIF4G1,upstream_gene_variant,,ENST00000422614,;EIF4G1,upstream_gene_variant,,ENST00000460829,;EIF4G1,upstream_gene_variant,,ENST00000464548,;EIF4G1,upstream_gene_variant,,ENST00000475721,;EIF4G1,upstream_gene_variant,,ENST00000482303,;EIF4G1,downstream_gene_variant,,ENST00000484862,;EIF4G1,downstream_gene_variant,,ENST00000493299,; 3:184043926-184043927 ENSG00000114867 ENST00000424196.1 Transcript - - - - - rs34901174 intron_variant MODIFIER 1 HGNC 3296 protein_coding CCDS54687.1 ENSP00000416255 Q04637 Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556 UPI00015E0966 20/31 0/0 2 0.0 
7 7,0 0/1 2 0.667 6 2,4
STEAP1B 256227 GRCh37 7 22533452 22533453 50 PASS Frame_Shift_Del DEL CA CA - novel CA CA c.87_88del p.His29GlnfsTer24 p.H29Qfs*24 STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000404369,NM_001164460.1;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000424363,;STEAP1B,frameshift_variant,p.His29GlnfsTer24,ENST00000439708,;STEAP1B,intron_variant,,ENST00000406890,NM_207342.2;STEAP1B,splice_region_variant,,ENST00000483679,; 7:22533452-22533453 ENSG00000105889 ENST00000404369.4 Transcript 503-504 87-88 29-30 HE/QX caTGag/caag - frameshift_variant,splice_region_variant HIGH -1 HGNC 41907 protein_coding CCDS56469.1 ENSP00000384370 C9JL51,C9JE84,B5MCI2 UPI000173A267 3/5 0/0 2 0.0 22 22,0 0/1 2 0.33299999999999996 6 4,2
TEX12 56158 GRCh37 11 112042480 112042480 50 PASS Intron DEL T T - rs1225064086 T T c.228-9del TEX12,intron_variant,,ENST00000280358,NM_031275.4;TEX12,intron_variant,,ENST00000530752,;AP002884.3,intron_variant,,ENST00000532612,;BCO2,upstream_gene_variant,,ENST00000357685,;BCO2,upstream_gene_variant,,ENST00000361053,NM_001256398.1;BCO2,upstream_gene_variant,,ENST00000393032,NM_031938.5;BCO2,upstream_gene_variant,,ENST00000438022,;BCO2,upstream_gene_variant,,ENST00000526088,NM_001037290.2,NM_001256397.1;BCO2,upstream_gene_variant,,ENST00000531169,;BCO2,upstream_gene_variant,,ENST00000532593,NM_001256400.1;RP11-356J5.4,intron_variant,,ENST00000527589,;SDHD,intron_variant,,ENST00000525468,;SDHD,intron_variant,,ENST00000525987,;SDHD,intron_variant,,ENST00000531744,;SDHD,intron_variant,,ENST00000532699,;BCO2,upstream_gene_variant,,ENST00000460924,;BCO2,upstream_gene_variant,,ENST00000461480,;BCO2,upstream_gene_variant,,ENST00000494860,;BCO2,upstream_gene_variant,,ENST00000527939,;BCO2,upstream_gene_variant,,ENST00000531003,;BCO2,upstream_gene_variant,,ENST00000534122,;BCO2,upstream_gene_variant,,ENST00000534550,; 11:112042480 ENSG00000150783 ENST00000280358.4 Transcript - - - - - rs1225064086 intron_variant MODIFIER 1 HGNC 11734 protein_coding CCDS31679.1 ENSP00000280358 Q9BXU0 UPI00001377E3 4/4 1.711e-05 8.319e-05 gnomAD_AFR 0/0 2 0.0 31 31,0 0/1 2 0.33299999999999996 6 4,2
KMT2D 8085 GRCh37 12 49431403 49431404 50 PASS Frame_Shift_Ins INS - - T novel - - c.9735dup p.Pro3246ThrfsTer5 p.P3246Tfs*5 KMT2D,frameshift_variant,p.Pro3246ThrfsTer5,ENST00000301067,NM_003482.3;KMT2D,upstream_gene_variant,,ENST00000549743,;KMT2D,downstream_gene_variant,,ENST00000549799,; 12:49431403-49431404 ENSG00000167548 ENST00000301067.7 Transcript 9735-9736 9735-9736 3245-3246 -/X -/A - frameshift_variant HIGH -1 HGNC 7133 protein_coding CCDS44873.1 ENSP00000301067 O14686 Q6PIA1,Q59FG6,F8VWW4 UPI0000EE84D6 34/54 0/0 2 0.0 48 48,0 0/1 2 0.212 33 26,7
PDS5B 23047 GRCh37 13 33332314 33332314 50 PASS Frame_Shift_Del DEL A A - novel A A c.3148del p.Thr1050GlnfsTer12 p.T1050Qfs*12 PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000315596,NM_015032.3;PDS5B,frameshift_variant,p.Thr4GlnfsTer12,ENST00000447833,;PDS5B,frameshift_variant,p.Thr1050GlnfsTer12,ENST00000450460,; 13:33332314 ENSG00000083642 ENST00000315596.10 Transcript 3332 3146 1049 Q/X cAa/ca - frameshift_variant HIGH 1 HGNC 20418 protein_coding CCDS41878.1 ENSP00000313851 Q9NTI5 UPI000006D4A9 27/35 0/0 2 0.0 47 47,0 0/1 2 0.33299999999999996 6 4,2
CCR7 1236 GRCh37 17 38712161 38712161 50 PASS Intron DEL T T - rs532551852 T T c.61-91del CCR7,intron_variant,,ENST00000246657,NM_001838.3;CCR7,intron_variant,,ENST00000578085,;CCR7,intron_variant,,ENST00000579344,; 17:38712161 ENSG00000126353 ENST00000246657.2 Transcript - - - - - rs532551852 intron_variant MODIFIER -1 HGNC 1608 protein_coding CCDS11369.1 ENSP00000246657 P32248 J3KTN5,J3KSS9,A0N0Q0 UPI0000001C2F 2/2 0.0008 0.004 EAS 0/0 2 0.0 5 5,0 0/1 2 0.33299999999999996 6 4,2
ATP9A 10079 GRCh37 20 50342307 50342308 50 PASS Intron DEL TC TC - novel TC TC c.327+50_327+51del ATP9A,intron_variant,,ENST00000311637,;ATP9A,intron_variant,,ENST00000338821,NM_006045.1;ATP9A,intron_variant,,ENST00000402822,;ATP9A,downstream_gene_variant,,ENST00000477492,;,regulatory_region_variant,,ENSR00001225305,; 20:50342307-50342308 ENSG00000054793 ENST00000338821.5 Transcript - - - - - - intron_variant MODIFIER -1 HGNC 13540 protein_coding CCDS33489.1 ENSP00000342481 O75110 Q2NLD0,B4DR18 UPI000004D334 3/27 0/0 2 0.0 22 22,0 0/1 2 0.4 15 9,6
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter Hugo_Symbol Variant_Classification Variant_Type Transcript_ID n_GT n_SS n_FA n_DP n_AD t_GT t_SS t_FA t_DP t_AD
1 16386305 rs143272992 G GC 50 PASS FAM131C Intron INS ENST00000375662.4 0/0 2 0.000 11 11,0 0/1 2 0.333 6 4,2
3 147121629 ATC A 50 PASS ZIC4 Intron DEL ENST00000491672.1 0/0 2 0.000 6 6,0 0/1 2 0.333 6 4,2
3 184043925 rs112208190 AAC A 50 PASS EIF4G1 Intron DEL ENST00000392537.2 0/0 2 0.000 7 7,0 0/1 2 0.667 6 2,4
7 22533451 rs116873396 TCA T 50 PASS STEAP1B Frame_Shift_Del DEL ENST00000404369.4 0/0 2 0.000 22 22,0 0/1 2 0.333 6 4,2
11 112042479 CT C 50 PASS TEX12 Intron DEL ENST00000280358.4 0/0 2 0.000 31 31,0 0/1 2 0.333 6 4,2
12 49431403 G GT 50 PASS KMT2D Frame_Shift_Ins INS ENST00000301067.7 0/0 2 0.000 48 48,0 0/1 2 0.212 33 26,7
13 33332313 CA C 50 PASS PDS5B Frame_Shift_Del DEL ENST00000315596.10 0/0 2 0.000 47 47,0 0/1 2 0.333 6 4,2
17 38712160 CT C 50 PASS CCR7 Intron DEL ENST00000246657.2 0/0 2 0.000 5 5,0 0/1 2 0.333 6 4,2
20 50342306 TTC T 50 PASS ATP9A Intron DEL ENST00000338821.5 0/0 2 0.000 22 22,0 0/1 2 0.400 15 9,6
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter Hugo_Symbol Variant_Classification Variant_Type Transcript_ID n_GT n_SS n_FA n_DP n_AD t_GT t_SS t_FA t_DP t_AD
1 44476442 C T 43 PASS SLC6A9 5'UTR SNP ENST00000372307.3 0/0 2 0.000 69 69,0 0/1 2 0.276 58 42,16
1 244583577 G T 6 PASS ADSS Missense_Mutation SNP ENST00000366535.3 0/0 2 0.000 77 77,0 0/1 2 0.083 36 33,3
2 25678299 C T 24 PASS DTNB Missense_Mutation SNP ENST00000406818.3 0/0 2 0.000 33 33,0 0/1 2 0.471 17 9,8
3 85932472 C T 56 PASS CADM2 Silent SNP ENST00000383699.3 0/0 2 0.000 50 50,0 0/1 2 0.514 37 18,19
6 7986778 G A 27 PASS BLOC1S5-TXNDC5 Intron SNP ENST00000539054.1 0/0 2 0.000 23 23,0 0/1 2 0.435 23 13,10
7 75609837 C G 13 PASS POR Intron SNP ENST00000394893.1 0/0 2 0.000 8 8,0 0/1 2 0.455 11 6,5
7 149129243 G A 16 PASS ZNF777 Missense_Mutation SNP ENST00000247930.4 0/0 2 0.000 18 18,0 0/1 2 0.222 27 21,6
7 150840441 C T 26 PASS AGAP3 Missense_Mutation SNP ENST00000463381.1 0/0 2 0.000 28 28,0 0/1 2 0.375 24 15,9
10 116247760 T C 26 PASS ABLIM1 Missense_Mutation SNP ENST00000392952.3 0/0 2 0.000 72 72,0 0/1 2 0.244 45 34,11
12 43944926 T C 44 PASS ADAMTS20 Missense_Mutation SNP ENST00000389420.3 0/0 2 0.000 50 50,0 0/1 2 0.469 32 17,15
13 50464902 T C 8 PASS IGR SNP 0/0 2 0.000 75 75,0 0/1 2 0.077 65 60,5
14 65266493 T C 20 PASS SPTB Missense_Mutation SNP ENST00000556626.1 0/0 2 0.000 21 21,0 0/1 2 0.400 20 12,8
15 91043489 C T 9 PASS IQGAP1 3'UTR SNP ENST00000268182.5 0/0 2 0.000 18 18,0 0/1 2 0.500 6 3,3
16 88790292 T C 20 PASS PIEZO1 Missense_Mutation SNP ENST00000301015.9 0/0 2 0.000 34 34,0 0/1 2 0.280 25 18,7
17 40272381 G A 99 PASS KAT2A Silent SNP ENST00000225916.5 0/0 2 0.000 35 35,0 0/1 2 0.525 61 29,32
19 42585066 G A 17 PASS ZNF574 Missense_Mutation SNP ENST00000600245.1 0/0 2 0.000 19 19,0 0/1 2 0.280 25 18,7
20 16730581 G A 21 PASS OTOR Missense_Mutation SNP ENST00000246081.2 0/0 2 0.000 52 52,0 0/1 2 0.186 43 35,8
22 23040479 C G 41 PASS IGLV2-23 RNA SNP ENST00000390306.2 0/0 2 0.000 23 23,0 0/1 2 0.520 25 12,13
X 51076024 rs143435240 G A 6 PASS NUDT10 Silent SNP ENST00000376006.3 0/0 2 0.013 78 77,1 0/1 2 0.068 44 41,3
X 77160816 A G 7 PASS COX7B 3'UTR SNP ENST00000481445.1 0/0 2 0.000 51 51,0 0/1 2 0.130 23 20,3
X 77160852 T A 7 PASS COX7B 3'UTR SNP ENST00000481445.1 0/0 2 0.000 35 35,0 0/1 2 0.176 17 14,3
X 78216689 C T 26 PASS P2RY10 Silent SNP ENST00000171757.2 0/0 2 0.000 63 63,0 0/1 2 0.185 54 44,10
X 122757148 A T 6 PASS THOC2 Intron SNP ENST00000245838.8 0/0 2 0.000 48 48,0 0/1 2 0.231 13 10,3
X 152684244 T G 6 PASS ZFP92 Missense_Mutation SNP ENST00000338647.5 0/0 2 0.000 62 62,0 0/1 2 0.059 51 48,3
## ENSEMBL VARIANT EFFECT PREDICTOR v99.2
## Output produced at 2020-08-13 17:08:23
## Using cache in /Users/ypradat/.vep/homo_sapiens/99_GRCh37
## Using API version 99, DB version ?
## ensembl-io version 99.441b05b
## ensembl-funcgen version 99.0832337
## ensembl version 99.d3e7d31
## ensembl-variation version 99.a7f8736
## assembly version GRCh37.p13
## dbSNP version 151
## sift version sift5.2.2
## gnomAD version r2.1
## genebuild version 2011-04
## ClinVar version 201810
## regbuild version 1.0
## gencode version GENCODE 19
## polyphen version 2.2.2
## 1000genomes version phase3
## HGMD-PUBLIC version 20174
## ESP version 20141103
## COSMIC version 86
## Column descriptions:
## Uploaded_variation : Identifier of uploaded variant
## Location : Location of variant in standard coordinate format (chr:start or chr:start-end)
## Allele : The variant allele used to calculate the consequence
## Gene : Stable ID of affected gene
## Feature : Stable ID of feature
## Feature_type : Type of feature - Transcript, RegulatoryFeature or MotifFeature
## Consequence : Consequence type
## cDNA_position : Relative position of base pair in cDNA sequence
## CDS_position : Relative position of base pair in coding sequence
## Protein_position : Relative position of amino acid in protein
## Amino_acids : Reference and variant amino acids
## Codons : Reference and variant codon sequence
## Existing_variation : Identifier(s) of co-located known variants
## Extra column keys:
## IMPACT : Subjective impact classification of consequence type
## DISTANCE : Shortest distance from variant to transcript
## STRAND : Strand of the feature (1/-1)
## FLAGS : Transcript quality flags
## SYMBOL : Gene symbol (e.g. HGNC)
## SYMBOL_SOURCE : Source of gene symbol
## HGNC_ID : Stable identifer of HGNC gene symbol
## BIOTYPE : Biotype of transcript or regulatory feature
## CANONICAL : Indicates if transcript is canonical for this gene
## MANE : MANE (Matched Annotation by NCBI and EMBL-EBI) Transcript
## TSL : Transcript support level
## APPRIS : Annotates alternatively spliced transcripts as primary or alternate based on a range of computational methods
## CCDS : Indicates if transcript is a CCDS transcript
## ENSP : Protein identifer
## SWISSPROT : UniProtKB/Swiss-Prot accession
## TREMBL : UniProtKB/TrEMBL accession
## UNIPARC : UniParc accession
## SIFT : SIFT prediction and/or score
## PolyPhen : PolyPhen prediction and/or score
## EXON : Exon number(s) / total
## INTRON : Intron number(s) / total
## HGVSc : HGVS coding sequence name
## HGVSp : HGVS protein sequence name
## HGVS_OFFSET : Indicates by how many bases the HGVS notations for this variant have been shifted
## AF : Frequency of existing variant in 1000 Genomes combined population
## AFR_AF : Frequency of existing variant in 1000 Genomes combined African population
## AMR_AF : Frequency of existing variant in 1000 Genomes combined American population
## EAS_AF : Frequency of existing variant in 1000 Genomes combined East Asian population
## EUR_AF : Frequency of existing variant in 1000 Genomes combined European population
## SAS_AF : Frequency of existing variant in 1000 Genomes combined South Asian population
## AA_AF : Frequency of existing variant in NHLBI-ESP African American population
## EA_AF : Frequency of existing variant in NHLBI-ESP European American population
## gnomAD_AF : Frequency of existing variant in gnomAD exomes combined population
## gnomAD_AFR_AF : Frequency of existing variant in gnomAD exomes African/American population
## gnomAD_AMR_AF : Frequency of existing variant in gnomAD exomes American population
## gnomAD_ASJ_AF : Frequency of existing variant in gnomAD exomes Ashkenazi Jewish population
## gnomAD_EAS_AF : Frequency of existing variant in gnomAD exomes East Asian population
## gnomAD_FIN_AF : Frequency of existing variant in gnomAD exomes Finnish population
## gnomAD_NFE_AF : Frequency of existing variant in gnomAD exomes Non-Finnish European population
## gnomAD_OTH_AF : Frequency of existing variant in gnomAD exomes other combined populations
## gnomAD_SAS_AF : Frequency of existing variant in gnomAD exomes South Asian population
## MAX_AF : Maximum observed allele frequency in 1000 Genomes, ESP and ExAC/gnomAD
## MAX_AF_POPS : Populations in which maximum allele frequency was observed
## CLIN_SIG : ClinVar clinical significance of the dbSNP variant
## SOMATIC : Somatic status of existing variant
## PHENO : Indicates if existing variant(s) is associated with a phenotype, disease or trait; multiple values correspond to multiple variants
## PUBMED : Pubmed ID(s) of publications that cite existing variant
## MOTIF_NAME : The source and identifier of a transcription factor binding profile (TFBP) aligned at this position
## MOTIF_POS : The relative position of the variation in the aligned TFBP
## HIGH_INF_POS : A flag indicating if the variant falls in a high information position of the TFBP
## MOTIF_SCORE_CHANGE : The difference in motif score of the reference and variant sequences for the TFBP
#Uploaded_variation Location Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Extra
rs143272992 1:16386305-16386306 C ENSG00000185519 ENST00000375662.4 Transcript intron_variant - - - - - rs542191066 IMPACT=MODIFIER;STRAND=-1;SYMBOL=FAM131C;SYMBOL_SOURCE=HGNC;HGNC_ID=26717;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS41270.1;ENSP=ENSP00000364814;SWISSPROT=Q96AQ9;UNIPARC=UPI000022B016;INTRON=5/6;HGVSc=ENST00000375662.4:c.451+58dup;AF=0.3678;AFR_AF=0.2731;AMR_AF=0.3573;EAS_AF=0.3581;EUR_AF=0.3757;SAS_AF=0.5051;MAX_AF=0.5051;MAX_AF_POPS=SAS
3_147121630_TC/- 3:147121630-147121631 - ENSG00000174963 ENST00000525172.2 Transcript intron_variant - - - - - rs142316820 IMPACT=MODIFIER;STRAND=-1;SYMBOL=ZIC4;SYMBOL_SOURCE=HGNC;HGNC_ID=20393;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS54652.1;ENSP=ENSP00000435509;SWISSPROT=Q8N9L1;TREMBL=C9JZU7,C9JD04,C9J6T3,B3KPI4;UNIPARC=UPI0001914D88;INTRON=1/4;HGVSc=ENST00000525172.2:c.135+120_135+121del
rs112208190 3:184043926-184043927 - ENSG00000114867 ENST00000424196.1 Transcript intron_variant - - - - - rs34901174 IMPACT=MODIFIER;STRAND=1;SYMBOL=EIF4G1;SYMBOL_SOURCE=HGNC;HGNC_ID=3296;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS54687.1;ENSP=ENSP00000416255;SWISSPROT=Q04637;TREMBL=Q96I65,C9JWW9,C9JWH7,C9JSU8,C9J987,C9J6B6,C9J556;UNIPARC=UPI00015E0966;INTRON=20/31;HGVSc=ENST00000424196.1:c.3243+217_3243+218del;HGVS_OFFSET=30
rs116873396 7:22533452-22533453 - ENSG00000105889 ENST00000404369.4 Transcript frameshift_variant,splice_region_variant 503-504 87-88 29-30 HE/QX caTGag/caag - IMPACT=HIGH;STRAND=-1;SYMBOL=STEAP1B;SYMBOL_SOURCE=HGNC;HGNC_ID=41907;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS56469.1;ENSP=ENSP00000384370;TREMBL=C9JL51,C9JE84,B5MCI2;UNIPARC=UPI000173A267;EXON=3/5;HGVSc=ENST00000404369.4:c.87_88del;HGVSp=ENSP00000384370.4:p.His29GlnfsTer24
11_112042480_T/- 11:112042480 - ENSG00000150783 ENST00000280358.4 Transcript intron_variant - - - - - rs1225064086 IMPACT=MODIFIER;STRAND=1;SYMBOL=TEX12;SYMBOL_SOURCE=HGNC;HGNC_ID=11734;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS31679.1;ENSP=ENSP00000280358;SWISSPROT=Q9BXU0;UNIPARC=UPI00001377E3;INTRON=4/4;HGVSc=ENST00000280358.4:c.228-9del;HGVS_OFFSET=6;gnomAD_AF=1.711e-05;gnomAD_AFR_AF=8.319e-05;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=8.285e-05;gnomAD_FIN_AF=0;gnomAD_NFE_AF=1.133e-05;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=8.319e-05;MAX_AF_POPS=gnomAD_AFR
12_49431404_-/T 12:49431403-49431404 T ENSG00000167548 ENST00000301067.7 Transcript frameshift_variant 9735-9736 9735-9736 3245-3246 -/X -/A - IMPACT=HIGH;STRAND=-1;SYMBOL=KMT2D;SYMBOL_SOURCE=HGNC;HGNC_ID=7133;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS44873.1;ENSP=ENSP00000301067;SWISSPROT=O14686;TREMBL=Q6PIA1,Q59FG6,F8VWW4;UNIPARC=UPI0000EE84D6;EXON=34/54;HGVSc=ENST00000301067.7:c.9735dup;HGVSp=ENSP00000301067.7:p.Pro3246ThrfsTer5
13_33332314_A/- 13:33332314 - ENSG00000083642 ENST00000315596.10 Transcript frameshift_variant 3332 3146 1049 Q/X cAa/ca - IMPACT=HIGH;STRAND=1;SYMBOL=PDS5B;SYMBOL_SOURCE=HGNC;HGNC_ID=20418;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS41878.1;ENSP=ENSP00000313851;SWISSPROT=Q9NTI5;UNIPARC=UPI000006D4A9;EXON=27/35;HGVSc=ENST00000315596.10:c.3148del;HGVSp=ENSP00000313851.10:p.Thr1050GlnfsTer12;HGVS_OFFSET=2
17_38712161_T/- 17:38712161 - ENSG00000126353 ENST00000246657.2 Transcript intron_variant - - - - - rs532551852 IMPACT=MODIFIER;STRAND=-1;SYMBOL=CCR7;SYMBOL_SOURCE=HGNC;HGNC_ID=1608;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS11369.1;ENSP=ENSP00000246657;SWISSPROT=P32248;TREMBL=J3KTN5,J3KSS9,A0N0Q0;UNIPARC=UPI0000001C2F;INTRON=2/2;HGVSc=ENST00000246657.2:c.61-91del;AF=0.0008;AFR_AF=0;AMR_AF=0;EAS_AF=0.004;EUR_AF=0;SAS_AF=0;MAX_AF=0.004;MAX_AF_POPS=EAS
20_50342307_TC/- 20:50342307-50342308 - ENSG00000054793 ENST00000338821.5 Transcript intron_variant - - - - - - IMPACT=MODIFIER;STRAND=-1;SYMBOL=ATP9A;SYMBOL_SOURCE=HGNC;HGNC_ID=13540;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS33489.1;ENSP=ENSP00000342481;SWISSPROT=O75110;TREMBL=Q2NLD0,B4DR18;UNIPARC=UPI000004D334;INTRON=3/27;HGVSc=ENST00000338821.5:c.327+50_327+51del
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter n_GT n_SS n_TIR n_TAR n_DP n_DP4 n_AD n_depth n_ref_count n_alt_count t_GT t_SS t_TIR t_TAR t_DP t_DP4 t_AD t_depth t_ref_count t_alt_count
1 24486218 T TTTG 0/0 24 24.0 24.0 0.0 0/1 2 22 0,22,0,6 22.0 22.0 6.0
1 27107272 CGT C 0/0 0,0 21,21 26 0 26.0 21.0 0.0 0/1 2 3,3 14,14 18 4,14,2,1 4 18.0 14.0 3.0
1 117122285 G GTCC 0/0 8 8.0 8.0 0.0 0/1 2 16 4,12,1,6 16.0 16.0 7.0
1 171607569 CAG C 0/0 0,0 12,12 15 12 15.0 12.0 0.0 0/1 2 4,4 7,7 11 10,1,3,1 7 11.0 7.0 4.0
1 175116046 C CT,CTT 0/0 38 0 38.0 38.0 0.0 0/2 2 56 57,2,11,1 7 56.0 59.0 12.0
1 201981005 C CA 0/0 0 0.0 0/1 2 3
1 223284687 G GA 0/0 0,0 91,91 93 91 93.0 91.0 0.0 0/1 2 37,37 35,36 78 32,46,18,19 35 78.0 35.0 37.0
1 226553491 AAAC A,AAACA 0/0 12 12,0,0,0 12.0 12.0 0.0 0/1 2 9 4,0,5,0 9.0 4.0 5.0
2 11273407 GTC G,GTCTC 0/0 9 0 9.0 9.0 0.0 0/2 2 23 24,2,7,1 4 23.0 26.0 8.0
2 42996759 AAGAG A 0/0 5 5.0 5.0 0.0 0/1 2 11 11,0,7,0 11.0 11.0 7.0
2 55181022 TA TAA,T 0/0 20 0 20.0 20.0 0.0 0/2 2 20 25,0,4,0 3 20.0 25.0 4.0
2 153471255 T TCAAAA,TCAAAACAAAACAAAACAAAA 0/0 8 0 8.0 8.0 0.0 0/2 2 8 8,0,3,0 4 8.0 8.0 3.0
2 176988290 C CGCA 0/0 0 0.0 0/1 2 12
3 38355176 T TGCGCGCGCGCGC,TGCGCGCGTGTGTGTGTGTGTGTGCGCGC 0/0 20 1 20.0 20.0 0.0 0/1 2 18 18,2,5,0 8 18.0 20.0 5.0
3 148711906 G GT 0/0 26 0 26.0 26.0 0.0 0/1 2 22 21,2,1,1 4 22.0 23.0 2.0
3 164776647 T TACACAC,TACAC,TACACACAC 0/0 44 0 44.0 44.0 0.0 0/2 2 19 25,0,9,0 3 19.0 25.0 9.0
3 195792288 CGGGG C 0/0 7 0 7.0 7.0 0.0 0/1 2 11 11,0,5,0 5 11.0 11.0 5.0
3 195956726 AAG A 0/0 50 0 50.0 50.0 0.0 0/1 2 75 70,5,9,1 8 75.0 75.0 10.0
4 100472247 T TA 0/0 10 0,10,0,0 10.0 10.0 0.0 0/1 2 16 0,11,0,5 16.0 11.0 5.0
4 108608100 CTCTAACACT C 0/0 8 34 8.0 8.0 0.0 1/1 2 48 48,0,39,0 42 48.0 48.0 39.0
4 121739411 GCACA G 0/0 0 0.0 0/1 2 4
5 1093609 G GGGGCGGGGACT 0/0 0 0.0 0/1 2 18
5 174940299 TAAAAAAAAAAAAA T 0/0 0 0.0 0/1 2 3
6 32487441 A AC 0/0 4 4.0 4.0 0.0 1/1 2 20 2,18,2,18 20.0 20.0 20.0
6 32548732 C CT 0/0 31 31.0 31.0 0.0 0/1 2 106 1,87,0,18 106.0 88.0 18.0
6 32557600 T TC 0/0 42 42.0 42.0 0.0 0/1 2 68 1,57,0,10 68.0 58.0 10.0
6 64289938 ATT AT,ATTT,A 0/0 36 0 36.0 36.0 0.0 0/3 2 22 29,2,4,1 3 22.0 31.0 5.0
6 75950109 TA TAA,T,TAAA 0/0 45 0 45.0 45.0 0.0 0/2 2 27 6,32,1,6 5 27.0 38.0 7.0
6 90577711 TCTTTGCCCAGACATGGA T 0/0 36 36.0 36.0 0.0 0/1 2 71 18,37,3,13 71.0 55.0 16.0
6 105300084 G GTT 0/0 6 9 6.0 6.0 0.0 1/1 2 30 26,3,20,3 8 30.0 29.0 23.0
6 117631463 T TTAA 0/0 21 1 21.0 21.0 0.0 0/1 2 25 5,20,2,5 5 25.0 25.0 7.0
6 152765726 GA GAA,G 0/0 0 0.0 0/1 2 5
6 158484691 CAAAAAAAAAAA C 0/0 0 0.0 0/1 2 3
6 163899794 CT CTT,C 0/0 36 0 36.0 36.0 0.0 0/2 2 22 24,2,3,0 3 22.0 26.0 3.0
7 122269207 CT C 0/0 0 0.0 0/1 2 4
7 135099044 TA T,TAA 0/0 0 0.0 0/1 2 9
7 144532829 A AG 0/0 0 0.0 0/1 2 3
7 150937074 CT C 0/0 0 0.0 0/1 2 3
8 55542730 CTTTGAAATGCTTGGTCAA C 0/0 0,0 73,73 58 73 58.0 73.0 0.0 0/1 2 7,7 27,27 30 21,9,3,4 27 30.0 27.0 7.0
8 124382376 TA T 0/0 0 0.0 0/1 2 3
9 37305829 GAT G 0/0 7 0 7.0 7.0 0.0 0/1 2 12 0,12,0,2 3 12.0 12.0 2.0
9 95039956 TA T 0/0 6 0 6.0 6.0 0.0 0/1 2 6 6,0,2,0 3 6.0 6.0 2.0
9 130950344 ACCC A 0/0 7 0,7,0,0 7.0 7.0 0.0 0/1 2 7 0,4,0,3 7.0 4.0 3.0
9 136249406 GATAATG GATG,GATATATG,GATA 0/0 21 21.0 21.0 0.0 0/1 2 14 12,0,8,0 14.0 12.0 8.0
9 140161631 TGTGGGGCTGAG T,TGTGGAG 0/0 15 0 15.0 15.0 0.0 0/2 2 20 7,11,1,4 4 20.0 18.0 5.0
10 8115955 A ACC 0/0 0,0 58,58 69 58 69.0 58.0 0.0 0/1 2 30,30 65,66 107 24,72,8,20 65 107.0 65.0 30.0
10 36811687 G GGT 0/0 77 0 77.0 77.0 0.0 0/1 2 95 97,3,24,0 11 95.0 100.0 24.0
10 75672607 C CA,CAA 0/0 16 0 16.0 16.0 0.0 0/2 2 13 14,2,9,2 4 13.0 16.0 11.0
10 78839127 A AGT,AGTGT 0/0 0 0.0 0/2 2 7
11 120348771 G GT 0/0 0 0.0 0/1 2 3
12 58187016 CT C 0/0 24 0 24.0 24.0 0.0 0/1 2 19 1,19,0,3 3 19.0 20.0 3.0
12 75893478 CA C 0/0 0 0.0 0/1 2 3
12 100930180 CT C 0/0 21 0 21.0 21.0 0.0 0/1 2 16 16,0,3,0 3 16.0 16.0 3.0
12 110463769 C CT 0/0 0 0.0 0/1 2 3
13 95227137 AAAAG A,AA 0/0 18 3 18.0 18.0 0.0 0/2 2 44 3,31,2,8 0 44.0 34.0 10.0
14 35515606 TGG T 0/0 4 0 4.0 4.0 0.0 0/1 2 7 6,0,6,0 4 7.0 6.0 6.0
14 72941206 G GA 0/0 9 9.0 9.0 0.0 0/1 2 105 67,1,39,1 105.0 68.0 40.0
14 92563254 AT ATT,A 0/0 24 0 24.0 24.0 0.0 0/2 2 32 0,35,0,6 3 32.0 35.0 6.0
15 28473275 T TA 0/0 0 0.0 0/1 2 3
15 81187286 TA T,TAA 0/0 0 0.0 0/2 2 4
15 88690736 CCTTCTTCTTCTTCTTCTTCTT C 0/0 0,0 12,12 18 0 18.0 12.0 0.0 0/1 2 5,5 5,6 27 0,28,0,4 4 27.0 5.0 5.0
16 460578 CT C 0/0 0 0.0 0/1 2 3
16 628994 TGGGC T 0/0 17 0 17.0 17.0 0.0 0/1 2 41 0,41,0,19 7 41.0 41.0 19.0
16 4700318 CA C 0/0 15 0 15.0 15.0 0.0 0/1 2 17 18,2,7,0 3 17.0 20.0 7.0
16 58200464 ACT A 0/0 142 7 142.0 142.0 0.0 0/1 2 126 96,30,9,4 13 126.0 126.0 13.0
16 84230067 GCAACCCCTTCGCT G 0/0 0 0.0 0/1 2 4
16 84230082 AACCCCTTC A 0/0 10 10.0 10.0 0.0 0/1 2 12 8,0,4,0 12.0 8.0 4.0
16 84230091 GCTCAA G 0/0 12 12.0 12.0 0.0 0/1 2 11 7,0,4,0 11.0 7.0 4.0
16 89167075 C CCCCAGGAGGCTCCCGGGAG 0/0 0 0.0 0/1 2 3
17 1264611 TA T 0/0 26 0 26.0 26.0 0.0 0/1 2 32 4,28,1,9 5 32.0 32.0 10.0
17 2297571 GCA G 0/0 0 0.0 0/1 2 7
17 3352494 C CA 0/0 26 0 26.0 26.0 0.0 0/1 2 20 0,20,0,11 3 20.0 20.0 11.0
17 4802255 GGCCTCTGCCTCGCTCCACCC G 0/0 5 0 5.0 5.0 0.0 0/1 2 13 4,9,1,3 6 13.0 13.0 4.0
17 55075670 CA CAA,C 0/0 31 0 31.0 31.0 0.0 0/2 2 36 41,0,7,0 8 36.0 41.0 7.0
17 58525216 GA G 0/0 0 0.0 0/1 2 3
17 67101527 TC T 0/0 0 0.0 0/1 2 3
17 76456454 GAGTGTA G,GAGTGTGCA 0/0 34 34.0 34.0 0.0 0/2 2 38 0,37,0,7 38.0 37.0 7.0
18 43666280 TAGTTAATATATTAATACCTTAAGA T,TAGTTAATATATTAATACCTTAAGAT 0/0 25 36 25.0 25.0 0.0 0/2 2 15 4,6,3,2 35 15.0 10.0 5.0
19 2901114 CGCCGAAGTCT C 0/0 4 0 4.0 4.0 0.0 0/1 2 18 2,16,1,5 5 18.0 18.0 6.0
19 4199809 C CA 0/0 0,0 3,3 3 0 3.0 3.0 0.0 0/1 2 7,7 1,1 8 5,2,5,2 5 8.0 1.0 7.0
19 16513357 CT C 0/0 0 0.0 0/1 2 4
19 41062902 AC A 0/0 0,0 13,13 20 0 20.0 13.0 0.0 0/1 2 13,13 14,14 46 9 46.0 14.0 13.0
20 17928175 CCTG C 0/0 0,0 249,249 255 249 255.0 249.0 0.0 0/1 2 69,70 239,239 330 240,90,49,17 239 330.0 239.0 69.0
20 30060720 GCA G 0/0 0,0 37,37 40 37 40.0 37.0 0.0 0/1 2 9,9 40,41 54 49,5,7,1 40 54.0 40.0 9.0
20 30354257 GGT GGTGT,G 0/0 48 1 48.0 48.0 0.0 0/1 2 41 48,0,9,0 3 41.0 48.0 9.0
20 46279833 GCAA G 0/0 70 70.0 70.0 0.0 0/1 2 108 82,25,8,3 108.0 107.0 11.0
21 30338153 C CA 0/0 0 0.0 0/1 2 3
21 34726106 A AT 0/0 0,0 191,191 229 191 229.0 191.0 0.0 0/1 2 57,57 148,148 253 47,204,14,36 148 253.0 148.0 57.0
21 41384834 C CTT 0/0 4 0 4.0 4.0 0.0 0/1 2 6 6,0,2,0 3 6.0 6.0 2.0
21 45712357 AC A 0/0 3 0 3.0 3.0 0.0 0/1 2 8 0,7,0,4 4 8.0 7.0 4.0
22 31301792 A AGCCACC 0/0 0 0.0 0/1 2 4
22 41918653 GT G 0/0 0,0 2,2 2 0 2.0 2.0 0.0 0/1 2 4,4 0,0 4 4 4.0 0.0 4.0
X 23724675 CAAA C,CA 0/0 28 0 28.0 28.0 0.0 0/1 2 20 26,0,3,0 3 20.0 26.0 3.0
X 54972154 C CGT 0/0 13 13.0 13.0 0.0 0/1 2 5 0,6,0,2 5.0 6.0 2.0
X 55027964 AT A 0/0 0 0.0 0/1 2 5
X 70361651 CA CAA,C 0/0 39 0 39.0 39.0 0.0 0/2 2 35 38,7,20,2 5 35.0 45.0 22.0
X 106461963 CA C 0/0 23 0 23.0 23.0 0.0 0/1 2 13 14,0,3,0 3 13.0 14.0 3.0
X 129203198 C CA 0/0 11 0 11.0 11.0 0.0 0/1 2 9 12,0,5,0 3 9.0 12.0 5.0
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 14 2020
@author: Yoann Pradat
CentraleSupelec
MICS laboratory
9 rue Joliot Curie, Gif-Sur-Yvette, 91190 France
Example of how to annotate a list of VCF files from one project/study.
Example
-----------
python main/data/vcf/run/vcf2maf_tcga_GA.py \
--i_split 1 \
--n_split 1 \
--vcf2maf ~/Documents/biotools/informatics/VCF/mskcc-vcf2maf-5453f80/vcf2maf.pl \
--vep_folder ~/Documents/biotools/informatics/VCF/ensembl-vep \
--vep_data ~/.vep \
--fasta ~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa
"""
import argparse
import os
import pandas as pd
import sys
if "." not in sys.path:
sys.path.append(".")
from src.main import run_annotator
#### # SCRIPT PARAMETERS
#### #####################################################################################################
# Command-line options, declared as (flag, type, default, help) rows so the
# whole interface can be read at a glance. The two split options are integers
# defaulting to 1; every path option defaults to the empty string.
_cli_options = (
    ('--i_split', int, 1, 'the split processed'),
    ('--n_split', int, 1, 'total number of splits'),
    ('--vcf2maf', str, "", 'path to the vcf2maf perl script'),
    ('--vep_folder', str, "", 'path to the folder of the vep command'),
    ('--vep_data', str, "", 'path to the .vep data folder'),
    ('--fasta', str, "", 'path to reference genome FASTA file'),
)

parser = argparse.ArgumentParser()
for _flag, _type, _default, _help in _cli_options:
    parser.add_argument(_flag, type=_type, default=_default, help=_help)

# Parsing happens at module level, so the arguments are read as soon as the
# script is executed (or imported).
args = parser.parse_args()

# Echo the effective parameters, one "name: value" line each, flushing after
# every line so they show up immediately in redirected logs.
print("Parameters", flush=True)
for arg, value in vars(args).items():
    print("%s: %s" % (arg, value), flush=True)
#### # SCRIPT FUNCTION
#### #####################################################################################################
if __name__ == "__main__":
vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/"
vcf_meta_path = os.path.join(vcf_folder, "vcf_meta.txt")
#### paths to results folders
dt_folders = {
'manual_out_folder' : os.path.join(out_folder, "tmp/out_manual"),
'vcf2maf_tmp_folder' : os.path.join(out_folder, "tmp/tmp_vcf2maf"),
'vcf2maf_out_folder' : os.path.join(out_folder, "tmp/out_vcf2maf"),
'vep_out_folder' : os.path.join(out_folder, "tmp/out_vep"),
'maf_folder' : os.path.join(out_folder, "maf"),
}
#### # 1. LOAD
#### # ##################################################################################################
for k, v in dt_folders.items():
if "folder" in k:
os.makedirs(v, exist_ok=True)
#### load meta data
df_meta = pd.read_csv(
filepath_or_buffer = vcf_meta_path,
sep = "\t"
)
vcf_files = [x for x in os.listdir(vcf_folder) if x.endswith(".vcf")]
#### # 2. SPLIT
#### # ##################################################################################################
count_one_split = len(vcf_files)//args.n_split
if args.i_split == args.n_split:
vcf_files = vcf_files[(args.i_split-1)*count_one_split:]
else:
vcf_files = vcf_files[(args.i_split-1)*count_one_split:args.i_split*count_one_split]
count = 0
count_total = len(vcf_files)
#### # 3. PROCESS
#### # ##################################################################################################
#### loop over the list
for vcf_file in vcf_files:
count += 1
print("="*80, flush=True)
print("vcf %d/%d" % (count, count_total), flush=True)
print("processing %s\n" % vcf_file, flush=True)
#### get vcf identifiers
mask_vcf_file = df_meta["file_name_GA"] == vcf_file
index_vcf_file = mask_vcf_file[mask_vcf_file].index[0]
vcf_identifiers = {
"Tumor_Sample" : df_meta.loc[index_vcf_file, "tumor_sample"],
"Tumor_Sample_Barcode" : df_meta.loc[index_vcf_file, "tumor_sample_barcode"],
"Matched_Norm_Sample_Barcode" : df_meta.loc[index_vcf_file, "normal_sample_barcode"],
"Tumor_Sample_Site" : df_meta.loc[index_vcf_file, "tumor_sample_barcode"].split("-")[3][:2],
}
#### get parameter values
col_normal = "NORMAL"
if vcf_identifiers["Tumor_Sample_Site"] == "01":
col_tumor = "PRIMARY"
else:
col_tumor = "METASTATIC"
normal_id = vcf_identifiers["Matched_Norm_Sample_Barcode"],