Commit 63901538 authored by Pradat Yoann's avatar Pradat Yoann

[dev] successfully run annotator one

parent 6e9a67fd
[run]
omit = */__init__.py
# Project specific
/data/
/results/
/examples/
/htmlcov/
# Logs
log/
......
......@@ -12,7 +12,7 @@ see [VEP's options page](https://www.ensembl.org/info/docs/tools/vep/script/vep_
### 1.1. Manual parsing
Relies on tags specified by the user to extract relevant info like genotype (GT), somatic status (SS), quality and filter info (QUAL, INFO) and most importantly reads information (AD, DP, FA, DP4, TAR, TIR). The parser has been tested on TCGA VCF files as produced by
Relies on tags specified by the user to extract relevant info like genotype (GT), somatic status (SS), quality and filter info (QUAL, INFO) and most importantly reads information (AD, DP, FA, DP4, TAR, TIR). The parser has been tested on VCF files as produced by
- Mutect v.1 (TCGA GA SNV) and Strelka (TCGA GA Indel)
- sets of callers VarScanSomatic-Strelka-Sniper-Samtools (TCGA HS SNP), GatkSomaticIndel-Pindel-Strelka-VarScanSomatic (TCGA HS Indel).
- Mutect v.1.1.7 with no header
......
This diff is collapsed.
......@@ -291,8 +291,7 @@ def process_assemble(df_vcf: DataFrame, df_vcf_info: DataFrame, df_vcf_reads: Da
#### # MAIN FUNCTION
#### #####################################################################################################
def run_manual_annotator(vcf_path: str, out_path:str, col_normal: str, col_tumor: str, infos_n_reads: list,
infos_other: list):
def run_manual_annotator(vcf_path: str, out_path:str, col_normal: str, col_tumor: str, infos_n_reads: list, infos_other: list):
"""
Manually parse VCF file and save at the path specified.
......
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 13 2020
@author: Yoann Pradat
CentraleSupelec
MICS laboratory
9 rue Joliot Curie, Gif-Sur-Yvette, 91190 France
Test functions from main module.
"""
import os
from ..main import run_annotator_one
def test_main():
    """
    Run ``run_annotator_one`` on the TCGA GA indel example VCF.

    The locations of vcf2maf, VEP, the VEP data and the reference fasta are hard-coded paths under the home
    directory. The result folders are created under ./examples/results/TCGA_GA/ before the annotator is called.
    Only the TCGA GA indel file is passed to ``run_annotator_one``; the other example files are assigned at the end
    of the function but not used.
    """
    #### # 0. EXTERNAL TOOLS
    #### # ########################################################################################################

    vcf2maf    = "~/Documents/biotools/informatics/VCF/mskcc-vcf2maf-5453f80/vcf2maf.pl"
    vep_folder = "~/Documents/biotools/informatics/VCF/ensembl-vep"
    vep_data   = "~/.vep"
    fasta      = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"

    #### # 1. TCGA GA
    #### # ########################################################################################################

    vcf_folder = "./examples/data/TCGA_GA/"
    out_folder = "./examples/results/TCGA_GA/"

    #### sub-path of each results folder, relative to out_folder
    subfolders = {
        'manual_out_folder'  : "tmp/out_manual",
        'vcf2maf_tmp_folder' : "tmp/tmp_vcf2maf",
        'vcf2maf_out_folder' : "tmp/out_vcf2maf",
        'vep_out_folder'     : "tmp/out_vep",
        'maf_folder'         : "maf",
    }

    #### paths to results folders
    dt_folders = {name: os.path.join(out_folder, sub) for name, sub in subfolders.items()}

    #### make folders if they do not exist already
    for folder in dt_folders.values():
        os.makedirs(folder, exist_ok=True)

    #### Indel TCGA_GA
    vcf_file = "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.indel.capture.tcga.vcf"

    run_annotator_one(
        vcf_folder    = vcf_folder,
        vcf_file      = vcf_file,
        col_normal    = "NORMAL",
        col_tumor     = "PRIMARY",
        normal_id     = "TCGA-A1-A0SD-10A-01D-A110-09",
        tumor_id      = "TCGA-A1-A0SD-01A-11D-A10Y-09",
        infos_n_reads = ["AD", "DP4", "DP", "TAR", "TIR"],
        infos_other   = ["SS", "GT"],
        vcf2maf       = vcf2maf,
        vep_folder    = vep_folder,
        vep_data      = vep_data,
        fasta         = fasta,
        dt_folders    = dt_folders
    )

    #### SNP TCGA_GA (assigned only, no annotator call follows)
    vcf_file = "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.oxoG.snp.capture.tcga.vcf"

    #### # 2. TCGA HS
    #### # ########################################################################################################

    vcf_folder = "./examples/data/TCGA_HS/"

    #### Indel TCGA_HS (assigned only, no annotator call follows)
    vcf_file = "genome.wustl.edu.TCGA-A1-A0SD.indel.0e81f9c986154ce89e59240c3f09534f.vcf"

    #### SNP TCGA_HS (assigned only, no annotator call follows)
    vcf_file = "genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.vcf"
......@@ -20,7 +20,7 @@ def test_manual():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/out_manual"
out_folder = "./examples/results/TCGA_GA/tmp/out_manual"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_GA
......@@ -61,7 +61,7 @@ def test_manual():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/"
out_folder = "./examples/results/TCGA_HS/out_manual"
out_folder = "./examples/results/TCGA_HS/tmp/out_manual"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_HS
......
......@@ -24,8 +24,8 @@ def test_vcf2maf():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/"
tmp_folder = "./examples/results/TCGA_GA/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_GA/out_vcf2maf"
tmp_folder = "./examples/results/TCGA_GA/tmp/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_GA/tmp/out_vcf2maf"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_GA
......@@ -68,8 +68,8 @@ def test_vcf2maf():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/"
tmp_folder = "./examples/results/TCGA_HS/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_HS/out_vcf2maf"
tmp_folder = "./examples/results/TCGA_HS/tmp/tmp_vcf2maf"
out_folder = "./examples/results/TCGA_HS/tmp/out_vcf2maf"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_HS
......
......@@ -23,7 +23,7 @@ def test_vep():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_GA/"
out_folder = "./examples/results/TCGA_GA/out_vep"
out_folder = "./examples/results/TCGA_GA/tmp/out_vep"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_GA
......@@ -54,7 +54,7 @@ def test_vep():
#### # ########################################################################################################
vcf_folder = "./examples/data/TCGA_HS/"
out_folder = "./examples/results/TCGA_HS/out_vep"
out_folder = "./examples/results/TCGA_HS/tmp/out_vep"
os.makedirs(out_folder, exist_ok=True)
#### Indel TCGA_HS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment