# -*- coding: utf-8 -*-
"""
Created on Thu Aug 13 2020

@author: Yoann Pradat

    CentraleSupelec
    MICS laboratory
    9 rue Joliot-Curie, Gif-Sur-Yvette, 91190 France

Test functions from vep module.
"""

import os
from .._util import set_wd_to_repo
from .._vep import run_vep_annotator

def test_vep():
    """Run ``run_vep_annotator`` on the example TCGA VCF files.

    Four example VCF files are annotated in turn: one indel and one SNP file
    from ``./examples/data/TCGA_GA/`` and one of each from
    ``./examples/data/TCGA_HS/``. Each result is written to the matching
    ``./examples/results/<dataset>/tmp/out_vep`` folder under the input file
    name with ``.vcf`` replaced by ``.txt``.

    This is a smoke test: it makes no assertions and passes as long as none
    of the calls raises.
    """
    vep_data = "~/.vep"
    vep_n_fork = 4
    fasta = "~/.vep/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"

    # Every path below is relative ("./examples/..."). Presumably this call
    # moves the working directory to the repository root so that they
    # resolve -- confirm against ``_util.set_wd_to_repo``.
    set_wd_to_repo()

    # (input folder, output folder, VCF files to annotate), in run order.
    datasets = [
        # 1. TCGA GA
        (
            "./examples/data/TCGA_GA/",
            "./examples/results/TCGA_GA/tmp/out_vep",
            [
                # Indel TCGA_GA
                "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.indel.capture.tcga.vcf",
                # SNP TCGA_GA
                "TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.oxoG.snp.capture.tcga.vcf",
            ],
        ),
        # 2. TCGA HS
        (
            "./examples/data/TCGA_HS/",
            "./examples/results/TCGA_HS/tmp/out_vep",
            [
                # Indel TCGA_HS
                "genome.wustl.edu.TCGA-A1-A0SD.indel.0e81f9c986154ce89e59240c3f09534f.vcf",
                # SNP TCGA_HS
                "genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.vcf",
            ],
        ),
    ]

    for vcf_folder, out_folder, vcf_files in datasets:
        # Create the output folder before the first annotation of the dataset.
        os.makedirs(out_folder, exist_ok=True)

        for vcf_file in vcf_files:
            out_file = vcf_file.replace(".vcf", ".txt")

            run_vep_annotator(
                vep_data=vep_data,
                vcf_path=os.path.join(vcf_folder, vcf_file),
                out_path=os.path.join(out_folder, out_file),
                fasta=fasta,
                vep_n_fork=vep_n_fork,
            )