# -*- coding: utf-8 -*-
"""
@created: Aug 13 2020
@modified: Oct 30 2020
@author: Yoann Pradat

    CentraleSupelec
    MICS laboratory
    9 rue Joliot Curie, Gif-Sur-Yvette, 91190 France

Python wrapper around VEP command.
"""

import os
import subprocess
from typing import Union

from ._util import get_path_to_repo


def run_vep_annotator(vep_data: str, vcf_path: str, out_path: str, fasta: str, vep_custom: Union[str, list, None]=None, overwrite: bool=False, vep_n_fork: int=4):
    """
    Run the Ensembl Variant Effect Predictor (VEP) alone with custom options.

    See options details at
    https://www.ensembl.org/info/docs/tools/vep/script/vep_options.html#opt_af

    The VEP executable is looked up at ``tools/ensembl-vep/vep`` inside the
    repository returned by ``get_path_to_repo()``. VEP is run with ``--cache``
    and ``--offline``.

    Parameters
    ----------
    vep_data: str
        path to the .vep data where the reference genome is located
    vcf_path: str
        path to the vcf file
    out_path: str
        path where output should be saved
    fasta: str
        relative path to fasta file from vep_folder
    vep_custom: str or list, optional.
        one value, or a list of values, each passed to VEP through its own
        ``--custom`` option. For instance
        '~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN'
        Each value is handed to VEP as a single argument (no word splitting).
    overwrite: bool, optional.
        if the output file already exists (from previous run), should it be overwritten?
    vep_n_fork: int, optional.
        number of forks to be used when running VEP.

    Raises
    ------
    ValueError
        if ``vep_custom`` is neither None, a str nor a list. The check is done
        before any existing output file is removed.

    Notes
    -----
    A leading ``~`` and environment variables are expanded in all path
    arguments, because the command is executed without a shell. A failure to
    launch VEP or a non-zero exit status is reported on stdout; no exception is
    raised for it.
    """
    # Resolve the output location once so that the existence test looks at the
    # same file VEP will write to.
    out_file = _expand_path(out_path)

    if os.path.exists(out_file) and not overwrite:
        print("output file %s already exists and overwrite is set to False" % out_path)
        return

    # Validate the arguments *before* touching the filesystem: a bad
    # vep_custom must not cost the user the output of a previous run.
    custom_specs = _normalize_vep_custom(vep_custom)

    repo_path = get_path_to_repo()
    vep_path = os.path.join(repo_path, "tools/ensembl-vep/vep")

    print("STATUS: RUNNING VEP")

    if os.path.exists(out_file):
        os.remove(out_file)
        print("removed existing file: %s" % out_path)

    cmd = _build_vep_command(
        vep_path=vep_path,
        vep_data=vep_data,
        vcf_path=vcf_path,
        out_path=out_file,
        fasta=fasta,
        custom_specs=custom_specs,
        vep_n_fork=vep_n_fork,
    )

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    try:
        completed = subprocess.run(cmd, check=False)
    except OSError as err:
        print("ERROR: could not launch VEP at %s: %s" % (vep_path, err))
        return

    if completed.returncode != 0:
        print("ERROR: VEP exited with status %d" % completed.returncode)


def _expand_path(path) -> str:
    """Expand a leading ``~`` and environment variables, as the shell previously did."""
    return os.path.expandvars(os.path.expanduser(str(path)))


def _normalize_vep_custom(vep_custom) -> list:
    """Return the ``--custom`` values as a list of str; raise ValueError on a bad type."""
    if vep_custom is None:
        return []
    if isinstance(vep_custom, str):
        return [vep_custom]
    if isinstance(vep_custom, (list, tuple)):
        return [str(v_custom) for v_custom in vep_custom]
    raise ValueError("vep_custom should be of type list or str")


def _build_vep_command(vep_path, vep_data, vcf_path, out_path, fasta, custom_specs, vep_n_fork) -> list:
    """Assemble the VEP argument list, keeping the option order of the original command."""
    cmd = [
        _expand_path(vep_path),
        "--dir", _expand_path(vep_data),
        "--af",
        "--af_gnomad",
        "--af_esp",
        "--clin_sig_allele", "0",
        "--max_af",
        "--af_1k",
        "--no_progress",
        "--no_stats",
        "--appris",
        "--biotype",
        "--buffer_size", "500",
        "--canonical",
        "--ccds",
        "--check_existing",
        "--distance", "5000",
        "--hgvs",
        "--fork", str(vep_n_fork),
        "--numbers",
        "--mane",
        "--pick",
        "--polyphen", "b",
        "--protein",
        "--pubmed",
        "--regulatory",
        "--sift", "b",
        "--species", "homo_sapiens",
        "--symbol",
        "--transcript_version",
        "--tsl",
        "--uniprot",
        "--input_file", _expand_path(vcf_path),
        "--output_file", _expand_path(out_path),
        "--fasta", _expand_path(fasta),
        "--cache",
        "--offline",
    ]

    for spec in custom_specs:
        cmd.extend(["--custom", _expand_path(spec)])

    return cmd