Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MICS_Biomathematics
bioinfo
VariantAnnotator
Commits
133cab8e
Commit
133cab8e
authored
Aug 28, 2020
by
Pradat Yoann
Browse files
version v1.0
parent
6310ccc3
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
examples/results/TCGA_GA/maf/TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.indel.capture.tcga.txt
View file @
133cab8e
This diff is collapsed.
Click to expand it.
examples/results/TCGA_GA/maf/TCGA-A1-A0SB_db9d40fb-bfce-4c3b-a6c2-41c5c88982f1_a3254f8e-3bbd-42fc-abea-a5f25b7648b3.oxoG.snp.capture.tcga.txt
View file @
133cab8e
This diff is collapsed.
Click to expand it.
examples/results/TCGA_HS/maf/genome.wustl.edu.TCGA-AR-A2LE.indel.97aa5e766ea447c79da152a341d09996.txt
0 → 100644
View file @
133cab8e
This diff is collapsed.
Click to expand it.
examples/results/TCGA_HS/tmp/out_manual/genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.txt
View file @
133cab8e
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter_VCF n_GT n_SS n_FA n_DP n_DP4 n_AD n_depth n_ref_count n_alt_count t_GT t_SS t_FA t_DP t_DP4 t_AD t_depth t_ref_count t_alt_count
Chromosome Position dbSNP_RS Tumor_Seq_Allele1 Tumor_Seq_Allele2 Variant_Quality Filter n_GT n_SS n_FA n_DP n_DP4 n_AD n_depth n_ref_count n_alt_count t_GT t_SS t_FA t_DP t_DP4 t_AD t_depth t_ref_count t_alt_count
1 2520302 T C 0/0 0 5 5.0 5.0 0.0 0/1 2 0.5556 9 2,2,3,2 4,5 9.0 4.0 5.0
1 8421092 C T 0/0 0 0 6 6,0,0,0 6.0 6.0 0.0 0/1 2 0.636 11 3,0,7,1 3,7 11.0 3.0 7.0
1 16386416 G A 0/0 0 0 8 8.0 8.0 0.0 0/1 2 0.6207 29 7,4,11,7 11,19 29.0 11.0 19.0
...
...
examples/results/TCGA_HS/tmp/out_vcf2maf/genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.txt
deleted
100644 → 0
View file @
6310ccc3
This diff is collapsed.
Click to expand it.
examples/results/TCGA_HS/tmp/out_vep/genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.txt
View file @
133cab8e
This diff is collapsed.
Click to expand it.
examples/results/TCGA_HS/tmp/tmp_vcf2maf/genome.wustl.edu.TCGA-A1-A0SD.snv.0e81f9c986154ce89e59240c3f09534f.vep.vcf
deleted
100644 → 0
View file @
6310ccc3
This diff is collapsed.
Click to expand it.
examples/run_example_tcga_GA.py
View file @
133cab8e
...
...
@@ -15,7 +15,7 @@ Example
python examples/run_example_tcga_GA.py
\
--i_split 1
\
--n_split 1
\
--vcf2maf ~/Documents/biotools/informatics/VCF/vcf2maf/vcf2maf.pl
\
--vcf2maf_path ~/Documents/biotools/informatics/VCF/vcf2maf/vcf2maf.pl
\
--vep_folder ~/Documents/biotools/informatics/VCF/ensembl-vep
\
--vep_data ~/.vep
\
--vep_n_fork 4
\
...
...
@@ -30,6 +30,8 @@ if "." not in sys.path:
sys
.
path
.
append
(
"."
)
from
src.main
import
run_annotator
from
src.main
import
Vcf2mafConfig
from
src.main
import
VepConfig
#### # SCRIPT PARAMETERS
#### #####################################################################################################
...
...
@@ -37,7 +39,7 @@ from src.main import run_annotator
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--i_split'
,
type
=
int
,
default
=
1
,
help
=
'the split processed'
)
parser
.
add_argument
(
'--n_split'
,
type
=
int
,
default
=
1
,
help
=
'total number of splits'
)
parser
.
add_argument
(
'--vcf2maf
'
,
type
=
str
,
default
=
""
,
help
=
'path to the vcf2maf perl script'
)
parser
.
add_argument
(
'--vcf2maf
_path'
,
type
=
str
,
default
=
""
,
help
=
'path to the vcf2maf perl script'
)
parser
.
add_argument
(
'--vep_folder'
,
type
=
str
,
default
=
""
,
help
=
'path to the folder of the vep command'
)
parser
.
add_argument
(
'--vep_data'
,
type
=
str
,
default
=
""
,
help
=
'path to the .vep data folder'
)
parser
.
add_argument
(
'--vep_n_fork'
,
type
=
int
,
default
=
4
,
help
=
'number of forks to be used by VEP'
)
...
...
@@ -94,7 +96,28 @@ if __name__ == "__main__":
count
=
0
count_total
=
len
(
vcf_files
)
#### # 3. PROCESS
#### # 3. CONFIG
#### # ##################################################################################################
#### configure vep (for inside vcf2maf and for custom if set to use custom vep commands)
vep_config
=
VepConfig
(
folder
=
args
.
vep_folder
,
data
=
args
.
vep_data
,
n_fork
=
args
.
vep_n_fork
,
fasta
=
args
.
fasta
,
custom_run
=
False
,
# custom_opt = "~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN",
custom_overwrite
=
False
,
)
#### configure vcf2maf
vcf2maf_config
=
Vcf2mafConfig
(
path
=
args
.
vcf2maf_path
,
run
=
True
,
overwrite
=
False
)
#### # 4. ANNOTATE
#### # ##################################################################################################
#### loop over the list
...
...
@@ -135,14 +158,8 @@ if __name__ == "__main__":
tumor_id
=
tumor_id
,
infos_n_reads
=
infos_n_reads
,
infos_other
=
infos_other
,
vcf2maf
=
args
.
vcf2maf
,
vep_folder
=
args
.
vep_folder
,
vep_data
=
args
.
vep_data
,
# vep_custom = "~/.vep/custom/ClinVar/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN",
vep_n_fork
=
args
.
vep_n_fork
,
vep_overwrite
=
True
,
vcf2maf_overwrite
=
True
,
fasta
=
args
.
fasta
,
dt_folders
=
dt_folders
,
dt_identifiers
=
dt_identifiers
dt_identifiers
=
dt_identifiers
,
vep_config
=
vep_config
,
vcf2maf_config
=
vcf2maf_config
)
src/main.py
View file @
133cab8e
...
...
@@ -229,7 +229,11 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
#### vcf2maf
ddf_maf
[
"vcf2maf"
].
columns
=
[
"%s_VCF2MAF"
%
x
for
x
in
ddf_maf
[
"vcf2maf"
].
columns
]
for
column
in
ddf_maf
[
"vcf2maf"
].
columns
:
maf_columns
.
append
(
ddf_maf
[
"vcf2maf"
][
column
])
if
column
in
dt_identifiers
.
keys
():
#### prioritize identifiers from input
pass
else
:
maf_columns
.
append
(
ddf_maf
[
"vcf2maf"
][
column
])
#### vep
ddf_maf
[
"alone"
].
columns
=
[
"%s_VEP"
%
x
for
x
in
ddf_maf
[
"alone"
].
columns
]
...
...
@@ -239,7 +243,11 @@ def run_annotator(vcf_folder: str, vcf_file: str, col_normal: str, col_tumor: st
elif
vcf2maf_config
.
run
:
#### vcf2maf
for
column
in
ddf_maf
[
"vcf2maf"
].
columns
:
maf_columns
.
append
(
ddf_maf
[
"vcf2maf"
][
column
])
if
column
in
dt_identifiers
.
keys
():
#### prioritize identifiers from input
pass
else
:
maf_columns
.
append
(
ddf_maf
[
"vcf2maf"
][
column
])
elif
vep_config
.
custom_run
:
#### vep
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment