B:BD[
4.7047] → [
4.7047:8221]
B:BD[
4.8221] → [
2.34002:47097]
∅:D[
2.47097] → [
4.16413:18528]
B:BD[
4.16413] → [
4.16413:18528]
y", line 159, in <listcomp>
y_ref = np.mean([ann.models[m].predict(x_ref) for m in range(5)], axis=0)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2382, in predict
tmp_batch_outputs = self.predict_function(iterator)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2169, in predict_function
return step_function(self, iterator)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2155, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2143, in run_step
outputs = model.predict_step(data)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2111, in predict_step
return self(x, training=False)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 558, in __call__
return super().__call__(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/functional.py", line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/layers/convolutional/base_conv.py", line 290, in call
outputs = self.convolution_op(inputs, self.kernel)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/layers/convolutional/base_conv.py", line 262, in convolution_op
return tf.nn.convolution(
Node: 'model_1/conv1d_3/Conv1D'
DNN library is not found.
[[{{node model_1/conv1d_3/Conv1D}}]] [Op:__inference_predict_function_22195]
#+end_quote
***** DONE GPU: chr20 ok
CLOSED: [2023-09-26 Tue 11:50]
LD_PRELOAD=/lib64/libcuda.so spliceai -I NA12878-sanger-20-2-T2T.vep.vcf.gz -O output-20-2-gpu.vcf -R /Work/Groups/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa -A ~/t2t.txt
temps d'exécution : 5min
***** STRT GPU: toutes les données :GPU:spliceai:
****** DONE Run : 70GB, 3h30
CLOSED: [2023-09-27 Wed 10:37] SCHEDULED: <2023-09-26 Tue>
32G insuffisant ! Il faut 70GB :
Job ID: 17340
Cluster: mesoubfc
User/Group: apraga/mesousers
State: COMPLETED (exit code 0)
Cores: 1
CPU Utilized: 03:11:53
CPU Efficiency: 93.55% of 03:25:07 core-walltime
Job Wall-clock time: 03:25:07
Memory Utilized: 67.75 GB
Memory Efficiency: 52.93% of 128.00 GB
#+begin_src slurm
#!/bin/bash -l
# Fichier submission.SBATCH
#SBATCH --job-name="spliceai-gpu"
#SBATCH --output=%x.%J.out ## %x=nom_du_job, %J=id du job
#SBATCH --error=%x.%J.out
# walltime (hh:mm:ss) max is 8 days
#SBATCH -t 24:00:00
#SBATCH --partition=gpu
#SBATCH --gres=gpu:1
## To request more memory, use --mem option.
## Please don't use more than 128g.
#SBATCH --mem=64G
## votre adresse mail pour les notifs
#SBATCH --mail-user=apraga@chu-besancon.fr
#SBATCH --mail-type=END,FAIL
nvidia-smi
module purge
module load nix/2.11.0
LD_PRELOAD=/lib64/libcuda.so spliceai -I NA12878-sanger-all-T2T.vep.vcf.gz -O output-all-gpu.vcf -R /Work/Groups/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa -A ~/t2t.txt
#+end_src
****** TODO Annoter la sortie de VEP avec ce VCF
Générer un fichier d'annotation
#+begin_src
bcftools annotate -x INFO/CSQ output-all-gpu.vcf -o spliceai.vcf.gz
bcftools index spliceai.vcf.gz
#+end_src
Annoter avec vep
#+begin_src sh
ln -s /Work/Projects/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa .
ln -s /Work/Projects/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa.fai .
ln -s /Work/Projects/bisonex/data/vep/chm13v2.0/106 .
ln -s /Work/Projects/bisonex/data/clinvar/chm13v2.0/clinvar.vcf.gz .
ln -s /Work/Projects/bisonex/data/clinvar/chm13v2.0/clinvar.vcf.gz.tbi .
#+end_src
#+begin_src sh
#vep -i output-all-gpu.vcf -o output-all-gpu-filtered.vcf --appris --biotype --canonical --ccds --compress_output bgzip --domains --exclude_predicted --flag_pick --hgvs --hgvsg --gene_phenotype --numbers --mane --protein --offline --uniprot --symbol --tsl --use_given_ref --variant_class --vcf --plugin NMD --custom clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN --plugin SpliceAI,snv=spliceai.vcf.gz,indel=spliceai.vcf.gz --fasta chm13v2.0.fa --assembly T2T-CHM13v2.0 --species homo_sapiens_gca009914755v4/ --cache --cache_version 106 --dir_cache 106
vep -i lol.vcf --force -o test.vcf.gz --appris --biotype --canonical --ccds --compress_output bgzip --domains --exclude_predicted --flag_pick --hgvs --hgvsg --gene_phenotype --numbers --mane --protein --offline --uniprot --symbol --tsl --use_given_ref --variant_class --vcf --plugin NMD --custom clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN --custom spliceai.vcf.gz,SpliceAI,vcf,exact,0,DS_AG%DS_AL --fasta chm13v2.0.fa --assembly T2T-CHM13v2.0 --species homo_sapiens_gca009914755v4/ --cache --cache_version 106 --dir_cache ${PWD}/106
#+end_src
****** KILL Save
CLOSED: [2023-09-27 Wed 21:40]
# ****** DONE Filtre vep avec spliceAI: 37365 -> 6130. SpliceAI n'apporte rien
# CLOSED: [2023-09-27 Wed 19:37] SCHEDULED: <2023-09-27 Wed>
# :PROPERTIES:
# :ID: c9b2009a-503b-4561-94c6-29ae21a3188d
# :END:
# #+begin_src sh
# filter_vep -i output-all-gpu.vcf --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA) or (SpliceAI_pred_DS_AG and SpliceAI_pred_DS_AG >= 0.2) or (SpliceAI_pred_DS_AL and SpliceAI_pred_DS_AL >= 0.2) or (SpliceAI_pred_DS_DG and SpliceAI_pred_DS_DG >= 0.2) or (SpliceAI_pred_DS_DL and SpliceAI_pred_DS_DL >= 0.2) " --only_matched -o output-all-gpu-filtered.vcf
# #+end_src
# filter_vep -i output-all-gpu.vcf --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA)" --only_matched | grep -c -v '^#'
# 6130
# $ grep -c -v '^#' output-all-gpu-filtered.vcf
# 6130
# ****** DONE Re-vérifier filtre avec spip: 7730 -> probable problème avec spip
# CLOSED: [2023-09-27 Wed 20:54] SCHEDULED: <2023-09-27 Wed>
# filter_vep -i NA12878-sanger-all-T2T.vep.vcf.gz --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA) or (SPIP_spipScore and SPIP_spipScore >= 20)" --only_matched | grep -c -v '^#'
# perl: warning: Setting locale failed.
# perl: warning: Please check that your locale settings:
# LANGUAGE = (unset),
# LC_ALL = (unset),
# LANG = "en_US.utf8"
# are supported and installed on your system.
# perl: warning: Falling back to the standard locale ("C").
# 7730
****** TODO vérifier si tests sanger passent
SCHEDULED: <2023-09-27 Wed>
***** TODO Avec pip: echec
2023-09-24 08:28:46.361434: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU.
***** DONE Tester conda: echec
CLOSED: [2023-09-23 Sat 21:43] SCHEDULED: <2023-09-23 Sat>
Anaconda : n'arrive pas à installer
#+begin_quote
- feature:/linux-64::__glibc==2.28=0
- python=3.11 -> libgcc-ng[version='>=11.2.0'] -> __glibc[version='>=2.17']
- spliceai -> tensorflow[version='>=1.13.0'] -> __cuda
- spliceai -> tensorflow[version='>=1.13.0'] -> __glibc[version='>=2.17']
Your installed version is: 2.28
#+end_quote
Il faut utiliser mamba
**** TODO Ajout LOEUF et pli
plugin VEP
**** TODO NMD
plugin VEP
**** KILL Ajout LOEUF
CLOSED: [2023-04-19 mer. 16:32]
plugin VEP
**** DONE Spip
CLOSED: [2023-05-01 Mon 23:07] SCHEDULED: <2023-04-30 Sun>
BED ne semble pas bien marcher (il faut définir une zone)
VCF : trop d’information
Attention, plusieurs transcrits mais résultats identiques. On supprime les doublons
***** DONE interpretation + score + intervalle de confiance séparé
CLOSED: [2023-05-01 Mon 23:07] SCHEDULED: <2023-04-30 Sun>
Tests :
dans tests/
vep -i 63004925-small.vcf -o postvep.vcf --vcf --fasta genomeRef.fna --dir 109 --merged --pick --offline --custom ../script/spip_annotation.vcf.gz,SPIP,vcf,exact,0,spipInterp,spipScore,spipConfidence
***** DONE Score
CLOSED: [2023-04-22 Sat 15:30]
**** DONE CADD: remplacer par plugin VEP
CLOSED: [2023-05-07 Sun 14:45] SCHEDULED: <2023-05-07 Sun>
***** Test
#+begin_src
vep -i test.vcf -o lol.vcf --offline --dir /Work/Projects/bisonex/data/vep/GRCh38/ --merged --vcf --fasta /Work/Projects/bisonex/data/genome/GRCh38.p13/genomeRef.fna --plugin CADD,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.snv.tsv.gz,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.indel.tsv.gz --dir_plugins ../VEP_plugins/ -v
#+end_src
Test
#+begin_src sh
vep --id "1 230710048 230710048 A/G 1" --offline --dir /Work/Projects/bisonex/data/vep/GRCh38/ --merged --vcf --fasta /Work/Projects/bisonex/data/genome/GRCh38.p13/genomeRef.fna --plugin CADD,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.snv.tsv.gz,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.indel.tsv.gz --hgvsg --plugin pLI --plugin LOEUF -o lol
#+end_src
CSQ=G|missense_variant|MODERATE|AGT|ENSG00000135744|Transcript|ENST00000366667|protein_coding|2/5||||843|776|259|M/T|aTg/aCg|||-1||HGNC|HGNC:333||Ensembl||A|A||1:g.230710048A>G|0.347|-0.277922|
Correspond bien à https://www.ensembl.org/Homo_sapiens/Tools/VEP/Results?tl=I7ZsIbrj14P6lD43-9115494
***** DONE Utiliser whole genome
CLOSED: [2023-04-29 Sat 15:46]
***** KILL Renommer les chromosome avant ...
CLOSED: [2023-05-01 Mon 09:14] SCHEDULED: <2023-04-30 Sun>
Trop long !
- Téléchargement de CADD: 4h20
- renommer les chromosomes pour SNV : 6h20
- tabix sur les SNV : job tué au bout de 21h....
***** DONE annoter séparément et fusionner les tableaux
CLOSED: [2023-05-07 Sun 14:45] SCHEDULED: <2023-05-01 Mon>
NB: on pourrait filtrer CADD avec tabix pour se restreindre à nos variants
**** DONE clinvar
CLOSED: [2023-04-22 Sat 15:31]
**** KILL Vérifier résultats HGVS avec mutalyzer
CLOSED: [2023-05-01 Mon 09:26]
**** HOLD Parallélisation
***** HOLD par chromosome avec workflow VEP
https://github.com/Ensembl/ensembl-vep/blob/release/109/nextflow/workflows/run_vep.nf
***** HOLD Avec option --fork
**** DONE Utiliser la version de nf-core de VEP
CLOSED: [2023-05-13 Sat 18:27] SCHEDULED: <2023-05-07 Sun>
**** DONE OMIM
CLOSED: [2023-08-31 Thu 10:38] SCHEDULED: <2023-08-29 Tue>
**** DONE plI et LOEUF depuis gnomad
CLOSED: [2023-08-31 Thu 10:38] SCHEDULED: <2023-08-29 Tue>
**** DONE Grantham
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-30 Wed>
**** DONE Corriger spliceAI
CLOSED: [2023-08-31 Thu 13:51] SCHEDULED: <2023-08-31 Thu>
Pas d'annotation
- chromosome ? essai 1 au lieu de chr1 : idem. Et fonctionne pour CADD
- index ?
-
retélécharger
- indexer nous-meme
**** DONE Supprimer score spip en double
CLOSED: [2023-08-31 Thu 14:17] SCHEDULED: <2023-08-31 Thu>
**** DONE Vérifier variant 63126867
CLOSED: [2023-08-31 Thu 10:52] SCHEDULED: <2023-08-31 Thu>
**** DONE Ajouter tronquant ou non
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** DONE Ajouter récessif
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** KILL Corriger allelic depth
CLOSED: [2023-08-31 Thu 11:18] SCHEDULED: <2023-08-31 Thu>
Problème lié à libre office
**** DONE Regénérer annotation pour na12878, inserted et patient PEX1
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** TODO ACMG incidental
**** DONE Sortie VCF (pour avoir la fraction allélique AF)
CLOSED: [2023-08-28 Mon 17:22]
**** DONE VCF -> tsv avec bcftools
CLOSED: [2023-08-29 Tue 11:03] SCHEDULED: <2023-08-28 Mon>
**** DONE Un seul transcrit après VEP avec filter_vep :filter:
CLOSED: [2023-08-29 Tue 11:03] SCHEDULED: <2023-08-28 Mon>
Avec mise à jour VEP 110, l'option --flag_pick semble fonctionner.
***** DONE Test chr20: Pas de variant "perdus"
CLOSED: [2023-08-28 Mon 17:31] SCHEDULED: <2023-08-28 Mon>
contrairement au résultat communiqué à alexis par mail
#+begin_src sh :dir out/annotate
bcftools +counts vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz
#+end_src
Number of samples: 1
Number of SNPs: 123
Number of INDELs: 32
Number of MNPs: 53
Number of others: 0
Number of sites: 208
#+begin_src sh
filter_vep -i vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz --filter 'PICK' | bcftools +counts
#+end_src
Number of samples: 1
Number of SNPs: 123
Number of INDELs: 32
Number of MNPs: 53
Number of others: 0
Number of sites: 208
2nd vérification
#+begin_src sh :dir out/annotate
filter_vep -i vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz --filter 'PICK' --soft_filter | grep fail
#+end_src
***** DONE Test NA12878 + variants sanger : variants perdus avec --pick ?
CLOSED: [2023-08-29 Tue 1
y", line 159, in <listcomp>
y_ref = np.mean([ann.models[m].predict(x_ref) for m in range(5)], axis=0)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2382, in predict
tmp_batch_outputs = self.predict_function(iterator)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2169, in predict_function
return step_function(self, iterator)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2155, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2143, in run_step
outputs = model.predict_step(data)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 2111, in predict_step
return self(x, training=False)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/training.py", line 558, in __call__
return super().__call__(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/functional.py", line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/layers/convolutional/base_conv.py", line 290, in call
outputs = self.convolution_op(inputs, self.kernel)
File "/Softs/helios/gpu/anaconda3/2023.03-1/envs/tensorflow-gpu-2.12.0+py3.9/lib/python3.9/site-packages/keras/layers/convolutional/base_conv.py", line 262, in convolution_op
return tf.nn.convolution(
Node: 'model_1/conv1d_3/Conv1D'
DNN library is not found.
[[{{node model_1/conv1d_3/Conv1D}}]] [Op:__inference_predict_function_22195]
#+end_quote
***** DONE GPU: chr20 ok
CLOSED: [2023-09-26 Tue 11:50]
LD_PRELOAD=/lib64/libcuda.so spliceai -I NA12878-sanger-20-2-T2T.vep.vcf.gz -O output-20-2-gpu.vcf -R /Work/Groups/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa -A ~/t2t.txt
temps d'exécution : 5min
***** DONE GPU: toutes les données :GPU:spliceai:
CLOSED: [2023-09-28 Thu 01:34]
****** DONE Run : 70GB, 3h30
CLOSED: [2023-09-27 Wed 10:37] SCHEDULED: <2023-09-26 Tue>
32G insuffisant ! Il faut 70GB :
Job ID: 17340
Cluster: mesoubfc
User/Group: apraga/mesousers
State: COMPLETED (exit code 0)
Cores: 1
CPU Utilized: 03:11:53
CPU Efficiency: 93.55% of 03:25:07 core-walltime
Job Wall-clock time: 03:25:07
Memory Utilized: 67.75 GB
Memory Efficiency: 52.93% of 128.00 GB
#+begin_src slurm
#!/bin/bash -l
# Fichier submission.SBATCH
#SBATCH --job-name="spliceai-gpu"
#SBATCH --output=%x.%J.out ## %x=nom_du_job, %J=id du job
#SBATCH --error=%x.%J.out
# walltime (hh:mm:ss) max is 8 days
#SBATCH -t 24:00:00
#SBATCH --partition=gpu
#SBATCH --gres=gpu:1
## To request more memory, use --mem option.
## Please don't use more than 128g.
#SBATCH --mem=64G
## votre adresse mail pour les notifs
#SBATCH --mail-user=apraga@chu-besancon.fr
#SBATCH --mail-type=END,FAIL
nvidia-smi
module purge
module load nix/2.11.0
LD_PRELOAD=/lib64/libcuda.so spliceai -I NA12878-sanger-all-T2T.vep.vcf.gz -O output-all-gpu.vcf -R /Work/Groups/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa -A ~/t2t.txt
#+end_src
****** DONE Annoter la sortie de VEP avec ce VCF
CLOSED: [2023-09-28 Thu 01:32]
Problème: SpliceAI est dans un INFO différent et donc pas avec les transcrits. filter_vep ne semble pas gérer ce cas.
On doit donc fusionner : le plus simple est de le convertir en fichier d'annotation et d'utiliser vep (comme pour Spip )
Générer un fichier d'annotation
#+begin_src
bcftools annotate -x INFO/CSQ output-all-gpu.vcf -o spliceai.vcf.gz
bcftools index spliceai.vcf.gz
#+end_src
Annoter avec vep
#+begin_src sh
ln -s /Work/Projects/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa .
ln -s /Work/Projects/bisonex/data/fasta/chm13v2.0/chm13v2.0.fa.fai .
ln -s /Work/Projects/bisonex/data/vep/chm13v2.0/106 .
ln -s /Work/Projects/bisonex/data/clinvar/chm13v2.0/clinvar.vcf.gz .
ln -s /Work/Projects/bisonex/data/clinvar/chm13v2.0/clinvar.vcf.gz.tbi .
#+end_src
Essai 1: on coupe le fichier en 2 pour utiliser le plugin spliceai
bcftools filter -i 'POS<15000' spliceai.vcf.gz -o spliceai1.vcf.gz
bcftools filter -i 'POS>=15000' spliceai.vcf.gz -o spliceai2.vcf.gz
vep -i output-all-gpu.vcf -o output-all-gpu-annotated.vcf.gz --appris --biotype --canonical --ccds --compress_output bgzip --domains --exclude_predicted --flag_pick --hgvs --hgvsg --gene_phenotype --numbers --mane --protein --offline --uniprot --symbol --tsl --use_given_ref --variant_class --vcf --plugin NMD --custom clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNSIG,CLNREVSTAT,CLNDN --plugin SpliceAI,snv=spliceai1.vcf.gz,indel=spliceai2.vcf.gz --fasta chm13v2.0.fa --assembly T2T-CHM13v2.0 --species homo_sapiens_gca009914755v4/ --cache --cache_version 106 --dir_cache 106
test
filter_vep -i output-all-gpu-annotated.vcf.gz --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA) or (SpliceAI_pred_DS_AG and SpliceAI_pred_DS_AG >= 0.2) or (SpliceAI_pred_DS_AL and SpliceAI_pred_DS_AL >= 0.2) or (SpliceAI_pred_DS_DG and SpliceAI_pred_DS_DG >= 0.2) or (SpliceAI_pred_DS_DL and SpliceAI_pred_DS_DL >= 0.2) " --only_matched -o output-all-gpu-filtered.vcf.gz
Méthode un peu sale car on a des "." dans l'annotation spliceai
****** KILL Save
CLOSED: [2023-09-27 Wed 21:40]
# ****** DONE Filtre vep avec spliceAI: 37365 -> 6130. SpliceAI n'apporte rien
# CLOSED: [2023-09-27 Wed 19:37] SCHEDULED: <2023-09-27 Wed>
# :PROPERTIES:
# :ID: c9b2009a-503b-4561-94c6-29ae21a3188d
# :END:
# #+begin_src sh
# filter_vep -i output-all-gpu.vcf --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA) or (SpliceAI_pred_DS_AG and SpliceAI_pred_DS_AG >= 0.2) or (SpliceAI_pred_DS_AL and SpliceAI_pred_DS_AL >= 0.2) or (SpliceAI_pred_DS_DG and SpliceAI_pred_DS_DG >= 0.2) or (SpliceAI_pred_DS_DL and SpliceAI_pred_DS_DL >= 0.2) " --only_matched -o output-all-gpu-filtered.vcf
# #+end_src
# filter_vep -i output-all-gpu.vcf --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA)" --only_matched | grep -c -v '^#'
# 6130
# $ grep -c -v '^#' output-all-gpu-filtered.vcf
# 6130
# ****** DONE Re-vérifier filtre avec spip: 7730 -> probable problème avec spip
# CLOSED: [2023-09-27 Wed 20:54] SCHEDULED: <2023-09-27 Wed>
# filter_vep -i NA12878-sanger-all-T2T.vep.vcf.gz --format vcf --filter " not(Consequence matches non_coding_transcript or Consequence matches stream or Consequence matches intergenic_variant or Consequence matches UTR or Consequence matches intron_variant or Consequence matches synonymous or BIOTYPE matches pseudogene or BIOTYPE matches misc_RNA) or (SPIP_spipScore and SPIP_spipScore >= 20)" --only_matched | grep -c -v '^#'
# perl: warning: Setting locale failed.
# perl: warning: Please check that your locale settings:
# LANGUAGE = (unset),
# LC_ALL = (unset),
# LANG = "en_US.utf8"
# are supported and installed on your system.
# perl: warning: Falling back to the standard locale ("C").
# 7730
****** DONE vérifier si tests sanger passent: ok
CLOSED: [2023-09-28 Thu 01:32] SCHEDULED: <2023-09-27 Wed>
ok !
Haplotypecaller : /Work/Users/apraga/bisonex/out//call_variant/haplotypecaller/NA12878-sanger-all-T2T/NA12878-sanger-all-T2T.haplotypecaller.vcf.gz/Work/Users/apraga/bisonex/out//call_variant/haplotypecaller/NA12878-sanger-all-T2T/NA12878-sanger-all-T2T.haplotypecaller.vcf.gz
144 found over 146
2×3 DataFrame
Row │ variant meanQual depth
│ String Float64 Int64
─────┼──────────────────────────────────────
1 │ chr12:g.13594572C>T 60.0 1
2 │ chr17:g.10204026T>A 60.0 1
/Work/Users/apraga/bisonex/tests/spliceai/output-all-gpu-filtered.vcf.gz
144 found over 146
spliceai : another 0 missed variants
0×3 DataFrame
Row │ variant meanQual depth
│ String Float64 Int64
─────┴──────────────────────────
***** KILL Avec pip: echec
CLOSED: [2023-09-28 Thu 01:34]
2023-09-24 08:28:46.361434: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU.
***** DONE Tester conda: echec
CLOSED: [2023-09-23 Sat 21:43] SCHEDULED: <2023-09-23 Sat>
Anaconda : n'arrive pas à installer
#+begin_quote
- feature:/linux-64::__glibc==2.28=0
- python=3.11 -> libgcc-ng[version='>=11.2.0'] -> __glibc[version='>=2.17']
- spliceai -> tensorflow[version='>=1.13.0'] -> __cuda
- spliceai -> tensorflow[version='>=1.13.0'] -> __glibc[version='>=2.17']
Your installed version is: 2.28
#+end_quote
Il faut utiliser mamba
***** TODO Mail Paul
SCHEDULED: <2023-09-28 Thu>
Au total:
- pas de filtre sur l'épissage pour "rattraper" variants intronique : 6130 variants mais on en perd 4 (donner un exemple)
- avec spip: pas de variant perdu (hormis les 2 liés au bam) mais 7 332 variants au total et exécution lente (1H) mais possible sur serveur
- avec spliceai sur GPU (annotation "à la volée"): pas de variant perdu mais 6609 variants au total. 3h30 de calcul sur le mésocentre, impossible à faire chez nous car GPU
Rejoint ce que disait Yannis....
J'ai l'impression que c'est lié à un grand nombre de missense [1]
Note: j'ai plus confiance dans l'annotation spliceAI pour T2T car moins compliqué à porter
[1] en prenant le transcrit avec la "pire" conséquence, on a 80% de missense (total = 6609)
11 3_prime_utr_variant
9 3_prime_utr_variant&nmd_transcript_variant
6 5_prime_utr_variant
48 coding_sequence_variant
5 coding_sequence_variant&nmd_transcript_variant
121 frameshift_variant
9 frameshift_variant&nmd_transcript_variant
1 frameshift_variant&splice_donor_region_variant
9 frameshift_variant&splice_region_variant
78 inframe_deletion
3 inframe_deletion&nmd_transcript_variant
2 inframe_deletion&splice_region_variant
84 inframe_insertion
2 inframe_insertion&nmd_transcript_variant
1 inframe_insertion&splice_region_variant
156 intron_variant
8 intron_variant&nmd_transcript_variant
24 intron_variant&non_coding_transcript_variant
5305 missense_variant
205 missense_variant&nmd_transcript_variant
3 missense_variant&splice_donor_5th_base_variant
110 missense_variant&splice_region_variant
9 missense_variant&splice_region_variant&nmd_transcript_variant
11 non_coding_transcript_exon_variant
12 splice_acceptor_variant
1 splice_acceptor_variant&nmd_transcript_variant
2 splice_acceptor_variant&non_coding_transcript_variant
9 splice_donor_5th_base_variant&intron_variant
1 splice_donor_5th_base_variant&intron_variant&nmd_transcript_variant
16 splice_donor_region_variant&intron_variant
4 splice_donor_region_variant&intron_variant&non_coding_transcript_variant
19 splice_donor_variant
1 splice_donor_variant&nmd_transcript_variant
3 splice_donor_variant&non_coding_transcript_variant
1 splice_donor_variant&splice_donor_5th_base_variant&3_prime_utr_variant&intron_variant&nmd_transcript_variant
3 splice_donor_variant&splice_donor_5th_base_variant&coding_sequence_variant&intron_variant
1 splice_donor_variant&splice_donor_5th_base_variant&intron_variant
39 splice_polypyrimidine_tract_variant&intron_variant
5 splice_polypyrimidine_tract_variant&intron_variant&nmd_transcript_variant
10 splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant
1 splice_region_variant&3_prime_utr_variant
1 splice_region_variant&5_prime_utr_variant
9 splice_region_variant&intron_variant
1 splice_region_variant&intron_variant&nmd_transcript_variant
2 splice_region_variant&intron_variant&non_coding_transcript_variant
5 splice_region_variant&non_coding_transcript_exon_variant
1 splice_region_variant&non_coding_transcript_variant
43 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant
2 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant&nmd_transcript_variant
6 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant
15 splice_region_variant&synonymous_variant
14 start_lost
44 stop_gained
4 stop_gained&frameshift_variant
2 stop_gained&frameshift_variant&splice_region_variant
3 stop_gained&nmd_transcript_variant
3 stop_gained&splice_region_variant
2 stop_gained&splice_region_variant&nmd_transcript_variant
2 stop_lost
1 stop_lost&nmd_transcript_variant
6 stop_retained_variant
2 stop_retained_variant&nmd_transcript_variant
89 synonymous_variant
2 synonymous_variant&nmd_transcript_variant
1 transcript_ablation
1 upstream_gene_variant
En prenant tous les transcrits, 66% missense (total = 22085)
39 3_prime_UTR_variant
120 3_prime_UTR_variant&NMD_transcript_variant
22 5_prime_UTR_variant
2 5_prime_UTR_variant&NMD_transcript_variant
94 coding_sequence_variant
13 coding_sequence_variant&NMD_transcript_variant
527 downstream_gene_variant
257 frameshift_variant
21 frameshift_variant&NMD_transcript_variant
2 frameshift_variant&splice_donor_region_variant
20 frameshift_variant&splice_region_variant
1 frameshift_variant&splice_region_variant&NMD_transcript_variant
1 incomplete_terminal_codon_variant&coding_sequence_variant
211 inframe_deletion
18 inframe_deletion&NMD_transcript_variant
6 inframe_deletion&splice_region_variant
242 inframe_insertion
22 inframe_insertion&NMD_transcript_variant
4 inframe_insertion&splice_region_variant
983 intron_variant
244 intron_variant&NMD_transcript_variant
358 intron_variant&non_coding_transcript_variant
14690 missense_variant
1416 missense_variant&NMD_transcript_variant
6 missense_variant&splice_donor_5th_base_variant
374 missense_variant&splice_region_variant
34 missense_variant&splice_region_variant&NMD_transcript_variant
383 non_coding_transcript_exon_variant
53 splice_acceptor_variant
11 splice_acceptor_variant&NMD_transcript_variant
11 splice_acceptor_variant&non_coding_transcript_variant
20 splice_donor_5th_base_variant&intron_variant
4 splice_donor_5th_base_variant&intron_variant&NMD_transcript_variant
9 splice_donor_5th_base_variant&intron_variant&non_coding_transcript_variant
59 splice_donor_region_variant&intron_variant
11 splice_donor_region_variant&intron_variant&NMD_transcript_variant
24 splice_donor_region_variant&intron_variant&non_coding_transcript_variant
79 splice_donor_variant
6 splice_donor_variant&NMD_transcript_variant
17 splice_donor_variant&non_coding_transcript_variant
1 splice_donor_variant&splice_donor_5th_base_variant&3_prime_UTR_variant&intron_variant&NMD_transcript_variant
21 splice_donor_variant&splice_donor_5th_base_variant&coding_sequence_variant&intron_variant
3 splice_donor_variant&splice_donor_5th_base_variant&intron_variant
1 splice_donor_variant&splice_donor_5th_base_variant&non_coding_transcript_exon_variant&intron_variant
176 splice_polypyrimidine_tract_variant&intron_variant
27 splice_polypyrimidine_tract_variant&intron_variant&NMD_transcript_variant
48 splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant
1 splice_region_variant&3_prime_UTR_variant
24 splice_region_variant&3_prime_UTR_variant&NMD_transcript_variant
9 splice_region_variant&5_prime_UTR_variant
61 splice_region_variant&intron_variant
23 splice_region_variant&intron_variant&NMD_transcript_variant
37 splice_region_variant&intron_variant&non_coding_transcript_variant
26 splice_region_variant&non_coding_transcript_exon_variant
5 splice_region_variant&non_coding_transcript_variant
145 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant
27 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant&NMD_transcript_variant
41 splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant
37 splice_region_variant&synonymous_variant
3 splice_region_variant&synonymous_variant&NMD_transcript_variant
30 start_lost
5 start_lost&NMD_transcript_variant
135 stop_gained
13 stop_gained&frameshift_variant
3 stop_gained&frameshift_variant&NMD_transcript_variant
2 stop_gained&frameshift_variant&splice_region_variant
14 stop_gained&NMD_transcript_variant
5 stop_gained&splice_region_variant
2 stop_gained&splice_region_variant&NMD_transcript_variant
4 stop_lost
1 stop_lost&NMD_transcript_variant
9 stop_retained_variant
6 stop_retained_variant&NMD_transcript_variant
311 synonymous_variant
24 synonymous_variant&NMD_transcript_variant
1 transcript_ablation
390 upstream_gene_variant
**** TODO Ajout LOEUF et pLI
plugin VEP
**** TODO NMD
plugin VEP
**** KILL Ajout LOEUF
CLOSED: [2023-04-19 mer. 16:32]
plugin VEP
**** DONE Spip
CLOSED: [2023-05-01 Mon 23:07] SCHEDULED: <2023-04-30 Sun>
BED ne semble pas bien marcher (il faut définir une zone)
VCF : trop d’information
Attention : plusieurs transcrits mais résultats identiques. On supprime les doublons.
***** DONE interpretation + score + intervalle de confiance séparé
CLOSED: [2023-05-01 Mon 23:07] SCHEDULED: <2023-04-30 Sun>
Tests :
dans tests/
vep -i 63004925-small.vcf -o postvep.vcf --vcf --fasta genomeRef.fna --dir 109 --merged --pick --offline --custom ../script/spip_annotation.vcf.gz,SPIP,vcf,exact,0,spipInterp,spipScore,spipConfidence
***** DONE Score
CLOSED: [2023-04-22 Sat 15:30]
**** DONE CADD: remplacer par plugin VEP
CLOSED: [2023-05-07 Sun 14:45] SCHEDULED: <2023-05-07 Sun>
***** Test
#+begin_src sh
vep -i test.vcf -o lol.vcf --offline --dir /Work/Projects/bisonex/data/vep/GRCh38/ --merged --vcf --fasta /Work/Projects/bisonex/data/genome/GRCh38.p13/genomeRef.fna --plugin CADD,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.snv.tsv.gz,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.indel.tsv.gz --dir_plugins ../VEP_plugins/ -v
#+end_src
Test
#+begin_src sh
vep --id "1 230710048 230710048 A/G 1" --offline --dir /Work/Projects/bisonex/data/vep/GRCh38/ --merged --vcf --fasta /Work/Projects/bisonex/data/genome/GRCh38.p13/genomeRef.fna --plugin CADD,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.snv.tsv.gz,/Work/Users/apraga/bisonex/work/13/9287a7fef17ab9365f5696f20710cd/gnomad.genomes.r3.0.indel.tsv.gz --hgvsg --plugin pLI --plugin LOEUF -o lol
#+end_src
CSQ=G|missense_variant|MODERATE|AGT|ENSG00000135744|Transcript|ENST00000366667|protein_coding|2/5||||843|776|259|M/T|aTg/aCg|||-1||HGNC|HGNC:333||Ensembl||A|A||1:g.230710048A>G|0.347|-0.277922|
Correspond bien à https://www.ensembl.org/Homo_sapiens/Tools/VEP/Results?tl=I7ZsIbrj14P6lD43-9115494
***** DONE Utiliser whole genome
CLOSED: [2023-04-29 Sat 15:46]
***** KILL Renommer les chromosomes avant ...
CLOSED: [2023-05-01 Mon 09:14] SCHEDULED: <2023-04-30 Sun>
Trop long !
- Téléchargement de CADD: 4h20
- renommer les chromosomes pour SNV : 6h20
- tabix sur les SNV : job tué au bout de 21h....
***** DONE annoter séparément et fusionner les tableaux
CLOSED: [2023-05-07 Sun 14:45] SCHEDULED: <2023-05-01 Mon>
NB: on pourrait filtrer CADD avec tabix pour se restreindre à nos variants
**** DONE clinvar
CLOSED: [2023-04-22 Sat 15:31]
**** KILL Vérifier résultats HGVS avec mutalyzer
CLOSED: [2023-05-01 Mon 09:26]
**** HOLD Parallélisation
***** HOLD par chromosome avec workflow VEP
https://github.com/Ensembl/ensembl-vep/blob/release/109/nextflow/workflows/run_vep.nf
***** HOLD Avec option --fork
**** DONE Utiliser la version de nf-core de VEP
CLOSED: [2023-05-13 Sat 18:27] SCHEDULED: <2023-05-07 Sun>
**** DONE OMIM
CLOSED: [2023-08-31 Thu 10:38] SCHEDULED: <2023-08-29 Tue>
**** DONE pLI et LOEUF depuis gnomAD
CLOSED: [2023-08-31 Thu 10:38] SCHEDULED: <2023-08-29 Tue>
**** DONE Grantham
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-30 Wed>
**** DONE Corriger spliceAI
CLOSED: [2023-08-31 Thu 13:51] SCHEDULED: <2023-08-31 Thu>
Pas d'annotation
- chromosome ? essai 1 au lieu de chr1 : idem. Et fonctionne pour CADD
- index ?
- retélécharger
- indexer nous-mêmes
**** DONE Supprimer score spip en double
CLOSED: [2023-08-31 Thu 14:17] SCHEDULED: <2023-08-31 Thu>
**** DONE Vérifier variant 63126867
CLOSED: [2023-08-31 Thu 10:52] SCHEDULED: <2023-08-31 Thu>
**** DONE Ajouter tronquant ou non
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** DONE Ajouter récessif
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** KILL Corriger allelic depth
CLOSED: [2023-08-31 Thu 11:18] SCHEDULED: <2023-08-31 Thu>
Problème lié à LibreOffice
**** DONE Régénérer annotation pour na12878, inserted et patient PEX1
CLOSED: [2023-08-31 Thu 22:08] SCHEDULED: <2023-08-31 Thu>
**** TODO ACMG incidental
**** DONE Sortie VCF (pour avoir la fraction allélique AF)
CLOSED: [2023-08-28 Mon 17:22]
**** DONE VCF -> tsv avec bcftools
CLOSED: [2023-08-29 Tue 11:03] SCHEDULED: <2023-08-28 Mon>
**** DONE Un seul transcrit après VEP avec filter_vep :filter:
CLOSED: [2023-08-29 Tue 11:03] SCHEDULED: <2023-08-28 Mon>
Avec mise à jour VEP 110, pick_flag semble fonctionner.
***** DONE Test chr20: Pas de variants "perdus"
CLOSED: [2023-08-28 Mon 17:31] SCHEDULED: <2023-08-28 Mon>
contrairement au résultat communiqué à Alexis par mail
#+begin_src sh :dir out/annotate
bcftools +counts vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz
#+end_src
Number of samples: 1
Number of SNPs: 123
Number of INDELs: 32
Number of MNPs: 53
Number of others: 0
Number of sites: 208
#+begin_src sh
filter_vep -i vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz --filter 'PICK' | bcftools +counts
#+end_src
Number of samples: 1
Number of SNPs: 123
Number of INDELs: 32
Number of MNPs: 53
Number of others: 0
Number of sites: 208
2nd vérification
#+begin_src sh :dir out/annotate
filter_vep -i vep/NA12878-sanger-chr20-GRCh38/NA12878-sanger-chr20-GRCh38.vep.vcf.gz --filter 'PICK' --soft_filter | grep fail
#+end_src
***** DONE Test NA12878 + variants sanger : variants perdus avec --pick ?
CLOSED: [2023-08-29 Tue 1