∅:D[
3.237] → [
7.378:1892]
B:BD[
7.378] → [
7.378:1892]
B:BD[
8.493] → [
3.240:1086]
∅:D[
3.1086] → [
8.493:522]
B:BD[
8.493] → [
8.493:522]
B:BD[
8.522] → [
3.1087:1206]
∅:D[
3.1206] → [
8.641:690]
B:BD[
8.641] → [
8.641:690]
B:BD[
8.690] → [
3.1207:1275]
∅:D[
3.1275] → [
8.758:826]
B:BD[
8.758] → [
8.758:826]
B:BD[
8.826] → [
3.1276:1448]
∅:D[
3.1448] → [
8.879:901]
B:BD[
8.879] → [
8.879:901]
B:BD[
8.901] → [
3.1449:1516]
∅:D[
3.1516] → [
8.969:970]
B:BD[
8.969] → [
8.969:970]
∅:D[
8.970] → [
7.1892:1893]
B:BD[
7.1892] → [
7.1892:1893]
B:BD[
7.1893] → [
3.1517:1569]
∅:D[
3.1569] → [
7.1914:1958]
B:BD[
7.1914] → [
7.1914:1958]
B:BD[
7.1958] → [
8.971:1019]
B:BD[
7.2415] → [
7.2415:2437]
B:BD[
7.2439] → [
7.2439:2459]
∅:D[
7.2459] → [
9.125:227]
B:BD[
6.664] → [
9.125:227]
B:BD[
6.800] → [
6.800:854]
B:BD[
6.854] → [
9.228:265]
B:BD[
6.938] → [
6.938:982]
B:BD[
6.982] → [
9.266:325]
∅:D[
9.325] → [
6.1238:1272]
B:BD[
6.1238] → [
6.1238:1272]
B:BD[
6.1272] → [
9.326:747]
B:BD[
9.747] → [
3.1570:1580]
∅:D[
3.1580] → [
6.1501:1502]
B:BD[
6.1501] → [
6.1501:1502]
B:BD[
6.1502] → [
3.1581:1592]
∅:D[
9.1152] → [
6.1721:1722]
∅:D[
3.1592] → [
6.1721:1722]
B:BD[
6.1721] → [
6.1721:1722]
#+begin_src python :results output
import pandas as pd
from pathlib import Path
def readExome(root):
    """Read the ``Suivi exomes`` TSV export located in ``root``.

    ``root`` is combined with the file name using ``/`` (a ``pathlib.Path``
    in the calling script).  The returned DataFrame holds only the columns
    listed in ``kept_columns``, in that order; every other column of the
    export is discarded.
    """
    kept_columns = [
        'patientID',
        'specimenID',
        'presta',
        'capture',
        'sequencage',
        'date_reception',
        'resultat',
        'incidental',
        'commentaires',
        'runComments',
    ]
    export_path = root / "Suivi exomes_2022-02-02_12-34-15.tsv"
    raw = pd.read_csv(export_path, sep="\t")
    # Trim the export down to the columns of interest
    return raw[kept_columns]
def readPanel(root):
    """Read the ``MiSeq coverage`` TSV export located in ``root``.

    Returns a DataFrame with exactly three columns, in this order:
    ``patientID``, ``specimenID`` and ``target``.
    """
    export_path = root / "MiSeq coverage_2022-02-02_12-31-54.tsv"
    coverage = pd.read_csv(export_path, sep="\t")
    # Only the identifiers and the sequenced target are needed downstream
    kept_columns = ['patientID', 'specimenID', 'target']
    return coverage[kept_columns]
def readDNA(root):
    """Read the DNA bank TSV export located in ``root``.

    The ``dataSetsPatients`` prefix is stripped from the column names of the
    raw export, then the table is reduced to the five columns listed below
    (returned in that order).

    Raises ``KeyError`` (from pandas) when one of the expected columns is
    absent after the prefix has been stripped.
    """
    dna = pd.read_csv(root / "DNAThèque_2022-02-02_11-46-28.tsv", sep="\t")
    # Clean the column names BEFORE selecting.  The selection below uses fixed,
    # unprefixed names, so running the cleanup afterwards could never change
    # anything, and a prefixed header would already have failed the selection.
    # regex=False: the prefix is a literal string, not a pattern.
    dna.columns = dna.columns.str.replace('dataSetsPatients', '', regex=False)
    return dna[[
        'patientID',
        'sampleID',
        'sampleIDdate_de_reception',
        'sampleIDorigine_de_l_echantillon',
        'commentaires',
    ]]
def readPatients(root):
    """Read the ``Patients`` TSV export located in ``root``.

    Returns a DataFrame restricted to the identity, family, clinician,
    pathology, gene and comment columns listed in ``kept_columns``, in that
    order.
    """
    kept_columns = [
        'patientID',
        'nom',
        'prenom',
        'date_de_naissance',
        'date_de_deces',
        'sexe',
        'parente',
        'statut',
        'foetus',
        'consanguinite',
        'clinicien_referent',
        'clinicien_referentinstitution',
        'pathologie',
        'gene',
        'commentaires',
    ]
    export_path = root / "Patients_2022-02-02_11-44-03.tsv"
    everyone = pd.read_csv(export_path, sep="\t")
    return everyone[kept_columns]
def cleanExome(d):
    """Select, order and tidy the columns of the merged exome table.

    Keeps the columns listed in ``cols`` (in that order), renames ``gene``
    to ``gene_responsable`` and blanks out the literal string ``"None"`` in
    that column.  The input frame is not modified; a new DataFrame is
    returned.

    Raises ``KeyError`` (from pandas) if ``d`` lacks one of the columns.
    """
    cols = ['nom', 'prenom', 'date_de_naissance',
            'sexe', 'parente', 'statut',
            'clinicien_referent', 'clinicien_referentinstitution', 'pathologie',
            'gene',
            'sampleIDdate_de_reception', 'sampleIDorigine_de_l_echantillon',
            'patientIDexome', 'presta', 'capture', 'sequencage',
            'date_reception', 'resultat', 'incidental', 'commentairesexome',
            'commentaires',
            'commentairesdna']
    d = d[cols].rename(columns={"gene": "gene_responsable"})
    # Remove "None" placeholders.  Assign the result back instead of calling
    # replace(..., inplace=True) on d['gene_responsable']: that form mutates a
    # temporary Series, which pandas warns about and which leaves the frame
    # untouched when Copy-on-Write is active.
    d['gene_responsable'] = d['gene_responsable'].replace("None", "")
    return d
def getExome(root, dna, p):
    """Build the exome workbook and write it to ``root / "exome dijons.xlsx"``.

    ``dna`` is the sample table and ``p`` the patient table.  Samples are
    inner-joined to the exome export on sample ID / specimen ID, the result
    is inner-joined to the patients on patient ID, and the merged table is
    passed through ``cleanExome`` before being saved.  Nothing is returned.
    """
    exomes_by_specimen = readExome(root).set_index("specimenID")
    samples_by_id = dna.set_index("sampleID")
    # Sample <-> exome: columns present on both sides get the "dna" / "exome"
    # suffix respectively
    per_sample = samples_by_id.join(
        exomes_by_specimen,
        how="inner",
        lsuffix="dna",
        rsuffix="exome",
    )
    # Attach the patient record, matching on the patient ID from the sample side
    patients_by_id = p.set_index("patientID")
    per_patient = per_sample.set_index('patientIDdna').join(
        patients_by_id,
        how="inner",
    )
    # Select and order the output columns, then export
    workbook = cleanExome(per_patient)
    workbook.to_excel(root / "exome dijons.xlsx")
def readReports(root):
    """Read the ``Compte-rendus`` TSV export located in ``root``.

    Returns a DataFrame restricted to the patient/sample identifiers and the
    report fields listed in ``kept_columns``, in that order.
    """
    kept_columns = [
        'patientID',
        'sampleID',
        'examen',
        'resultat',
        'interpretation',
        'signataire',
        'date_rendu',
    ]
    export_path = root / "Compte-rendus_2022-02-02_12-37-37.tsv"
    reports = pd.read_csv(export_path, sep="\t")
    return reports[kept_columns]
def cleanPanel(d):
    """Select, order and tidy the columns of the merged panel table.

    Keeps the columns listed in ``cols`` (in that order), renames ``gene``
    to ``gene_responsable`` and blanks out the literal string ``"None"`` in
    that column.  The input frame is not modified; a new DataFrame is
    returned.

    Raises ``KeyError`` (from pandas) if ``d`` lacks one of the columns.
    """
    cols = ['nom', 'prenom', 'date_de_naissance',
            'sexe', 'parente', 'statut',
            'clinicien_referent', 'clinicien_referentinstitution', 'pathologie',
            'gene',
            'sampleIDdate_de_reception', 'sampleIDorigine_de_l_echantillon',
            'target',
            'examen', 'resultat', 'signataire', 'date_rendu',
            'commentaires', 'commentairespatient']
    d = d[cols].rename(columns={"gene": "gene_responsable"})
    # Remove "None" placeholders.  Assign the result back instead of calling
    # replace(..., inplace=True) on d['gene_responsable']: that form mutates a
    # temporary Series, which pandas warns about and which leaves the frame
    # untouched when Copy-on-Write is active.
    d['gene_responsable'] = d['gene_responsable'].replace("None", "")
    return d
def getPanel(root, dna, p):
    """Build the panel workbook and write it to ``root / "panel dijon.xlsx"``.

    ``dna`` is the sample table and ``p`` the patient table.  Samples are
    inner-joined to the panel export on sample ID / specimen ID, then to the
    patients on patient ID, then to the reports on patient ID.  The merged
    table is passed through ``cleanPanel`` before being saved.  Nothing is
    returned.
    """
    panel_by_specimen = readPanel(root).set_index("specimenID")
    samples_by_id = dna.set_index("sampleID")
    # Sample <-> panel: columns present on both sides get the "dna" / "panel"
    # suffix respectively
    per_sample = samples_by_id.join(
        panel_by_specimen,
        how="inner",
        lsuffix="dna",
        rsuffix="panel",
    )
    # Attach the patient record.  lsuffix applies to the LEFT (sample-side)
    # frame, so an overlapping sample-side column is the one that receives
    # the "patient" suffix.
    patients_by_id = p.set_index("patientID")
    per_patient = per_sample.set_index('patientIDdna').join(
        patients_by_id,
        how="inner",
        lsuffix="patient",
    )
    # Attach the reports; the match is on patient ID only (both frames are
    # indexed by it at this point)
    reports_by_patient = readReports(root).set_index("patientID")
    with_reports = per_patient.join(reports_by_patient, how="inner", lsuffix="cr")
    # Select and order the output columns, then export
    workbook = cleanPanel(with_reports)
    workbook.to_excel(root / "panel dijon.xlsx")
# Folder that holds the TSV exports; the generated workbook is written there too
root = Path("/mnt/c/Users/alexi/Documents/mustard")
# Sample and patient tables are loaded once and handed to the report builder
dna = readDNA(root=root)
p = readPatients(root=root)
# Only the panel workbook is produced; the exome call is left disabled
# getExome(root, dna, p)
getPanel(root=root, dna=dna, p=p)
#+end_src
#+RESULTS:
#+begin_src python
# Columns kept for the renamed view, in output order.
# NOTE(review): `d` is not defined in this snippet; it presumably has to
# exist already in the session that runs it -- confirm.
cols = [
    'nom',
    'prenom',
    'date_de_naissance',
    'sexe',
    'parente',
    'statut',
    'clinicien_referentinstitution',
    'pathologie',
    'sampleIDdate_de_reception',
    'sampleIDorigine_de_l_echantillon',
    'capture',
    'sequencage',
    'date_reception',
    'resultat',
    'incidental',
    'commentairesdna',
    'commentairesexome',
]
# Give a handful of the kept columns a display name
d2 = d[cols].rename(
    columns={
        'clinicien_referentinstitution': 'provenance',
        'sampleIDorigine_de_l_echantillon': 'Biopsie',
        'sampleIDdate_de_reception': 'Date prélèvement',
        'date_reception': 'Date réception',
        'pathologie': 'Malformation',
    }
)
print(d2.columns)
# Author's note: the line below "does not work".
# NOTE(review): applymap returns a new DataFrame rather than editing d2, so
# the result would need to be assigned back -- confirm that this is the issue.
# d2.applymap(lambda x: "" if str(x) == "Non renseigné" else x)
#+end_src
#+RESULTS:
***** TODO Exomes
****** TODO Enlever les doublons
***** DONE Panel
CLOSED: [2022-09-12 Mon 22:44]
****** DONE Enlever les doublons
CLOSED: [2022-09-12 Mon 22:44]
***** TODO Variations à vérifier