Set reference genome for selected RNA samples
Vince-janv opened this issue · comments
Description
The Tomte pipeline requires the reference_genome field to be set at the sample level. When Tomte analyses are ordered for old samples, the analyses cannot start because the field is not set on those samples.
Solution:
After a meeting with KN, EFC and VI plus communication with the affected customers the following patch was decided upon.
For cust002:
Set the reference_genome field for all RNA-samples missing it to hg19.
For cust003:
Set the reference_genome field for all RNA-samples missing it to hg38.
RNA-samples can be identified by their apptag: if `sample.application_version.application.prep_category == "wts"`,
then the sample is an RNA-sample.
Script performing the changes described above
from cg.store.database import initialize_database
from cg.store.store import Store
from sqlalchemy import select
from cg.store.models import Sample, Customer, Application, ApplicationVersion
# Set up the database connection before any Store is created.
# NOTE(review): "database-string" looks like a placeholder for the real
# connection URI — confirm and substitute before running.
initialize_database("database-string")

# Module-level store handle shared by the functions below.
status_db = Store()
def get_rna_samples(customer_id: str) -> list[Sample]:
    """Fetch a customer's RNA samples from the status database.

    A sample is treated as an RNA sample when the application behind its
    application version has prep category "wts".

    Args:
        customer_id: Internal id of the customer, e.g. "cust002".

    Returns:
        All matching samples, regardless of whether their reference genome
        is already set.
    """
    belongs_to_customer = Customer.internal_id == customer_id
    is_rna_application = Application.prep_category == "wts"

    # Join sample -> customer and sample -> application version -> application
    # so that both predicates can be applied in a single query.
    query = select(Sample).join(Sample.customer)
    query = query.join(Sample.application_version)
    query = query.join(ApplicationVersion.application)
    query = query.where(belongs_to_customer).where(is_rna_application)

    result = status_db.session.execute(query)
    return result.scalars().all()
def set_reference_genome(sample: Sample, reference_genome: str) -> None:
    """Assign a reference genome to the sample unless it already has one.

    Args:
        sample: Sample to update in place.
        reference_genome: Genome build to assign, e.g. "hg19" or "hg38".
    """
    # Any truthy existing value is kept; only missing/empty values are filled.
    if sample.reference_genome:
        return
    sample.reference_genome = reference_genome
def set_reference_genome_for_rna_samples(customer_id: str, reference_genome) -> None:
    """Fill in the reference genome on a customer's RNA samples.

    Samples that already have a reference genome are left as they are.
    Nothing is committed here; the caller is responsible for committing
    the session.

    Args:
        customer_id: Internal id of the customer whose samples are patched.
        reference_genome: Genome build to assign where it is missing.
    """
    for rna_sample in get_rna_samples(customer_id):
        set_reference_genome(rna_sample, reference_genome)
if __name__ == "__main__":
    # Reference genome to assign per customer, in the order they are patched.
    genome_by_customer = {"cust002": "hg19", "cust003": "hg38"}
    for customer, genome in genome_by_customer.items():
        set_reference_genome_for_rna_samples(customer_id=customer, reference_genome=genome)

    # Commit once, after both customers have been processed.
    status_db.session.commit()
@Vince-janv Looks good ✅
Tests pass in stage
Script run in production