From bdcc956dd4d30465399b71ee20066e63fc4e823e Mon Sep 17 00:00:00 2001 From: Ela Praznik Date: Wed, 22 Apr 2026 17:24:23 +0200 Subject: [PATCH] Add batch correction part of quiz --- .../single-cell-analysis/quiz-02/index.md | 87 +++++++++++++++++-- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/_chapters/single-cell-analysis/quiz-02/index.md b/_chapters/single-cell-analysis/quiz-02/index.md index e8703ce..706982f 100644 --- a/_chapters/single-cell-analysis/quiz-02/index.md +++ b/_chapters/single-cell-analysis/quiz-02/index.md @@ -71,12 +71,12 @@ b) Filter cells based on a minimum number of 500 and a maximum number of 3000 ex points={1} type="multi" question="Why do we filter out cells with extremely high or low gene expression counts?" - scorer={(answer) => answer === "All of the above."} + scorer={(answer) => answer === "all of the above"} options={[ - "Cells with very few detected genes may be damaged or of low quality.", - "Cells with an unusually high number of expressed genes may be multiplets or artifacts.", - "Removing extreme cells improves the accuracy of downstream clustering and visualization.", - "All of the above." + "Cells with very few detected genes may be damaged or of low quality", + "Cells with an unusually high number of expressed genes may be multiplets or artifacts", + "Removing extreme cells improves the accuracy of downstream clustering and visualization", + "All of the above" ]} neutralOptions={["I don't understand the question."]} trials={2} @@ -113,7 +113,7 @@ Normalize expression values for each gene in each cell to counts per 10000, loga type="multi" question="Why do we need to normalize the gene expression values in single-cell analysis?" 
scorer={(answer) => answer === "to account for sequencing depth and make gene expression values comparable across cells"} - options={["To eliminate biological variation between different cell types.", "To account for sequencing depth and make gene expression values comparable across cells.", "To change gene expression values so that all genes have the same expression level.", "Normalization is only needed for datasets with a small number of cells."]} + options={["To eliminate biological variation between different cell types", "To account for sequencing depth and make gene expression values comparable across cells", "To change gene expression values so that all genes have the same expression level", "Normalization is only needed for datasets with a small number of cells"]} neutralOptions={["I don't understand the question."]} trials={2} timeout={10}> @@ -150,5 +150,80 @@ Plot the preprocessed and annotated data in a new t-SNE plot and compare it to t ### Task 4 - Batch Effect Correction +Download the sample of a pancreas single-cell gene expression dataset ([pancreas_sampled_1k5k.tab](http://file.biolab.si/datasets/pancreas_sampled_1k5k.tab)) and load it into Orange. Generate a t-SNE plot. 
+ answer === "3"} + options={["11", "2", "3"]} + neutralOptions={["I don't understand the question."]} + trials={2} + timeout={10}> + + + + + answer === "to align datasets from different sources"} + options={["To normalize the data", "To align datasets from different sources", "To reduce the size of the dataset", "To separate datasets from different sources"]} + neutralOptions={["I don't understand the question."]} + trials={2} + timeout={10}> + + + +**Apply two different batch-effect correction methods to the dataset:** + +a) Using the Align Datasets widget (set the Data source indicator to Batch and leave all other parameters at their default values) + + +b) Using the Harmony widget (leave all parameters at their default values) + +**For each method, generate a t-SNE embedding of the corrected data. Compare the t-SNE plots (uncorrected, Align Datasets corrected, Harmony corrected) side by side.** + + + answer === "align datasets"} + options={["Harmony", "Neither removed any batch effects", "Align Datasets", "Both produce the same plot"]} + neutralOptions={["I don't understand the question."]} + trials={2} + timeout={10}> + + + + + +Start from the uncorrected dataset and create a second Harmony workflow: add a new Harmony widget, set the parameter theta to 2.5, and leave all other parameters at their default values. Connect the output of this widget to a new t-SNE plot and set the number of principal components used to 30. + +Compare this plot with the previous t-SNE plot obtained using Harmony with default parameters. Focus on how the change in theta affects the mixing of batches and the separation of clusters. + + + answer === "yes, both are improved"} + options={[ + "Yes, both are improved", + "No, there is no noticeable improvement in either", + "Batch mixing improves, but cluster separation becomes worse" + ]} + neutralOptions={["I don't understand the question."]} + trials={2} + timeout={10}> + + +