Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions scripts/create_resources/test_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ viash run src/methods/cellpose/config.vsh.yaml -- \
--output $DATASET_DIR/prediction.h5ad

# run one metric
# TODO: implement this!
# viash run src/metrics/ari/config.vsh.yaml -- \
# --input_prediction $DATASET_DIR/prediction.h5ad \
# --input_scrnaseq_reference $DATASET_DIR/scrnaseq_reference.h5ad \
# --output $DATASET_DIR/score.h5ad
# TODO: the input files below are placeholders — update them once the final prediction/reference outputs are produced
viash run src/metrics/ari/config.vsh.yaml -- \
--input_scrnaseq_reference $RAW_DATA/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad \
--input_prediction $DATASET_DIR/output_scrnaseq_reference.h5ad \
--output $DATASET_DIR/score.h5ad

# write manual state.yaml. this is not actually necessary but you never know it might be useful
cat > $DATASET_DIR/state.yaml << HERE
Expand Down
27 changes: 27 additions & 0 deletions src/control_methods/random_labels/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Base component API configuration
__merge__: ../../api/comp_control_method.yaml

# Component configuration
name: "random_labels"
label: Random Labels
summary: "Negative control by randomly generating labels."
description: "This method serves as a negative control, where random labels are generated for the data."
info:
  preferred_normalization: counts
  variants:
    # NOTE(review): variant key is "random_features" but the component is
    # "random_labels" — looks like a copy-paste leftover; confirm intended name.
    random_features:

# Script configuration
resources:
  - type: python_script
    path: script.py

# Platform configuration
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      # Resource labels for the nextflow runner.
      label: [lowtime, lowmem, lowcpu]
39 changes: 39 additions & 0 deletions src/control_methods/random_labels/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

import anndata as ad
import random
import pandas as pd

## VIASH START
par = {
    "input": "resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad",
    "output": "resources_test/task_spatial_segmentation/mouse_brain_combined/reference_prediction.h5ad",
    "seed": 123,
    "label": "cell_type"
}
meta = {
    "name": "random_labels",
}
## VIASH END

# Negative-control method: assign a random label (1-10) to every observation
# so downstream metrics have a chance-level baseline to compare against.

# Explicit None check: a seed of 0 is falsy, so the previous truthiness test
# would silently skip seeding and make runs non-reproducible.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}")
    random.seed(par["seed"])

print("Load input data", flush=True)
adata = ad.read_h5ad(par["input"])

print("Create random labels", flush=True)
# One uniformly random label per observation; the column name is configurable.
adata.obs[par["label"]] = [random.randint(1, 10) for _ in range(adata.n_obs)]

print("Create output AnnData", flush=True)
# Emit only the prediction column plus the bookkeeping metadata that the
# metric components read from `uns`.
output = ad.AnnData(
    obs=pd.DataFrame(adata.obs[par["label"]]),
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "normalization_id": adata.uns["normalization_id"],
        "method_id": meta["name"],
    },
)

print("Write output to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
52 changes: 52 additions & 0 deletions src/data_processors/leiden/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Base component API configuration
__merge__: ../../api/comp_data_processor.yaml

# NOTE(review): this component lives in src/data_processors/leiden but is
# named "process_dataset" — likely a copy-paste leftover; confirm the
# intended component name (duplicate names collide in a viash namespace).
name: process_dataset

# Component-specific parameters; defaults mirror the script's test values.
arguments:
  - name: "--label"
    type: "string"
    default: "cell_type"
    description: Label added to anndata for prediction.
  - name: "--n_neighbors"
    type: "integer"
    default: 20
    description: Number of neighbors to use for nearest neighbors distance matrix.
  - name: "--min_dist"
    type: "double"
    default: 0.1
    description: Effective minimum distance to use for UMAP.
  - name: "--spread"
    type: "double"
    default: 1.2
    description: The effective scale of embedded points to use for UMAP.
  - name: "--resolution"
    type: "double"
    default: 1.0
    description: The resolution to use for leiden clustering.
  - name: "--seed"
    type: "integer"
    default: 123
    description: Seed.

# Script configuration
resources:
  - type: python_script
    path: script.py

# Engine configuration
engines:
  - type: docker
    #image: openproblems/base_pytorch_nvidia:1 # TODO: ideally get gpu image to work
    image: openproblems/base_python:1
    setup:
      - type: python
        packages: scikit-learn
      - type: python
        packages: leidenalg
    __merge__:
      - /src/base/setup_spatialdata_partial.yaml
  - type: native

runners:
  - type: executable
  - type: nextflow
    directives:
      # Resource labels for the nextflow runner.
      label: [highmem, midcpu, midtime]
43 changes: 43 additions & 0 deletions src/data_processors/leiden/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

import random
import anndata as ad
import scanpy as sc
import pandas as pd

## VIASH START
par = {
    'input': 'resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad',
    'output': 'resources_test/task_spatial_segmentation/mouse_brain_combined/method_prediction.h5ad',
    'label': 'cell_type',
    'n_neighbors': 20,
    'min_dist': 0.1,
    'spread': 1.2,
    'resolution': 1.0,
    'seed': 123
}
## VIASH END

# Data processor: Leiden-cluster the input and emit only the cluster labels
# plus the bookkeeping metadata needed by downstream metric components.

# Explicit None check: a seed of 0 is falsy, so the previous truthiness test
# would silently skip seeding and make runs non-reproducible.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}")
    random.seed(par["seed"])

print('>> Reading input files', flush=True)
adata = ad.read_h5ad(par['input'])

print('>> Perform Leiden clustering', flush=True)
# Leiden clusters on the neighbour graph built by `pp.neighbors`. The UMAP
# step does not feed into the clustering and its embedding is not exported;
# it is kept because min_dist/spread are part of the component's declared
# interface — TODO(review): confirm whether it can be dropped.
sc.pp.neighbors(adata, n_neighbors=par['n_neighbors'], random_state=par['seed'])
sc.tl.umap(adata, min_dist=par['min_dist'], spread=par['spread'], random_state=par['seed'])
sc.tl.leiden(adata, resolution=par['resolution'], key_added=par["label"], random_state=par['seed'])

print(">> Write output AnnData to file", flush=True)
# Emit only the label column plus the `uns` metadata the metrics read.
output = ad.AnnData(
    obs=pd.DataFrame(adata.obs[par["label"]]),
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "normalization_id": adata.uns["normalization_id"],
        # "method_id": adata.uns["method_id"],  # TODO: set once upstream provides it
    },
)

output.write_h5ad(par['output'], compression='gzip')
47 changes: 0 additions & 47 deletions src/metrics/accuracy/script.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,27 @@ __merge__: ../../api/comp_metric.yaml

# A unique identifier for your component (required).
# Can contain only lowercase letters or underscores.
name: accuracy
name: ari

# Metadata for your component
info:
metrics:
# A unique identifier for your metric (required).
# Can contain only lowercase letters or underscores.
- name: accuracy
- name: ari
# A relatively short label, used when rendering visualisarions (required)
label: Accuracy
label: ARI
# A one sentence summary of how this metric works (required). Used when
# rendering summary tables.
summary: "The percentage of correctly predicted labels."
summary: "Adjusted Rand index to measure the similarity between two data clusterings."
# A multi-line description of how this component works (required). Used
# when rendering reference documentation.
description: |
The percentage of correctly predicted labels.
The Rand index is the accuracy of determining if a link belongs within a cluster or not.
The Rand index has a value between 0 and 1, with 0 indicating that the two data clusterings do not agree on any pair of points and 1 indicating that the data clusterings are exactly the same.
# A reference key from the bibtex library at src/common/library.bib (required).
references:
doi: 10.48550/arXiv.2008.05756
doi: 10.1080/01621459.1971.10482356
# The minimum possible value for this metric (required)
min: 0
# The maximum possible value for this metric (required)
Expand All @@ -36,11 +37,11 @@ info:
maximize: true

# Component-specific parameters (optional)
# arguments:
# - name: "--n_neighbors"
# type: "integer"
# default: 5
# description: Number of neighbors to use.
arguments:
- name: "--label"
type: "string"
default: "leiden"
description: Label to be used to perform ARI.

# Resources required to run the component
resources:
Expand All @@ -60,6 +61,8 @@ engines:
setup:
- type: python
packages: scikit-learn
- type: python
packages: leidenalg

runners:
# This platform allows running the component natively
Expand Down
40 changes: 40 additions & 0 deletions src/metrics/ari/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

import anndata as ad
from sklearn.metrics import adjusted_rand_score

## VIASH START
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
par = {
    'input_scrnaseq_reference': 'resources_test/task_spatial_segmentation/mouse_brain_combined/reference_prediction.h5ad',
    'input_prediction': 'resources_test/task_spatial_segmentation/mouse_brain_combined/method_prediction.h5ad',
    'output': 'resources_test/task_spatial_segmentation/mouse_brain_combined/score.h5ad',
    'label': 'cell_type'
}
meta = {
    'name': 'ari'
}
## VIASH END

# Metric component: compare the predicted labeling against the reference
# labeling with the Adjusted Rand Index and write a score AnnData.

print('>> Reading input files', flush=True)
input_scrnaseq_reference = ad.read_h5ad(par['input_scrnaseq_reference'])
input_prediction = ad.read_h5ad(par['input_prediction'])

# Observations must be aligned one-to-one before the labelings are compared.
assert (input_prediction.obs_names == input_scrnaseq_reference.obs_names).all(), "obs_names not the same in prediction and solution inputs"

print('>> Compute metrics', flush=True)
# Keep metric_ids and metric_values as parallel lists (one entry per metric);
# previously metric_values was a bare scalar while metric_ids was a list.
uns_metric_ids = ['ari']
uns_metric_values = [
    adjusted_rand_score(
        input_scrnaseq_reference.obs[par['label']],
        input_prediction.obs[par['label']],
    )
]

print(">> Write output AnnData to file", flush=True)
output = ad.AnnData(
    uns={
        'dataset_id': input_prediction.uns['dataset_id'],
        'normalization_id': input_prediction.uns['normalization_id'],
        # 'method_id': input_prediction.uns['method_id'],  # TODO: enable once producers set it
        'metric_ids': uns_metric_ids,
        'metric_values': uns_metric_values
    }
)
output.write_h5ad(par['output'], compression='gzip')
Loading