Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,4 @@ cython_debug/

# PyPI configuration file
.pypirc
testing.ipynb
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
- id: yamllint
exclude: pre-commit-config.yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.15.11"
rev: "v0.15.12"
hooks:
- id: ruff-format
- id: ruff-check
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ZedProfiler

[![Coverage](https://img.shields.io/badge/coverage-87%25-green)](#quality-gates)
[![Coverage](https://img.shields.io/badge/coverage-94%25-brightgreen)](#quality-gates)

CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling.

Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"beartype>=0.19",
"bioio>=3.3",
"bioio-tifffile>=1.3",
"fire>=0.7.1",
"jinja2>=3.1.6",
"pandas>=3.0.2",
"pandera>=0.31.1",
"scikit-image>=0.26",
]
scripts.ZedProfiler = "ZedProfiler.cli:trigger"

Expand Down
12 changes: 12 additions & 0 deletions src/zedprofiler/IO/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from .feature_writing_utils import (
FeatureMetadata,
format_morphology_feature_name,
remove_underscores_from_string,
save_features_as_parquet,
)
from .loading_classes import (
ImageSetConfig,
ImageSetLoader,
ObjectLoader,
TwoObjectLoader,
)
191 changes: 191 additions & 0 deletions src/zedprofiler/IO/feature_writing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Functions for formatting morphology feature names in a consistent way.

Formats morphology feature names and saves features as parquet files.
"""

from __future__ import annotations

import dataclasses
import pathlib

import pandas
import pandera.pandas as pa
from beartype import beartype

FEATURE_NAME_COMPONENT_COLUMNS = (
"compartment",
"channel",
"feature_type",
"measurement",
)


def _coerce_dataframe_column_names_to_strings(
dataframe: pandas.DataFrame,
) -> pandas.DataFrame:
"""Ensure DataFrame column labels are string-typed before writing."""
parsed_dataframe = dataframe.copy()
parsed_dataframe.columns = [str(column) for column in parsed_dataframe.columns]
return parsed_dataframe


def _coerce_feature_name_components(
dataframe: pandas.DataFrame,
) -> pandas.DataFrame:
"""Normalize feature-name components using shared delimiter cleanup."""
parsed_dataframe = dataframe.copy()
for column in FEATURE_NAME_COMPONENT_COLUMNS:
if column in parsed_dataframe.columns:
parsed_dataframe[column] = parsed_dataframe[column].map(
remove_underscores_from_string
)
return parsed_dataframe


FEATURE_OUTPUT_SCHEMA = pa.DataFrameSchema(
columns={},
strict=False,
parsers=[pa.Parser(_coerce_dataframe_column_names_to_strings)],
)


FEATURE_NAME_COMPONENT_SCHEMA = pa.DataFrameSchema(
columns={
"compartment": pa.Column(object, nullable=False, coerce=True),
"channel": pa.Column(object, nullable=False, coerce=True),
"feature_type": pa.Column(object, nullable=False, coerce=True),
"measurement": pa.Column(object, nullable=False, coerce=True),
},
strict=True,
parsers=[pa.Parser(_coerce_feature_name_components)],
)


@beartype
def remove_underscores_from_string(string: object) -> str:
"""
Remove unwanted delimiters from a string and replace them with hyphens.

Parameters
----------
string : str
The string to remove unwanted delimiters from.

Returns
-------
str
The string with unwanted delimiters removed and replaced with hyphens.
"""
if not isinstance(string, str):
try:
string = str(string)
except Exception as e:
msg = (
f"Input string must be a string or convertible to a string. "
f"Received input: {string!r} of type {type(string)}"
)
raise ValueError(msg) from e
string = string.translate(
str.maketrans(
{
"_": "-",
".": "-",
" ": "-",
"/": "-",
Comment thread
MikeLippincott marked this conversation as resolved.
}
)
)

return string


@beartype
def format_morphology_feature_name(
Comment thread
MikeLippincott marked this conversation as resolved.
compartment: object, channel: object, feature_type: object, measurement: object
) -> str:
"""
Format a morphology feature name in a consistent way across all morphology features.
This format follows specification for the following:
https://github.com/WayScience/NF1_3D_organoid_profiling_pipeline/blob/main/docs/RFC-2119-Feature-Naming-Convention.md

Parameters
----------
compartment : str
The compartment name.
channel : str
The channel name.
feature_type : str
The feature type.
measurement : str
The measurement name.

Returns
-------
str
The formatted feature name.
"""

component_frame = pandas.DataFrame(
[
{
"compartment": compartment,
"channel": channel,
"feature_type": feature_type,
"measurement": measurement,
}
]
)
coerced_components = FEATURE_NAME_COMPONENT_SCHEMA.validate(component_frame)
parsed_row = coerced_components.iloc[0]
return (
f"{parsed_row['compartment']}_{parsed_row['channel']}_"
f"{parsed_row['feature_type']}_{parsed_row['measurement']}"
)


@beartype
@dataclasses.dataclass
class FeatureMetadata:
"""Metadata for feature output."""

compartment: str
channel: str
feature_type: str
cpu_or_gpu: str


@beartype
def save_features_as_parquet(
Comment thread
MikeLippincott marked this conversation as resolved.
Comment thread
MikeLippincott marked this conversation as resolved.
parent_path: pathlib.Path,
df: pandas.DataFrame,
metadata: FeatureMetadata,
) -> pathlib.Path:
"""Save features as parquet files in a consistent way.

Saves features as parquet files with consistent naming across morphology
features.

Parameters
----------
parent_path : pathlib.Path
The parent path to save the features to.
df : pandas.DataFrame
The dataframe containing the features to save.
metadata : FeatureMetadata
Metadata for the feature output (compartment, channel, feature_type,
cpu_or_gpu).

Returns
-------
pathlib.Path
"""
validated_df = FEATURE_OUTPUT_SCHEMA.validate(df)
output_prefix = format_morphology_feature_name(
metadata.compartment,
metadata.channel,
metadata.feature_type,
metadata.cpu_or_gpu,
)
save_path = parent_path / f"{output_prefix}_features.parquet"
validated_df.to_parquet(save_path, index=False)
return save_path
Loading
Loading