Source code for neuro_morpho.run

from pathlib import Path

import gin

import neuro_morpho.logging.base as log
import neuro_morpho.reports.generator as generator
from neuro_morpho.model import base



[docs]
def _config_line_filter(line: str) -> bool:
    return len(line) > 0 and not (line.startswith("#") or line.startswith("import"))




[docs]
def _config_line_to_pair(line: str) -> list[str]:
    return [kv.strip() for kv in line.split("=")]




[docs]
def config_str_to_dict(config_str: str) -> dict:
    """Convert the text of a Gin ``config_str()`` into a dict for logging with comet.ml.

    Args:
        config_str: The config text, one entry per line.

    Returns:
        A dict mapping each kept line's key to its value. Lines rejected by
        ``_config_line_filter`` are skipped; if a key appears more than once,
        the last occurrence wins.
    """
    config: dict = {}
    for raw_line in config_str.splitlines():
        if not _config_line_filter(raw_line):
            continue
        key, value = _config_line_to_pair(raw_line)
        config[key] = value
    return config




[docs]
@gin.configurable
def run(
    model: base.BaseModel,
    training_x_dir: str | Path,
    training_y_dir: str | Path,
    validating_x_dir: str | Path,
    validating_y_dir: str | Path,
    testing_x_dir: str | Path,
    testing_y_dir: str | Path,
    model_save_dir: str | Path,
    model_out_y_dir: str | Path,
    model_out_val_y_dir: str | Path,
    model_stats_output_dir: str | Path,
    labeled_stats_output_dir: str | Path,
    report_output_dir: str | Path,
    logger: log.Logger | None = None,
    train: bool = False,
    get_threshold: bool = False,
    test: bool = False,
    infer: bool = False,
    cal_stats: bool = False,
):
    """Run the selected pipeline stages: train, find threshold, predict, report.

    Each stage is enabled by its own flag and the stages run in the order
    listed above. The id returned by ``logger.experiment.get_key()`` is used
    as the model id and names the model's sub-directory of ``model_save_dir``.

    Args:
        model (BaseModel): The model to train and/or run.
        training_x_dir (str|Path): Passed to ``model.fit`` (presumably the
            training images).
        training_y_dir (str|Path): Passed to ``model.fit`` (presumably the
            training labels).
        validating_x_dir (str|Path): Validation images, passed to ``model.fit``
            and ``model.find_threshold``.
        validating_y_dir (str|Path): Validation labels, passed to ``model.fit``
            and ``model.find_threshold``.
        testing_x_dir (str|Path): Images given to ``model.predict_dir``.
        testing_y_dir (str|Path): Labels of the test images; only used to
            generate the labeled statistics when ``cal_stats`` is set.
        model_save_dir (str|Path): Root directory of saved models. The
            checkpoint is loaded from ``<model_save_dir>/<model_id>/checkpoints``.
        model_out_y_dir (str|Path): Output directory of ``model.predict_dir``
            and input of the model statistics.
        model_out_val_y_dir (str|Path): Passed to ``model.find_threshold``.
        model_stats_output_dir (str|Path): Where statistics of the model
            output are written.
        labeled_stats_output_dir (str|Path): Where statistics of the labels in
            ``testing_y_dir`` are written.
        report_output_dir (str|Path): Passed to ``generator.generate_report``
            together with the two statistics directories.
        logger (Logger): Required despite the ``None`` default; supplies the
            model id and receives the config parameters and code when training.
        train (bool): Fit the model and log the Gin config and source folder.
        get_threshold (bool): Compute the binarization threshold from the
            validation set.
        test (bool): Predict on ``testing_x_dir`` in ``"test"`` mode.
        infer (bool): Predict on ``testing_x_dir`` in ``"infer"`` mode. Ignored
            when ``test`` is also set.
        cal_stats (bool): Generate statistics for the model output and the
            labels, then the report.

    Raises:
        ValueError: If ``logger`` is None.
    """
    training_x_dir = Path(training_x_dir)
    training_y_dir = Path(training_y_dir)
    validating_x_dir = Path(validating_x_dir)
    validating_y_dir = Path(validating_y_dir)
    testing_x_dir = Path(testing_x_dir)
    testing_y_dir = Path(testing_y_dir)
    model_save_dir = Path(model_save_dir)
    model_out_y_dir = Path(model_out_y_dir)
    model_out_val_y_dir = Path(model_out_val_y_dir)
    model_stats_output_dir = Path(model_stats_output_dir)
    labeled_stats_output_dir = Path(labeled_stats_output_dir)
    report_output_dir = Path(report_output_dir)

    if logger is None:
        raise ValueError("Logger is not provided. Please provide a logger to log the results.")

    model_id = logger.experiment.get_key()

    # A model that was just trained is used as-is. Otherwise the checkpoint is
    # loaded by the first stage that needs it, and only once even when both
    # the threshold stage and the prediction stage run.
    model_ready = train

    if train:
        # A very large max_line_length keeps every binding on a single line so
        # the line-based parser in config_str_to_dict can handle it.
        if config := config_str_to_dict(str(gin.config_str(max_line_length=int(1e5)))):
            logger.log_parameters(config)

        logger.log_code(
            folder=Path(__file__).parent,
        )

        model = model.fit(
            training_x_dir,
            training_y_dir,
            validating_x_dir,
            validating_y_dir,
            logger=logger,
            model_id=model_id,
        )

    if get_threshold:  # if there is a need to binarize the output (soft prediction)
        if not model_ready:  # If there was no training, we need to load the model
            model.load_checkpoint(model_save_dir / model_id / "checkpoints")
            model_ready = True
        threshold = model.find_threshold(
            validating_x_dir,
            validating_y_dir,
            model_save_dir / model_id,
            model_out_val_y_dir,
        )
    else:
        threshold = None

    # Two following options:
    #   test: Run the model on the test set, consisting of same size images in
    #   testing_x_dir and its labels in testing_y_dir. The process includes
    #   threshold calculation for binarization purposes, using the validation
    #   images in validating_x_dir and their labels in validating_y_dir.
    #
    #   infer: Run the model on the inference set, consisting of images in
    #   testing_x_dir. Images could be of different size, and the threshold
    #   should be provided.
    #
    # NOTE(review): this function takes no threshold argument, so in "infer"
    # mode the threshold is still computed from the validation set whenever it
    # was not already obtained through get_threshold.
    if test or infer:  # One of them, not both; "test" wins if both are set
        if not model_ready:  # If there was no training, we need to load the model
            model.load_checkpoint(model_save_dir / model_id / "checkpoints")
            model_ready = True
        if threshold is None:  # Get the threshold
            threshold = model.find_threshold(
                validating_x_dir,
                validating_y_dir,
                model_save_dir / model_id,
                model_out_val_y_dir,
            )

        mode = "test" if test else "infer"
        model.predict_dir(
            in_dir=testing_x_dir,
            out_dir=model_out_y_dir,
            threshold=threshold,
            mode=mode,
        )

    if cal_stats:
        generator.generate_statistics(model_out_y_dir, model_stats_output_dir)
        generator.generate_statistics(testing_y_dir, labeled_stats_output_dir)
        generator.generate_report(model_stats_output_dir, labeled_stats_output_dir, report_output_dir)