Source code for patato.useful_utilities

#  Copyright (c) Thomas Else 2023-25.
#  License: MIT

"""
Useful utilities module
==============================

This module provides miscellaneous functions and classes that are useful for PATATO.
They are largely independent of the other modules and minimally documented.
"""

from typing import Optional

import numpy as np
import pandas as pd

from ..utils.process_study import get_hdf5_files

import matplotlib.transforms as transforms
import matplotlib



[docs]
def add_axis_label(ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Add a bold text label anchored at the top-left of an axis.

    The label is positioned from the axis' tight bounding box, expressed in
    the (sub)figure coordinate system. For axes in the first column of their
    grid, the label is instead pinned to the left edge of the (sub)figure,
    shifted right by the ``figure.constrained_layout.w_pad`` rcParam.

    Parameters
    ----------
    ax
        Matplotlib axis to label.
    label
        Text of the label.
    manual_x, manual_y
        Optional extra offsets. They are multiplied by the matplotlib font
        scaling factor for ``font_size`` (1 if ``font_size`` is not a named
        size) and applied through the figure's ``dpi_scale_trans``.
    font_size
        Font size passed to ``fig.text``.

    Returns
    -------
    The text artist created by ``fig.text``.
    """
    figure = ax.get_figure()
    # Draw before measuring; presumably so that the tight bounding box
    # reflects the current layout.
    figure.canvas.draw()
    tight_box = ax.get_tightbbox(figure.canvas.get_renderer())

    # Convert the top-left corner of the bounding box into subfigure coordinates.
    corner = [tight_box.x0, tight_box.y1]
    x, y = figure.transSubfigure.inverted().transform(corner)

    text_transform = figure.transSubfigure
    if ax.get_subplotspec().is_first_col():
        # First-column axes: pin the label to the left edge of the subfigure
        # and pad it by the constrained-layout width padding.
        text_transform = transforms.blended_transform_factory(
            figure.transSubfigure, text_transform
        )
        x = 0
        left_pad = transforms.ScaledTranslation(
            matplotlib.rcParams["figure.constrained_layout.w_pad"],
            0,
            figure.dpi_scale_trans,
        )
        text_transform = text_transform + left_pad

    manual_x *= matplotlib.font_manager.font_scalings.get(font_size, 1)
    manual_y *= matplotlib.font_manager.font_scalings.get(font_size, 1)
    has_manual_offset = manual_x != 0 or manual_y != 0
    if has_manual_offset:
        manual_shift = transforms.ScaledTranslation(
            manual_x, manual_y, figure.dpi_scale_trans
        )
        text_transform = text_transform + manual_shift

    text_options = dict(
        fontsize=font_size,
        fontweight="bold",
        va="top",
        ha="left",
        transform=text_transform,
    )
    return figure.text(x, y, label, **text_options)




[docs]
def add_subfigure_label(subfig, ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Add a bold label to the top-left of a subfigure, using its suptitle.

    Parameters
    ----------
    subfig
        Subfigure to label; its ``suptitle`` is used to hold the label text.
    ax
        Axis whose figure provides the ``dpi_scale_trans`` used for the
        manual offset.
    label
        Text of the label.
    manual_x, manual_y
        Optional extra offsets. They are multiplied by the matplotlib font
        scaling factor for ``font_size`` (1 if ``font_size`` is not a named
        size).
    font_size
        Font size passed to ``suptitle``.

    Returns
    -------
    The text artist returned by ``subfig.suptitle``.
    """
    scale_x = matplotlib.font_manager.font_scalings.get(font_size, 1)
    scale_y = matplotlib.font_manager.font_scalings.get(font_size, 1)
    manual_x *= scale_x
    manual_y *= scale_y

    title = subfig.suptitle(
        label, ha="left", va="top", fontweight="bold", fontsize=font_size
    )
    subfig.canvas.draw()
    title.set_x(0)

    # Start at the left edge of the subfigure, padded by the
    # constrained-layout width padding.
    left_pad = transforms.ScaledTranslation(
        matplotlib.rcParams["figure.constrained_layout.w_pad"],
        0,
        subfig.get_figure().dpi_scale_trans,
    )
    title_transform = subfig.transSubfigure + left_pad

    has_manual_offset = manual_x != 0 or manual_y != 0
    if has_manual_offset:
        manual_shift = transforms.ScaledTranslation(
            manual_x, manual_y, ax.get_figure().dpi_scale_trans
        )
        title_transform = title_transform + manual_shift

    title.set_transform(title_transform)
    return title




[docs]
def linear_regression(x, y, constant=True, x_predict=None):
    """
    Fit an ordinary least squares model of ``y`` on ``x`` and predict from it.

    Parameters
    ----------
    x
        Explanatory variable(s), used as the design matrix of
        ``statsmodels.api.OLS``.
    y
        Response variable.
    constant : bool
        If True, an intercept column is added to ``x`` (and to ``x_predict``
        when given) with ``statsmodels.api.add_constant``.
    x_predict
        Optional values at which to evaluate the fitted model. If None, the
        predictions are made at the fitted ``x`` values.

    Returns
    -------
    prediction
        The object returned by ``result.get_prediction``.
    result
        The fitted results object returned by ``OLS(...).fit()``.
    """
    import statsmodels.api as sm

    if constant:
        x = sm.add_constant(x)
    result = sm.OLS(y, x).fit()

    if x_predict is None:
        x_predict = x
    elif constant:
        # Only add the intercept column when the model was fitted with one;
        # otherwise the prediction matrix would have one column too many.
        # NOTE(review): add_constant's default has_constant="skip" leaves the
        # input unchanged if it already contains a constant column, so a
        # single-row or constant-valued x_predict gets no intercept column.
        x_predict = sm.add_constant(x_predict)
    prediction = result.get_prediction(x_predict)
    return prediction, result




[docs]
def process_scan_name(template: str, scan_name: str) -> dict:
    """
    Process the scan name using the simple template specified.

    The template may contain any of the following case-sensitive
    placeholders: ``<Date>``, ``<Initials>``, ``<EarMark>``, ``<MouseID>``,
    ``<ScanType>`` and ``<Timepoint>``, for example:

    ``"<Date>_<Initials><EarMark><MouseID>_Day<Timepoint>_<ScanType>"``

    Each placeholder is replaced by a named regular-expression group and the
    result is matched against the start of ``scan_name``.

    For advanced use, note that the template is converted into a regular
    expression, so certain elements of that syntax can be used in the template.

    >>> process_scan_name("<MouseID>_Day<Timepoint>", "123_Day4")
    {'MouseID': '123', 'Timepoint': '4'}

    Parameters
    ----------
    template
        Template describing the layout of the scan name.
    scan_name
        Scan name to parse.

    Returns
    -------
    dict
        Mapping of placeholder name to the matched text converted to upper
        case (``None`` for an optional group that did not participate in the
        match). An empty dictionary is returned, and a message is printed,
        if the scan name does not match the template.
    """
    import re

    # NOTE(review): ``[A-z]`` also matches the ASCII punctuation between "Z"
    # and "a" ("[", "\", "]", "^", "_", "`"), and "|" inside a character
    # class is a literal. The patterns are left unchanged here so that the
    # set of scan names that match is not altered.
    regex_codes = {
        "Date": r"([0-9]*)",
        "Initials": r"([A-z]{2,3})",
        "EarMark": r"(NM|1L|1R|2L|2R|1L1R|1R1L|1RL|1B|IB|IL|IR)",
        "MouseID": r"([0-9]{1,6})",
        "ScanType": r"([A-z|0-9|_|\+]+)?",
        "Timepoint": r"([0-9]+)",
    }

    for name, code in regex_codes.items():
        # Turn the plain group "(...)" into the named group "(?P<name>...)".
        named_group = f"(?P<{name}>" + code[1:]
        template = template.replace(f"<{name}>", named_group)

    match = re.compile(template).match(scan_name)
    if match is None:
        print(f"Unable to match template to scan name: {scan_name}.")
        return {}

    return {
        key: value.upper() if isinstance(value, str) else value
        for key, value in match.groupdict().items()
    }




[docs]
def invert_dictionary_tolist(mapping):
    """
    Invert a mapping of ``key -> iterable of members`` into ``member -> key``.

    If a member appears under several keys, the key encountered last in
    iteration order is kept.
    """
    inverted = {}
    for key, members in mapping.items():
        for member in members:
            inverted[member] = key
    return inverted




[docs]
def extract_data_tables(
    datafolder: str,
    name_template: str,
    analyse_rois: list,
    metrics=None,
    start_days=None,
    group_info: Optional[dict] = None,
    reconstruction_name=None,
    analyse_scan_types=None,
    just_summary=True,
    roi_kwargs=None,
    apply_function=None,
    filter_name="",
    more_details=None,
    roi_source_type=None,
    return_masks=False,
):
    """
    Collect summary measurements from all scans in a folder into one table.

    Every dataset returned by ``get_hdf5_files`` is parsed with
    ``process_scan_name``; scans whose name does not match the template, or
    which have no regions of interest, are skipped.

    Parameters
    ----------
    datafolder
        Folder passed to ``get_hdf5_files``.
    name_template
        Scan name template passed to ``process_scan_name``. It must contain
        ``<MouseID>``, which is converted to ``int`` for the lookups below.
    analyse_rois
        Passed to ``summary_measurements`` as ``include_rois``.
    metrics
        Passed to ``summary_measurements``; defaults to ``["thb", "so2"]``.
    start_days
        ``{column name: {start date: [mouse ids]}}``. For each entry a column
        is added containing ``(scan date - start date).days + 1``.
    group_info
        ``{column name: {group value: [mouse ids]}}``. For each entry a column
        is added containing the group value of the scanned mouse.
    reconstruction_name
        Passed to ``set_default_recon`` of every dataset.
    analyse_scan_types
        If not None, only scans whose parsed ``ScanType`` is in this
        collection are analysed.
    just_summary, roi_kwargs, return_masks
        Passed to ``summary_measurements``.
    apply_function
        Optional callable applied to each scan's measurement table before it
        is collected.
    filter_name
        Passed to ``get_hdf5_files``.
    more_details
        Optional iterable of callables; each is called with the dataset and
        must return a dictionary of extra columns.
    roi_source_type
        If not None, the scan of this type within each (MouseID, Timepoint)
        session is assigned as ``external_roi_interface`` of every scan in
        that session.

    Returns
    -------
    df : pandas.DataFrame
        Concatenated measurements, with added ``Radius``, ``Volume`` and
        timezone-naive ``Date`` columns.
    images : list
        Always an empty list; nothing in this function populates it.

    Raises
    ------
    ValueError
        If no measurements could be extracted from any scan.
    KeyError
        If a scanned mouse is missing from ``start_days`` or ``group_info``,
        or the template does not define ``<MouseID>``.
    """
    if start_days is None:
        start_days = {}
    if group_info is None:
        group_info = {}
    if metrics is None:
        metrics = ["thb", "so2"]

    start_date_map = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in start_days.items()
    }
    group_info = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in group_info.items()
    }

    # Never populated; kept so that the return signature is unchanged.
    images = []
    tables = []

    datasets = list(get_hdf5_files(datafolder, filter_name=filter_name))

    # Parse each scan name once (stripped of surrounding whitespace) so that
    # the ROI-sharing map and the main loop agree on the parsed details.
    parsed_datasets = [
        (f, data, process_scan_name(name_template, str.strip(data.get_scan_name())))
        for f, data in datasets
    ]

    # Generate a dictionary to lookup all the scans for each scan session,
    # using the MouseID and Timepoint as an ID.
    scan_map = {}
    for _, data, details in parsed_datasets:
        scan_id = (details.get("MouseID", None), details.get("Timepoint", None))
        scan_map.setdefault(scan_id, {})[details.get("ScanType")] = data

    # Share regions of interest between the scans of a session.
    if roi_source_type is not None:
        for session in scan_map.values():
            if roi_source_type not in session:
                continue
            roi_source = session[roi_source_type]
            for scan in session.values():
                scan.external_roi_interface = roi_source

    # Loop through all datasets and extract data.
    for f, data, details in parsed_datasets:
        print(f, data.get_scan_name())
        # Set the default reconstruction method.
        data.set_default_recon(reconstruction_name)

        # Skip scans whose name did not match the template.
        if not details:
            continue
        if analyse_scan_types is not None:
            if details.get("ScanType", None) not in analyse_scan_types:
                continue

        details["File"] = f
        # Mouse id - must be set for this analysis code.
        mouse_id = int(details["MouseID"])

        # Get the scan date.
        date = data.get_scan_datetime()

        # Extract details (e.g treatment, cell line etc)
        for detail, mouse_mapping in group_info.items():
            details[detail] = mouse_mapping[mouse_id]

        # Extract the time data (e.g. time since dosing started, time since implantation)
        for time_detail, mouse_mapping in start_date_map.items():
            details[time_detail] = (date - mouse_mapping[mouse_id]).days + 1

        if more_details is not None:
            for fn in more_details:
                for k, v in fn(data).items():
                    details[k] = v

        if not data.get_rois():
            continue

        measurements = data.summary_measurements(
            metrics=metrics,
            include_rois=analyse_rois,
            roi_kwargs=roi_kwargs,
            just_summary=just_summary,
            return_masks=return_masks,
        )
        for key, value in details.items():
            measurements[key] = value
        # Overwrite the textual date parsed from the scan name with the
        # dataset's own scan datetime.
        measurements["Date"] = date
        if apply_function is not None:
            measurements = apply_function(measurements)
        tables.append(measurements)

    if not tables:
        # pd.concat would raise an opaque ValueError on an empty list.
        raise ValueError(
            f"No measurements were extracted from the scans in {datafolder!r}: "
            "no scan matched the name template and filters while having "
            "regions of interest."
        )

    df = pd.concat(tables).reset_index()

    # Radius of a circle with the same area, scaled by a hard-coded factor of
    # 75 * 3e-3 (units not documented here - TODO confirm).
    df["Radius"] = np.sqrt(df["Area"] / np.pi) * 75 * 3e-3

    # Volume of a sphere with that radius.
    df["Volume"] = df["Radius"] ** 3 * 4 * np.pi / 3

    df["Date"] = pd.to_datetime(df["Date"], utc=True, dayfirst=True).dt.tz_localize(
        None
    )

    return df, images




[docs]
def set_matplotlib_defaults(fig_width=91.5, fig_height=89):
    """
    Apply the default matplotlib rcParams used for figures.

    Parameters
    ----------
    fig_width, fig_height
        Default figure size in millimetres (converted to inches for
        ``figure.figsize``). Use 183 mm for a double-width figure.
    """
    import matplotlib

    mm_per_inch = 25.4
    figure_size = (fig_width / mm_per_inch, fig_height / mm_per_inch)

    settings = {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "font.sans-serif": "Arial",
        "figure.dpi": 227,
        "figure.figsize": figure_size,
        "font.size": 7,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "savefig.pad_inches": 0,
        "figure.subplot.bottom": 0.075,
        "figure.subplot.hspace": 0.4,
        "figure.subplot.left": 0.075,
        "figure.subplot.right": 0.97,
        "figure.subplot.top": 0.925,
        "figure.subplot.wspace": 0.5,
        "figure.titlesize": "medium",
        "axes.titlesize": "small",
        "lines.markersize": 3,
        "legend.frameon": False,
    }
    # Assign one key at a time, in the order listed above.
    for key, value in settings.items():
        matplotlib.rcParams[key] = value