# Source code for patato.useful_utilities

#  Copyright (c) Thomas Else 2023-25.
#  License: MIT

"""
Useful utilities module
==============================

This module provides miscellaneous functions and classes that are useful for PATATO.
They are largely independent of the other modules and minimally documented.
"""

from typing import Optional

import numpy as np
import pandas as pd

from ..utils.process_study import get_hdf5_files

import matplotlib.transforms as transforms
import matplotlib


def add_axis_label(ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Add a bold panel label (e.g. "A") at the top-left corner of an axis.

    The figure canvas is drawn, the tight bounding box of ``ax`` is measured and
    its top-left corner is converted to subfigure coordinates. For axes in the
    first column of their subplot grid, the label is instead pinned to the left
    edge (x = 0) and shifted right by the constrained-layout ``w_pad`` setting.

    Parameters
    ----------
    ax
        Matplotlib axis to label.
    label
        Text of the label.
    manual_x, manual_y
        Extra offsets applied through the figure's ``dpi_scale_trans``, each
        multiplied by the relative scaling of ``font_size`` (1 if ``font_size``
        is not a named matplotlib font size).
    font_size
        Font size passed on to ``fig.text``.

    Returns
    -------
    The text object created by ``fig.text``.
    """
    figure = ax.get_figure()
    # Draw first; presumably so that the tight bounding box reflects the final layout.
    figure.canvas.draw()
    tight_box = ax.get_tightbbox(figure.canvas.get_renderer())
    display_to_subfigure = figure.transSubfigure.inverted()
    x, y = display_to_subfigure.transform([tight_box.x0, tight_box.y1])

    text_transform = figure.transSubfigure
    if ax.get_subplotspec().is_first_col():
        # First-column axes: align the label with the left edge plus layout padding.
        x = 0
        left_padding = transforms.ScaledTranslation(
            matplotlib.rcParams["figure.constrained_layout.w_pad"],
            0,
            figure.dpi_scale_trans,
        )
        blended = transforms.blended_transform_factory(
            figure.transSubfigure, text_transform
        )
        text_transform = blended + left_padding

    font_scale = matplotlib.font_manager.font_scalings.get(font_size, 1)
    manual_x *= font_scale
    manual_y *= font_scale
    if manual_x != 0 or manual_y != 0:
        manual_shift = transforms.ScaledTranslation(
            manual_x, manual_y, figure.dpi_scale_trans
        )
        text_transform = text_transform + manual_shift

    return figure.text(
        x,
        y,
        label,
        transform=text_transform,
        ha="left",
        va="top",
        fontweight="bold",
        fontsize=font_size,
    )
def add_subfigure_label(subfig, ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Add a bold panel label to a subfigure, anchored at its top-left corner.

    The label is created as the subfigure's ``suptitle``, moved to x = 0 and
    given a transform made of the subfigure transform plus a horizontal shift
    of the constrained-layout ``w_pad`` setting.

    Parameters
    ----------
    subfig
        Subfigure that receives the label.
    ax
        Axis whose figure supplies the ``dpi_scale_trans`` used for the manual
        offsets.
    label
        Text of the label.
    manual_x, manual_y
        Extra offsets, each multiplied by the relative scaling of ``font_size``
        (1 if ``font_size`` is not a named matplotlib font size).
    font_size
        Font size passed on to ``suptitle``.

    Returns
    -------
    The text object returned by ``subfig.suptitle``.
    """
    font_scale = matplotlib.font_manager.font_scalings.get(font_size, 1)
    manual_x *= font_scale
    manual_y *= font_scale

    title = subfig.suptitle(
        label, fontsize=font_size, fontweight="bold", va="top", ha="left"
    )
    subfig.canvas.draw()
    title.set_x(0)

    left_padding = transforms.ScaledTranslation(
        matplotlib.rcParams["figure.constrained_layout.w_pad"],
        0,
        subfig.get_figure().dpi_scale_trans,
    )
    title_transform = subfig.transSubfigure + left_padding
    if manual_x != 0 or manual_y != 0:
        manual_shift = transforms.ScaledTranslation(
            manual_x, manual_y, ax.get_figure().dpi_scale_trans
        )
        title_transform = title_transform + manual_shift

    title.set_transform(title_transform)
    return title
def linear_regression(x, y, constant=True, x_predict=None):
    """
    Fit an ordinary least squares regression of ``y`` on ``x`` with statsmodels.

    Parameters
    ----------
    x
        Explanatory variable(s): 1D, or 2D with one column per variable.
    y
        Response variable.
    constant
        If True, an intercept column is added to ``x`` before fitting
        (statsmodels skips this when ``x`` already contains a constant column).
    x_predict
        Optional values of the explanatory variable(s) at which to predict,
        given *without* an intercept column unless ``x`` itself was supplied
        with one. If None, predictions are made at the fitted ``x``.

    Returns
    -------
    prediction, result
        The statsmodels prediction results for ``x_predict`` and the fitted
        regression results.
    """
    import statsmodels.api as sm

    intercept_added = False
    if constant:
        columns_before = 1 if np.ndim(x) == 1 else np.shape(x)[1]
        x = sm.add_constant(x)
        # add_constant leaves x untouched if it already holds a constant column.
        intercept_added = np.shape(x)[1] > columns_before

    result = sm.OLS(y, x).fit()

    if x_predict is None:
        x_predict = x
    elif intercept_added:
        # Mirror exactly what was done to the training matrix. has_constant="add"
        # is required because the default ("skip") would silently omit the
        # intercept for a single prediction point or constant-valued inputs.
        x_predict = sm.add_constant(x_predict, has_constant="add")

    prediction = result.get_prediction(x_predict)
    return prediction, result
def process_scan_name(template: str, scan_name: str) -> dict:
    """
    Parse a scan name into its components using a simple template.

    The template contains placeholders in angle brackets, for example::

        <Date>_<Initials><EarMark><MouseID>_Day<Timepoint>_<ScanType>

    Recognised placeholders are ``Date``, ``Initials``, ``EarMark``,
    ``MouseID``, ``ScanType`` and ``Timepoint``. They are matched
    case-insensitively, so ``<date>`` and ``<Date>`` are equivalent; the keys of
    the returned dictionary always use the spelling listed here.

    For advanced use, note that the template is converted into a regular
    expression, so elements of that syntax (including additional named groups)
    can be used in the template. The expression is matched from the start of
    ``scan_name``.

    Parameters
    ----------
    template
        Template describing the layout of the scan name.
    scan_name
        Scan name to parse.

    Returns
    -------
    dict
        Mapping from placeholder (or named group) to the matched text converted
        to upper case, or to None for optional groups that did not take part in
        the match. An empty dictionary is returned, and a message printed, if
        the scan name does not match the template.
    """
    import re

    # NOTE: "[A-z]" also matches the ASCII characters between "Z" and "a"
    # ([, backslash, ], ^, _ and `). The fragments are kept unchanged so that
    # scan names which matched before still match.
    named_groups = {
        "Date": r"(?P<Date>[0-9]*)",
        "Initials": r"(?P<Initials>[A-z]{2,3})",
        "EarMark": r"(?P<EarMark>NM|1L|1R|2L|2R|1L1R|1R1L|1RL|1B|IB|IL|IR)",
        "MouseID": r"(?P<MouseID>[0-9]{1,6})",
        "ScanType": r"(?P<ScanType>[A-z|0-9|_|\+]+)?",
        "Timepoint": r"(?P<Timepoint>[0-9]+)",
    }

    for name, group in named_groups.items():
        # The lookbehind leaves user-written named groups such as "(?P<date>...)"
        # alone; only bare placeholders are substituted.
        placeholder = re.compile(
            r"(?<!\?P)" + re.escape(f"<{name}>"), flags=re.IGNORECASE
        )
        # A function replacement inserts the fragment verbatim (no backslash
        # processing of the replacement text).
        template = placeholder.sub(lambda _match, group=group: group, template)

    match = re.compile(template).match(scan_name)
    if match is None:
        print(f"Unable to match template to scan name: {scan_name}.")
        return {}

    return {
        key: value.upper() if isinstance(value, str) else value
        for key, value in match.groupdict().items()
    }
def invert_dictionary_tolist(mapping):
    """
    Invert a ``{value: [member, ...]}`` mapping into ``{member: value}``.

    Each member of every iterable in ``mapping`` becomes a key that points back
    to the key it was listed under. If a member appears under several keys, the
    one encountered last wins.
    """
    inverted = {}
    for value, members in mapping.items():
        for member in members:
            inverted[member] = value
    return inverted
def extract_data_tables(
    datafolder: str,
    name_template: str,
    analyse_rois: list,
    metrics=None,
    start_days=None,
    group_info: Optional[dict] = None,
    reconstruction_name=None,
    analyse_scan_types=None,
    just_summary=True,
    roi_kwargs=None,
    apply_function=None,
    filter_name="",
    more_details=None,
    roi_source_type=None,
    return_masks=False,
):
    """
    Collect region-of-interest measurements from every scan found in a folder
    into a single table.

    Every dataset returned by ``get_hdf5_files`` is parsed with
    ``process_scan_name``; scans are skipped if their name does not match the
    template, if their scan type is filtered out, or if they have no regions of
    interest. For the remaining scans, ``summary_measurements`` is called and
    the parsed details are attached as extra columns.

    Parameters
    ----------
    datafolder
        Folder passed to ``get_hdf5_files``.
    name_template
        Scan name template passed to ``process_scan_name``. It must yield a
        ``MouseID`` for every scan that is analysed.
    analyse_rois
        Passed to ``summary_measurements`` as ``include_rois``.
    metrics
        Passed to ``summary_measurements``; defaults to ``["thb", "so2"]``.
    start_days
        ``{column_name: {start_date: [mouse_id, ...]}}``. For each entry, a
        column is added holding ``(scan date - start date).days + 1``. Mouse ids
        are looked up as integers.
    group_info
        ``{column_name: {group_value: [mouse_id, ...]}}``. For each entry, a
        column is added holding the group value of the scan's mouse. Mouse ids
        are looked up as integers.
    reconstruction_name
        Passed to ``set_default_recon`` on every dataset.
    analyse_scan_types
        If given, only scans whose parsed ``ScanType`` is in this collection
        are analysed. Note that ``process_scan_name`` upper-cases its values.
    just_summary, roi_kwargs, return_masks
        Passed through to ``summary_measurements``.
    apply_function
        Optional callable applied to each scan's measurement table before it is
        collected; its return value replaces the table.
    filter_name
        Passed to ``get_hdf5_files``.
    more_details
        Optional iterable of callables; each is called with the dataset and
        must return a dict whose items are added to the scan's details.
    roi_source_type
        If given, within each (MouseID, Timepoint) session that contains a scan
        of this type, that scan is set as ``external_roi_interface`` of every
        scan in the session.

    Returns
    -------
    df, images
        ``df`` is the concatenated table, with ``Radius`` and ``Volume`` columns
        derived from ``Area`` and a timezone-naive ``Date`` column. ``images``
        is a list that this function never fills, so it is always empty.

    Notes
    -----
    ``pd.concat`` is called on the collected tables without a guard, so
    (per pandas' behaviour) an error is expected if no scan produced a table.
    Missing mouse ids in ``group_info`` / ``start_days`` raise ``KeyError``.
    """
    if start_days is None:
        start_days = {}
    if group_info is None:
        group_info = {}
    if metrics is None:
        metrics = ["thb", "so2"]

    # Turn {value: [mouse ids]} into {mouse id: value} for direct lookup.
    start_date_map = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in start_days.items()
    }
    group_info = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in group_info.items()
    }
    images = []
    tables = []

    # Share regions of interest between adjacent scans.
    datasets = list(get_hdf5_files(datafolder, filter_name=filter_name))
    dataset_details = [
        (process_scan_name(name_template, data.get_scan_name()), data)
        for _, data in datasets
    ]

    # Generate a dictionary to lookup all the scans for each scan session. Using the MouseID and Timepoint as an ID.
    # Scans whose name did not match the template all share the id (None, None).
    scan_map = {}
    for details, data in dataset_details:
        scan_id = (details.get("MouseID", None), details.get("Timepoint", None))
        if scan_id not in scan_map:
            scan_map[scan_id] = {}
        # A later scan with the same session id and scan type replaces an earlier one.
        scan_map[scan_id][details.get("ScanType")] = data

    if roi_source_type is not None:
        for scan_id in scan_map:
            # Sessions without a scan of the source type are left untouched.
            if roi_source_type not in scan_map[scan_id]:
                continue
            for scan_type in scan_map[scan_id]:
                scan_map[scan_id][scan_type].external_roi_interface = scan_map[scan_id][
                    roi_source_type
                ]

    # Loop through all datasets and extract data.
    for f, data in datasets:
        print(f, data.get_scan_name())
        # Set the default reconstruction method.
        # This happens before any filtering, so it also applies to scans skipped below.
        data.set_default_recon(reconstruction_name)
        # Extract useful information from the scan name
        scan_name = str.strip(data.get_scan_name())
        details = process_scan_name(name_template, scan_name)
        if analyse_scan_types is not None:
            if details.get("ScanType", None) not in analyse_scan_types:
                continue
        # An empty dict means the scan name did not match the template.
        if not details:
            continue
        details["File"] = f
        # Mouse id - must be set for this analysis code.
        mouse_id = int(details["MouseID"])
        # Get the scan date.
        date = data.get_scan_datetime()
        # Extract details (e.g treatment, cell line etc)
        for detail, mouse_mapping in group_info.items():
            details[detail] = mouse_mapping[mouse_id]
        # Extract the time data (e.g. time since dosing started, time since implantation)
        # The start day itself counts as day 1.
        for time_detail, mouse_mapping in start_date_map.items():
            details[time_detail] = (date - mouse_mapping[mouse_id]).days + 1
        if more_details is not None:
            for fn in more_details:
                for k, v in fn(data).items():
                    details[k] = v
        # Scans without any regions of interest contribute nothing.
        if not data.get_rois():
            continue
        else:
            measurements = data.summary_measurements(
                metrics=metrics,
                include_rois=analyse_rois,
                roi_kwargs=roi_kwargs,
                just_summary=just_summary,
                return_masks=return_masks,
            )
            # Attach the scan-level details to the measurement table.
            for d in details:
                measurements[d] = details[d]
            measurements["Date"] = data.get_scan_datetime()
            if apply_function is not None:
                measurements = apply_function(measurements)
            tables.append(measurements)
    df = pd.concat(tables).reset_index()
    # Radius of the circle with the same area as the ROI, then the volume of a
    # sphere of that radius.
    # NOTE(review): 75 * 3e-3 looks like a fixed unit/pixel-size conversion - confirm.
    df["Radius"] = np.sqrt(df["Area"] / np.pi) * 75 * 3e-3
    df["Volume"] = df["Radius"] ** 3 * 4 * np.pi / 3
    # Parse as UTC, then drop the timezone to give naive timestamps.
    df["Date"] = pd.to_datetime(df["Date"], utc=True, dayfirst=True).dt.tz_localize(
        None
    )
    return df, images
def set_matplotlib_defaults(fig_width=91.5, fig_height=89):
    """
    Apply a fixed set of matplotlib ``rcParams`` for figure styling.

    Parameters
    ----------
    fig_width, fig_height
        Default figure size in millimetres (converted to inches by dividing by
        25.4). Use a width of 183 mm for double width.
    """
    import matplotlib

    mm_per_inch = 25.4
    defaults = {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "font.sans-serif": "Arial",
        "figure.dpi": 227,
        "figure.figsize": (fig_width / mm_per_inch, fig_height / mm_per_inch),
        "font.size": 7,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "savefig.pad_inches": 0,
        "figure.subplot.bottom": 0.075,
        "figure.subplot.hspace": 0.4,
        "figure.subplot.left": 0.075,
        "figure.subplot.right": 0.97,
        "figure.subplot.top": 0.925,
        "figure.subplot.wspace": 0.5,
        "figure.titlesize": "medium",
        "axes.titlesize": "small",
        "lines.markersize": 3,
        "legend.frameon": False,
    }
    # Assign one key at a time, in the listed order, so each value is validated
    # by rcParams exactly as a direct assignment would be.
    for key, value in defaults.items():
        matplotlib.rcParams[key] = value