# Source code for patato.useful_utilities
# Copyright (c) Thomas Else 2023-25.
# License: MIT
"""
Useful utilities module
==============================
This module provides miscellaneous functions and classes that are useful for PATATO.
They are largely independent of the other modules and minimally documented.
"""
from typing import Optional
import numpy as np
import pandas as pd
from ..utils.process_study import get_hdf5_files
import matplotlib.transforms as transforms
import matplotlib
[docs]
def add_axis_label(ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Add a bold text label (for example a panel letter) at the top-left of an axis.

    The label is placed with ``fig.text`` at the top-left corner of the axis'
    tight bounding box, expressed in the figure's ``transSubfigure`` coordinates.
    For axes in the first column of their subplot grid the label is instead
    placed at ``x = 0`` and shifted right by the constrained-layout ``w_pad``.

    Parameters
    ----------
    ax
        Axis to label. Its figure, tight bounding box and subplot spec are used.
    label
        Text passed to ``fig.text``.
    manual_x, manual_y
        Optional extra offsets. Each is multiplied by the entry for ``font_size``
        in ``matplotlib.font_manager.font_scalings`` (1 if there is no entry) and
        applied through the figure's ``dpi_scale_trans``. No offset is added
        when both are zero.
    font_size
        Font size passed to ``fig.text``.

    Returns
    -------
    The text object returned by ``fig.text``.
    """
    fig = ax.get_figure()
    # Force a draw before measuring; presumably so that the tight bounding box
    # reflects the final layout.
    fig.canvas.draw()
    bbox = ax.get_tightbbox(fig.canvas.get_renderer())
    # Map the (x0, y1) corner of the bounding box through the inverse of
    # transSubfigure to get the label position.
    x, y = fig.transSubfigure.inverted().transform([bbox.x0, bbox.y1])
    transform = fig.transSubfigure
    # NOTE(review): the statements down to the w_pad translation are taken to
    # belong to this first-column branch - confirm against the upstream source.
    if ax.get_subplotspec().is_first_col():
        # x is taken from transSubfigure, y from the current transform (which is
        # also transSubfigure at this point).
        transform = transforms.blended_transform_factory(
            ax.get_figure().transSubfigure, transform
        )
        # Start from x = 0, then shift right by the constrained-layout
        # horizontal padding.
        x = 0
        transform += transforms.ScaledTranslation(
            matplotlib.rcParams["figure.constrained_layout.w_pad"],
            0,
            ax.get_figure().dpi_scale_trans,
        )
    # Scale the manual offsets by the font scaling for named sizes such as
    # "large"; sizes without an entry use a factor of 1.
    manual_x *= matplotlib.font_manager.font_scalings.get(font_size, 1)
    manual_y *= matplotlib.font_manager.font_scalings.get(font_size, 1)
    if manual_x != 0 or manual_y != 0:
        transform += transforms.ScaledTranslation(
            manual_x, manual_y, ax.get_figure().dpi_scale_trans
        )
    return fig.text(
        x,
        y,
        label,
        fontsize=font_size,
        fontweight="bold",
        va="top",
        ha="left",
        transform=transform,
    )
[docs]
def add_subfigure_label(subfig, ax, label, manual_x=0, manual_y=0, font_size="large"):
    """
    Label a subfigure with a bold title anchored at its top-left.

    The label is created with ``subfig.suptitle``, moved to ``x = 0`` and given a
    transform made of the subfigure's ``transSubfigure`` plus a horizontal shift
    equal to the constrained-layout ``w_pad``.

    Parameters
    ----------
    subfig
        Subfigure that receives the title.
    ax
        Axis whose figure supplies ``dpi_scale_trans`` for the manual offset.
    label
        Title text.
    manual_x, manual_y
        Optional extra offsets. Each is multiplied by the entry for ``font_size``
        in ``matplotlib.font_manager.font_scalings`` (1 if there is no entry).
        No offset is added when both are zero.
    font_size
        Font size of the title.

    Returns
    -------
    The text object returned by ``subfig.suptitle``.
    """
    scalings = matplotlib.font_manager.font_scalings
    offset_x = manual_x * scalings.get(font_size, 1)
    offset_y = manual_y * scalings.get(font_size, 1)

    title = subfig.suptitle(
        label, ha="left", va="top", fontweight="bold", fontsize=font_size
    )
    subfig.canvas.draw()
    title.set_x(0)

    # Subfigure coordinates, shifted right by the constrained-layout padding.
    base = subfig.transSubfigure
    padding = transforms.ScaledTranslation(
        matplotlib.rcParams["figure.constrained_layout.w_pad"],
        0,
        subfig.get_figure().dpi_scale_trans,
    )
    placement = base + padding

    # Only append the manual shift when the caller asked for one.
    if not (offset_x == 0 and offset_y == 0):
        manual_shift = transforms.ScaledTranslation(
            offset_x, offset_y, ax.get_figure().dpi_scale_trans
        )
        placement = placement + manual_shift

    title.set_transform(placement)
    return title
[docs]
def linear_regression(x, y, constant=True, x_predict=None):
    """
    Fit an ordinary least squares model and predict from it.

    Parameters
    ----------
    x
        Explanatory variable(s) passed to ``statsmodels.api.OLS``.
    y
        Response variable.
    constant
        If True (default), an intercept column is added to ``x`` (and to
        ``x_predict``) with ``statsmodels.api.add_constant``.
    x_predict
        Values at which to evaluate the fitted model. If None, the (possibly
        intercept-augmented) fitting data are used.

    Returns
    -------
    prediction
        Result of ``result.get_prediction`` at ``x_predict``.
    result
        The fitted OLS results object.
    """
    import statsmodels.api as sm

    if constant:
        x = sm.add_constant(x)
    result = sm.OLS(y, x).fit()

    if x_predict is None:
        x_predict = x
    elif constant:
        # Only add an intercept column when the model was fitted with one.
        expected_columns = len(result.params)
        design = sm.add_constant(x_predict)
        # add_constant defaults to has_constant="skip": it adds nothing when the
        # input already has a constant column, which is the case for a single
        # row or a feature that happens to be constant. Force the intercept in
        # when the column count does not match the fitted model; input that
        # already carries an intercept column keeps working unchanged.
        if np.ndim(design) != 2 or np.shape(design)[1] != expected_columns:
            design = sm.add_constant(x_predict, has_constant="add")
        x_predict = design

    prediction = result.get_prediction(x_predict)
    return prediction, result
[docs]
def process_scan_name(template: str, scan_name: str) -> dict:
    """
    Extract named fields from a scan name using a simple template.

    The template may contain any of the case-sensitive placeholders ``<Date>``,
    ``<Initials>``, ``<EarMark>``, ``<MouseID>``, ``<ScanType>`` and
    ``<Timepoint>``, for example::

        "<Date>_<Initials><EarMark><MouseID>_Day<Timepoint>_<ScanType>"

    With that template, ``"20230101_TE1L123_Day5_OE"`` gives
    ``{"Date": "20230101", "Initials": "TE", "EarMark": "1L", "MouseID": "123",
    "Timepoint": "5", "ScanType": "OE"}``.

    For advanced use, note that the template is converted into a regular
    expression, so regular expression syntax can be used in the template. The
    expression is matched from the start of ``scan_name``; trailing text that the
    template does not cover is ignored.

    Parameters
    ----------
    template
        Template describing the layout of the scan name.
    scan_name
        Scan name to parse.

    Returns
    -------
    dict
        Matched fields keyed by placeholder name, with string values converted
        to upper case. ``ScanType`` is optional and is ``None`` when absent. An
        empty dictionary is returned (and a message printed) if the scan name
        does not match the template.
    """
    import re

    # Each placeholder becomes a named group. Character classes are spelled out
    # as [A-Za-z]: the range [A-z] also matches "[", "\\", "]", "^", "_" and "`".
    placeholder_patterns = {
        "Date": r"(?P<Date>[0-9]*)",
        "Initials": r"(?P<Initials>[A-Za-z]{2,3})",
        "EarMark": r"(?P<EarMark>NM|1L|1R|2L|2R|1L1R|1R1L|1RL|1B|IB|IL|IR)",
        "MouseID": r"(?P<MouseID>[0-9]{1,6})",
        "ScanType": r"(?P<ScanType>[A-Za-z0-9_+]+)?",
        "Timepoint": r"(?P<Timepoint>[0-9]+)",
    }
    for name, pattern in placeholder_patterns.items():
        template = template.replace(f"<{name}>", pattern)

    match = re.compile(template).match(scan_name)
    if match is None:
        print(f"Unable to match template to scan name: {scan_name}.")
        return {}

    # Optional groups that did not take part in the match are None; leave them.
    return {
        key: value.upper() if isinstance(value, str) else value
        for key, value in match.groupdict().items()
    }
[docs]
def invert_dictionary_tolist(mapping):
    """
    Invert a ``key -> iterable of items`` mapping into ``item -> key``.

    If an item is listed under more than one key, the key that comes last in
    iteration order wins.
    """
    inverted = {}
    for key, members in mapping.items():
        for member in members:
            inverted[member] = key
    return inverted
[docs]
def extract_data_tables(
    datafolder: str,
    name_template: str,
    analyse_rois: list,
    metrics=None,
    start_days=None,
    group_info: Optional[dict] = None,
    reconstruction_name=None,
    analyse_scan_types=None,
    just_summary=True,
    roi_kwargs=None,
    apply_function=None,
    filter_name="",
    more_details=None,
    roi_source_type=None,
    return_masks=False,
):
    """
    Collect region-of-interest measurements from every scan in a folder into one table.

    Each dataset yielded by ``get_hdf5_files`` has its scan name parsed with
    ``process_scan_name``. Scans whose name does not match, whose scan type is
    excluded, or that have no regions of interest are skipped. For the others,
    ``data.summary_measurements`` is called and the parsed details are added as
    columns before all tables are concatenated.

    Parameters
    ----------
    datafolder
        Folder passed to ``get_hdf5_files``.
    name_template
        Template passed to ``process_scan_name``. The parsed result must contain
        ``MouseID``, which is converted to ``int``.
    analyse_rois
        Passed to ``summary_measurements`` as ``include_rois``.
    metrics
        Passed to ``summary_measurements``; defaults to ``["thb", "so2"]``.
    start_days
        ``{column_name: {start: [mouse_id, ...]}}``. For each column the value
        stored is ``(scan datetime - start).days + 1`` for the scan's mouse, so
        ``start`` is presumably a datetime - confirm against callers. Mouse ids
        are looked up as integers.
    group_info
        ``{column_name: {group_value: [mouse_id, ...]}}``. For each column the
        value stored is the ``group_value`` that lists the scan's mouse id
        (looked up as an integer).
    reconstruction_name
        Passed to ``data.set_default_recon`` for every dataset.
    analyse_scan_types
        If not None, scans whose parsed ``ScanType`` is not in this collection
        are skipped.
    just_summary, roi_kwargs, return_masks
        Passed through to ``summary_measurements``.
    apply_function
        If not None, called with each measurements table; its return value
        replaces the table.
    filter_name
        Passed to ``get_hdf5_files``.
    more_details
        If not None, an iterable of callables. Each is called with the dataset
        and must return a dict whose items are added to the scan's details.
    roi_source_type
        If not None, within each (MouseID, Timepoint) group that contains a scan
        of this type, every scan's ``external_roi_interface`` is set to that scan.
    Returns
    -------
    df
        Concatenated measurements with added ``Radius``, ``Volume`` and
        timezone-naive ``Date`` columns.
    images
        An empty list; nothing in this function adds to it.

    Notes
    -----
    A ``KeyError`` is raised if a scan's mouse id is missing from a ``group_info``
    or ``start_days`` mapping. ``pd.concat`` is called on the collected tables
    without checking that any were collected.
    """
    if start_days is None:
        start_days = {}
    if group_info is None:
        group_info = {}
    if metrics is None:
        metrics = ["thb", "so2"]
    # Invert {value: [mouse ids]} into {mouse id: value} for direct lookup by mouse.
    start_date_map = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in start_days.items()
    }
    group_info = {
        timepoint: invert_dictionary_tolist(mapping)
        for timepoint, mapping in group_info.items()
    }
    images = []
    tables = []
    # Share regions of interest between adjacent scans.
    datasets = list(get_hdf5_files(datafolder, filter_name=filter_name))
    dataset_details = [
        (process_scan_name(name_template, data.get_scan_name()), data)
        for _, data in datasets
    ]
    # Generate a dictionary to lookup all the scans for each scan session. Using the MouseID and Timepoint as an ID.
    # Scans whose name did not match the template have empty details and so share the id (None, None).
    scan_map = {}
    for details, data in dataset_details:
        scan_id = (details.get("MouseID", None), details.get("Timepoint", None))
        if scan_id not in scan_map:
            scan_map[scan_id] = {}
        scan_map[scan_id][details.get("ScanType")] = data
    if roi_source_type is not None:
        for scan_id in scan_map:
            # Sessions without a scan of the source type are left unchanged.
            if roi_source_type not in scan_map[scan_id]:
                continue
            # Point every scan in the session (including the source scan itself) at the source scan.
            for scan_type in scan_map[scan_id]:
                scan_map[scan_id][scan_type].external_roi_interface = scan_map[scan_id][
                    roi_source_type
                ]
    # Loop through all datasets and extract data.
    for f, data in datasets:
        print(f, data.get_scan_name())
        # Set the default reconstruction method.
        data.set_default_recon(reconstruction_name)
        # Extract useful information from the scan name
        scan_name = str.strip(data.get_scan_name())
        details = process_scan_name(name_template, scan_name)
        if analyse_scan_types is not None:
            if details.get("ScanType", None) not in analyse_scan_types:
                continue
        # An empty dictionary means the scan name did not match the template.
        if not details:
            continue
        details["File"] = f
        # Mouse id - must be set for this analysis code.
        mouse_id = int(details["MouseID"])
        # Get the scan date.
        date = data.get_scan_datetime()
        # Extract details (e.g treatment, cell line etc)
        for detail, mouse_mapping in group_info.items():
            details[detail] = mouse_mapping[mouse_id]
        # Extract the time data (e.g. time since dosing started, time since implantation)
        # The + 1 makes a scan taken on the start date count as day 1.
        for time_detail, mouse_mapping in start_date_map.items():
            details[time_detail] = (date - mouse_mapping[mouse_id]).days + 1
        if more_details is not None:
            for fn in more_details:
                for k, v in fn(data).items():
                    details[k] = v
        # Scans without any regions of interest contribute nothing to the table.
        if not data.get_rois():
            continue
        else:
            measurements = data.summary_measurements(
                metrics=metrics,
                include_rois=analyse_rois,
                roi_kwargs=roi_kwargs,
                just_summary=just_summary,
                return_masks=return_masks,
            )
            # Add every parsed detail as a column of the measurements table.
            for d in details:
                measurements[d] = details[d]
            # Overwrites the "Date" parsed from the scan name (if any) with the scan datetime.
            measurements["Date"] = data.get_scan_datetime()
            if apply_function is not None:
                measurements = apply_function(measurements)
            tables.append(measurements)
    df = pd.concat(tables).reset_index()
    # Radius of a circle with the same area as "Area", times 75 * 3e-3.
    # NOTE(review): the scale factor is presumably a pixel-size/unit conversion - confirm.
    df["Radius"] = np.sqrt(df["Area"] / np.pi) * 75 * 3e-3
    # Volume of a sphere with that radius.
    df["Volume"] = df["Radius"] ** 3 * 4 * np.pi / 3
    # Parse as UTC, then drop the timezone information.
    df["Date"] = pd.to_datetime(df["Date"], utc=True, dayfirst=True).dt.tz_localize(
        None
    )
    return df, images
[docs]
def set_matplotlib_defaults(fig_width=91.5, fig_height=89):
    """
    Apply a fixed set of figure style defaults to ``matplotlib.rcParams``.

    Parameters
    ----------
    fig_width, fig_height
        Default figure size. Both are divided by 25.4 before being stored as
        ``figure.figsize``, i.e. they are given in millimetres. The default
        width is 91.5 mm; use 183 mm for a double-width figure.
    """
    import matplotlib

    mm_per_inch = 25.4
    defaults = {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "font.sans-serif": "Arial",
        "figure.dpi": 227,
        "figure.figsize": (fig_width / mm_per_inch, fig_height / mm_per_inch),
        "font.size": 7,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "savefig.pad_inches": 0,
        "figure.subplot.bottom": 0.075,
        "figure.subplot.hspace": 0.4,
        "figure.subplot.left": 0.075,
        "figure.subplot.right": 0.97,
        "figure.subplot.top": 0.925,
        "figure.subplot.wspace": 0.5,
        "figure.titlesize": "medium",
        "axes.titlesize": "small",
        "lines.markersize": 3,
        "legend.frameon": False,
    }
    # Assign one key at a time, in the order listed above.
    for key, value in defaults.items():
        matplotlib.rcParams[key] = value