#!/usr/bin/env python
# -*- coding: utf-8 -*-
# models.dvh.py
"""
Class to retrieve DVH data from SQL, calculate parameters dependent on DVHs, and extract plotting data
"""
# Copyright (c) 2016-2019 Dan Cutright
# This file is part of DVH Analytics, released under a BSD license.
# See the file LICENSE included with this distribution, also
# available at https://github.com/cutright/DVH-Analytics
from copy import deepcopy
from dateutil.parser import parse as date_parser
import numpy as np
from dvha.db.sql_connector import DVH_SQL
from dvha.db.sql_to_python import QuerySQL
from dvha.options import Options
MAX_DOSE_VOLUME = Options().MAX_DOSE_VOLUME
# This class retrieves DVH data from the SQL database and calculates statistical DVHs (min, max, quartiles)
# It also provides some inspection tools of the retrieved data
[docs]class DVH:
"""This class will retrieve DVHs and other data in the DVH SQL table meeting the given constraints,
it will also parse the DVH_string into python lists and retrieve the associated Rx dose
Parameters
----------
uid : list
study_instance_uid's to be included in results
dvh_condition : str
a string in SQL syntax applied to a DVH Table query
dvh_bin_width : int
retrieve every nth value from dvh_string in SQL
group : int
either 1 or 2
"""
def __init__(self, uid=None, dvh_condition=None, dvh_bin_width=5, group=1):
self.dvh_bin_width = dvh_bin_width
constraints_str = ""
if uid:
constraints_str = "study_instance_uid in ('%s')" % "', '".join(uid)
if dvh_condition:
constraints_str = "(%s) and %s" % (
dvh_condition,
constraints_str,
)
elif dvh_condition:
constraints_str = dvh_condition
# Get DVH data from SQL and set as attributes
dvh_data = QuerySQL("DVHs", constraints_str, group=group)
if dvh_data.mrn:
ignored_keys = {
"cnx",
"cursor",
"table_name",
"constraints_str",
"condition_str",
}
self.keys = []
for key, value in dvh_data.__dict__.items():
if not key.startswith("__") and key not in ignored_keys:
if key == "dvh_string":
dvh_split = [
dvh.split(",")[:: self.dvh_bin_width]
for dvh in value
]
setattr(self, key, value)
if "_string" not in key:
self.keys.append(key)
# Move mrn to beginning of self.keys
if "mrn" in self.keys:
self.keys.pop(self.keys.index("mrn"))
self.keys.insert(0, "mrn")
# uid passed into DVH init is a unique list for querying and it is not in order of query results
self.uid = self.study_instance_uid
# Add these properties to dvh_data since they aren't in the DVHs SQL table
self.count = len(self.mrn)
self.study_count = len(set(self.uid))
self.rx_dose = self.get_plan_values("rx_dose")
self.sim_study_date = self.get_plan_values("sim_study_date")
self.keys.append("rx_dose")
self.endpoints = {"data": None, "defs": None}
self.eud = None
self.ntcp_or_tcp = None
self.bin_count = max([len(dvh) for dvh in dvh_split])
self.dvh = np.zeros([self.bin_count, self.count])
# Get needed values not in DVHs table
for i in range(self.count):
# Process dvh_string to numpy array, and pad with zeros at the end
# so that all dvhs are the same length
current_dvh = np.array(dvh_split[i], dtype=np.float)
current_dvh_max = np.max(current_dvh)
if current_dvh_max > 0:
current_dvh = np.divide(current_dvh, current_dvh_max)
zero_fill = np.zeros(self.bin_count - len(current_dvh))
self.dvh[:, i] = np.concatenate((current_dvh, zero_fill))
self.dth = []
for i in range(self.count):
# Process dth_string to numpy array
try:
self.dth.append(
np.array(self.dth_string[i].split(","), dtype=np.float)
)
except Exception:
self.dth.append(np.array([0]))
# Store these now so they can be saved in DVH object without needing to query later
with DVH_SQL() as cnx:
self.physician_count = len(
cnx.get_unique_values(
"Plans",
"physician",
"study_instance_uid in ('%s')" % "','".join(self.uid),
)
)
self.total_fxs = self.get_plan_values("fxs")
self.fx_dose = self.get_rx_values("fx_dose")
else:
self.count = 0
[docs] def get_plan_values(self, plan_column):
"""Get values from the Plans table and store in order matching mrn / study_instance_uid
Parameters
----------
plan_column : str
name of the SQL column to be returned
Returns
-------
list
values from the Plans table for the DVHs stored in this class
"""
with DVH_SQL() as cnx:
condition = "study_instance_uid in ('%s')" % "','".join(
self.study_instance_uid
)
data = cnx.query(
"Plans", "study_instance_uid, %s" % plan_column, condition
)
force_date = cnx.is_sqlite_column_datetime(
"Plans", plan_column
) # returns False for pgsql
uids = [row[0] for row in data]
values = [row[1] for row in data]
if force_date: # sqlite does not have date or time like variables
for i, value in enumerate(values):
try:
if type(value) is int:
values[i] = str(date_parser(str(value)))
else:
values[i] = str(date_parser(value))
except Exception:
values[i] = "None"
return [values[uids.index(uid)] for uid in self.study_instance_uid]
[docs] def get_rx_values(self, rx_column):
"""Get values from the Rxs table and store in order matching mrn / study_instance_uid
Parameters
----------
rx_column : str
name of the SQL column to be returned
Returns
-------
list
values from the Rxs table for the DVHs stored in this class
"""
with DVH_SQL() as cnx:
condition = "study_instance_uid in ('%s')" % "','".join(
self.study_instance_uid
)
data = cnx.query(
"Rxs", "study_instance_uid, %s" % rx_column, condition
)
uids = [row[0] for row in data]
values = [row[1] for row in data]
final_values = {}
for i, uid in enumerate(uids):
if uid in list(values):
final_values[uid] = "%s,%s" % (final_values[uid], values[i])
else:
final_values[uid] = str(values[i])
return [final_values[uid] for uid in self.study_instance_uid]
@property
def x_axis(self):
"""Get the x-axis for plotting
Returns
-------
list
Dose axis
"""
bins, width = self.bin_count, self.dvh_bin_width
return np.add(np.arange(bins) * width, width / 2.0).tolist()
@property
def x_data(self):
"""Get the x-axes for plotting (one row per DVH)
Returns
-------
list
x data for plotting
"""
return [self.x_axis] * self.count
@property
def y_data(self):
"""Get y-values of the DVHs
Returns
-------
list
all DVHs in order (i.e., same as mrn, study_instance_uid)
"""
return [self.dvh[:, i].tolist() for i in range(self.count)]
[docs] def get_cds_data(self, keys=None):
"""Get data from this class in a format compatible with bokeh's ColumnDataSource.data
Parameters
----------
keys : list, optional
Specify a list of DVH properties to be included
Returns
-------
dict
data from this class
"""
if not keys:
keys = self.keys
# TODO: Is deepcopy needed here?
return deepcopy({key: getattr(self, key) for key in keys})
[docs] def get_percentile_dvh(self, percentile):
"""Get a population DVH based on percentile
Parameters
----------
percentile : float
the percentile to calculate for each dose-bin. Passed into
np.percentile()
Returns
-------
np.ndarray
a single DVH such that each bin is the given percentile of each
bin over the whole sample
"""
return np.percentile(self.dvh, percentile, 1)
[docs] def get_dose_to_volume(
self,
volume,
volume_scale="absolute",
dose_scale="absolute",
compliment=False,
):
"""Get the minimum dose to a specified volume
Parameters
----------
volume : int, float
the specified volume in cm^3
volume_scale : str, optional
either 'relative' or 'absolute'
dose_scale : str, optional
either 'relative' or 'absolute'
compliment : bool, optional
return the max dose - value
Returns
-------
list
the dose in Gy to the specified volume
"""
doses = np.zeros(self.count)
for x in range(self.count):
dvh = np.zeros(len(self.dvh))
for y in range(len(self.dvh)):
dvh[y] = self.dvh[y][x]
if volume_scale == "relative":
doses[x] = dose_to_volume(
dvh, volume, dvh_bin_width=self.dvh_bin_width
)
else:
if self.volume[x]:
doses[x] = dose_to_volume(
dvh,
volume / self.volume[x],
dvh_bin_width=self.dvh_bin_width,
)
else:
doses[x] = 0
if dose_scale == "relative":
if self.rx_dose[0]:
doses = np.divide(doses * 100, self.rx_dose[0 : self.count])
else:
self.rx_dose[
0
] = 1 # if review dvh isn't defined, the following line would crash
doses = np.divide(doses * 100, self.rx_dose[0 : self.count])
self.rx_dose[0] = 0
doses[0] = 0
if compliment:
if dose_scale == "absolute":
doses = self.max_dose[0 : self.count] - doses
else:
doses = 100.0 * np.divide(self.max_dose[0 : self.count], self.rx_dose) - doses
return doses.tolist()
[docs] def get_volume_of_dose(
self,
dose,
dose_scale="absolute",
volume_scale="absolute",
compliment=False,
):
"""Get the volume of an isodose contour defined by ``dose``
Parameters
----------
dose : int, float
input dose use to calculate a volume of dose for entire sample
dose_scale : str, optional
either 'absolute' or 'relative'
volume_scale : str, optional
either 'absolute' or 'relative'
compliment : bool, optional
return the ROI volume - value
Returns
-------
list
a list of V_dose
"""
volumes = np.zeros(self.count)
for x in range(self.count):
dvh = np.zeros(len(self.dvh))
for y in range(len(self.dvh)):
dvh[y] = self.dvh[y][x]
if dose_scale == "relative":
if isinstance(self.rx_dose[x], str):
volumes[x] = 0
else:
volumes[x] = volume_of_dose(
dvh,
dose * self.rx_dose[x],
dvh_bin_width=self.dvh_bin_width,
)
else:
volumes[x] = volume_of_dose(
dvh, dose, dvh_bin_width=self.dvh_bin_width
)
if volume_scale == "absolute":
volumes = np.multiply(volumes, self.volume[0 : self.count])
else:
volumes = np.multiply(volumes, 100.0)
if compliment:
if volume_scale == "absolute":
volumes = self.volume[0 : self.count] - volumes
else:
volumes = 100.0 * np.ones(self.count) - volumes
return volumes.tolist()
[docs] def get_resampled_x_axis(self, resampled_bin_count=5000):
"""Get the x_axis of a resampled dvh
Parameters
----------
resampled_bin_count : int, optional
Specify the number of bins used
Returns
-------
np.ndarray
the x axis of a resampled dvh
"""
x_axis, dvhs = self.resample_dvh(
resampled_bin_count=resampled_bin_count
)
return x_axis
[docs] def get_stat_dvh(
self, stat_type="mean", dose_scale="absolute", volume_scale="relative"
):
"""Get population DVH by statistical function
Parameters
----------
stat_type : str
either 'min', 'mean', 'median', 'max', or 'std'
dose_scale : str, optional
either 'absolute' or 'relative'
volume_scale : str, optional
either 'absolute' or 'relative'
Returns
-------
np.ndarray
a single dvh where each bin is the stat_type of each bin for the
entire sample
"""
if dose_scale == "relative":
x_axis, dvhs = self.resample_dvh()
else:
dvhs = self.dvh
if volume_scale == "absolute":
dvhs = self.dvhs_to_abs_vol(dvhs)
stat_function = {
"min": np.min,
"mean": np.mean,
"median": np.median,
"max": np.max,
"std": np.std,
}
dvh = stat_function[stat_type](dvhs, 1)
return dvh
[docs] def get_standard_stat_dvh(
self, dose_scale="absolute", volume_scale="relative"
):
"""Get a min, mean, median, max, and quartiles DVHs
Parameters
----------
dose_scale : str, optional
either 'absolute' or 'relative'
volume_scale : str, optional
either 'absolute' or 'relative'
Returns
-------
dict
a standard set of statistical dvhs ('min', 'q1', 'mean', 'median',
'q1', and 'max')
"""
if dose_scale == "relative":
x_axis, dvhs = self.resample_dvh()
else:
dvhs = self.dvh
if volume_scale == "absolute":
dvhs = self.dvhs_to_abs_vol(dvhs)
standard_stat_dvh = {
"min": np.min(dvhs, 1),
"q1": np.percentile(dvhs, 25, 1),
"mean": np.mean(dvhs, 1),
"median": np.median(dvhs, 1),
"q3": np.percentile(dvhs, 75, 1),
"max": np.max(dvhs, 1),
}
return standard_stat_dvh
[docs] def dvhs_to_abs_vol(self, dvhs):
"""Get DVHs in absolute volume
Parameters
----------
dvhs : np.ndarray
relative DVHs (dvh[bin, roi_index])
Returns
-------
np.ndarray
absolute DVHs
"""
return np.multiply(dvhs, self.volume)
[docs] def resample_dvh(self, resampled_bin_count=5000):
"""Resample DVHs with a new bin count
Parameters
----------
resampled_bin_count : int
Number of bins in reampled DVH
Returns
-------
tuple
x-axis, y-axis of resampled DVHs
"""
min_rx_dose = np.min(self.rx_dose) * 100.0
new_bin_count = int(
np.divide(float(self.bin_count), min_rx_dose) * resampled_bin_count
)
x1 = np.linspace(0, self.bin_count, self.bin_count)
y2 = np.zeros([new_bin_count, self.count])
for i in range(self.count):
x2 = np.multiply(
np.linspace(0, new_bin_count, new_bin_count),
self.rx_dose[i] * 100.0 / resampled_bin_count,
)
y2[:, i] = np.interp(x2, x1, self.dvh[:, i])
x2 = np.divide(
np.linspace(0, new_bin_count - 1, new_bin_count) + 0.5,
resampled_bin_count,
)
return x2, y2
[docs] def get_summary(self):
"""Get a summary of the data in this class. Used in bottom left of
main GUI
Returns
-------
str
Summary data
"""
summary = [
"Study count: %s" % self.study_count,
"DVH count: %s" % self.count,
"Institutional ROI count: %s" % len(set(self.institutional_roi)),
"Physician ROI count: %s" % len(set(self.physician_roi)),
"ROI type count: %s" % len(set(self.roi_type)),
"Physician count: %s" % self.physician_count,
"\nMin, Mean, Max",
"Rx dose (Gy): %0.2f, %0.2f, %0.2f"
% (
min(self.rx_dose),
sum(self.rx_dose) / self.count,
max(self.rx_dose),
),
"Volume (cc): %0.2f, %0.2f, %0.2f"
% (
min(self.volume),
sum(self.volume) / self.count,
max(self.volume),
),
"Min dose (Gy): %0.2f, %0.2f, %0.2f"
% (
min(self.min_dose),
sum(self.min_dose) / self.count,
max(self.min_dose),
),
"Mean dose (Gy): %0.2f, %0.2f, %0.2f"
% (
min(self.mean_dose),
sum(self.mean_dose) / self.count,
max(self.mean_dose),
),
"Max dose (Gy): %0.2f, %0.2f, %0.2f"
% (
min(self.max_dose),
sum(self.max_dose) / self.count,
max(self.max_dose),
),
]
return "\n".join(summary)
@property
def has_data(self):
"""Check that this class has queried data
Returns
-------
bool
True if there is data returned from SQL
"""
return bool(len(self.mrn))
# Returns the isodose level outlining the given volume
[docs]def dose_to_volume(dvh, rel_volume, dvh_bin_width=1):
"""Calculate the minimum dose to a relative volume for one DVH
Parameters
----------
dvh : np.ndarray
a single dvh
rel_volume : float
fractional volume
dvh_bin_width : int, optional
dose bin width of dvh
Returns
-------
float
minimum dose in Gy of specified volume
"""
# Return the maximum dose instead of extrapolating
if rel_volume < dvh[-1]:
return len(dvh) * dvh_bin_width * 0.01
dose_high = np.argmax(dvh < rel_volume)
y = rel_volume
x_range = [dose_high - 1, dose_high]
y_range = [dvh[dose_high - 1], dvh[dose_high]]
dose = np.interp(y, y_range, x_range) * dvh_bin_width * 0.01
return dose
[docs]def volume_of_dose(dvh, dose, dvh_bin_width=1):
"""Calculate the relative volume of an isodose line definted by ``dose``
for one DVH
Parameters
----------
dvh : np.ndarray
a single dvh
dose : float
dose in cGy
dvh_bin_width : int, optional
dose bin width of dvh
Returns
-------
float
volume in cm^3 of roi receiving at least the specified dose
"""
x = [
int(np.floor(dose / dvh_bin_width * 100)),
int(np.ceil(dose / dvh_bin_width * 100)),
]
if len(dvh) < x[1]:
return dvh[-1]
y = [dvh[x[0]], dvh[x[1]]]
roi_volume = np.interp(float(dose) / dvh_bin_width, x, y)
return roi_volume
[docs]def calc_eud(dvh, a, dvh_bin_width=1):
"""EUD = sum[ v(i) * D(i)^a ] ^ [1/a]
Parameters
----------
dvh : np.ndarray
a single DVH as a list of numpy 1D array with 1cGy bins
a : float
standard a-value for EUD calculations, organ and dose fractionation specific
dvh_bin_width : int, optional
dose bin width of dvh
Returns
-------
float
equivalent uniform dose
"""
v = -np.gradient(dvh)
dose_bins = np.linspace(0, np.size(dvh), np.size(dvh))
dose_bins = np.round(dose_bins, 3) * dvh_bin_width
bin_centers = dose_bins - (dvh_bin_width / 2.0)
eud = np.power(
np.sum(np.multiply(v, np.power(bin_centers, a))), 1.0 / float(a)
)
eud = np.round(eud, 2) * 0.01
return eud
[docs]def calc_tcp(gamma, td_tcd, eud):
"""Tumor Control Probability / Normal Tissue Complication Probability
1.0 / (1.0 + (``td_tcd`` / ``eud``) ^ (4.0 * ``gamma``))
Parameters
----------
gamma : float
Gamma_50
td_tcd : float
Either TD_50 or TCD_50
eud : float
equivalent uniform dose
Returns
-------
float
TCP or NTCP
"""
return 1.0 / (1.0 + (td_tcd / eud) ** (4.0 * gamma))