#!/usr/bin/env python3
"""Utilities to visualize data for use in pyllelic."""
from typing import Dict, List, Optional, Union
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
import seaborn as sns
[docs]def _create_histogram(
data: pd.DataFrame, cell_line: str, position: str, backend: str
) -> Union[go.Figure, plt.Figure]:
"""Generate a graph figure showing fractional methylation in
a given cell line at a given site.
Args:
data (pd.DataFrame): dataframe of individual data
cell_line (str): name of cell line
position (str): genomic position
backend (str): which plotting backend to use
Returns:
Union[go.Figure, plt.Figure]: plotly or matplotlib figure object
Raises:
ValueError: invalid plotting backend provided
"""
if backend == "plotly":
fig: go.Figure = go.Figure()
fig.add_trace(
go.Histogram(
x=data.loc[cell_line, position],
xbins=dict(
start=-0.1,
end=1.1,
size=0.2,
), # offset bins to center displayed bars
)
)
fig.update_layout(
title_text=f"Methylation at pos: {position} for cell line: {cell_line}",
xaxis_title_text="Fraction of Sites Methylated in Read",
yaxis_title_text="Read Count",
bargap=0.2,
template="seaborn",
)
fig.update_xaxes(range=[-0.1, 1.1])
return fig
if backend == "matplotlib":
sns.set_theme(style="white", rc={"figure.figsize": (6, 4)})
ax = sns.histplot(
data.loc[cell_line, position], color="black", discrete=True, shrink=0.2
)
ax.set(xlabel="Methylation", ylabel="Count")
return ax
raise ValueError("Invalid plotting backend")
[docs]def _create_heatmap(
df: pd.DataFrame,
min_values: int,
width: int,
height: int,
title_type: str,
backend: str,
) -> Union[go.Figure, plt.Figure]:
"""Generate a graph figure showing heatmap of mean methylation across
cell lines.
Args:
df (pd.DataFrame): dataframe of mean methylation
min_values (int): minimum number of points data must exist at a position
width (int): figure width
height (int): figure height
title_type (str): type of figure being plotted
backend (str): which plotting backend to use
Returns:
Union[go.Figure, plt.Figure]: plotly or matplotlib figure object
Raises:
ValueError: invalid plotting backend
"""
values_needed = len(df.index) - min_values
df = df.loc[:, (df.isnull().sum(axis=0) <= values_needed)]
if backend == "plotly":
fig: go.Figure = go.Figure()
fig.add_trace(
go.Heatmap(
z=df,
x=df.columns,
y=df.index,
hoverongaps=False,
),
)
fig.update_traces(
dict(showscale=False, coloraxis=None, colorscale="RdBu_r"),
selector={"type": "heatmap"},
)
fig.update_layout(
title_text=f"{title_type} Methylation Heatmap",
xaxis_title_text="Position",
yaxis_title_text="Cell Line",
# aspect="equal",
template="seaborn",
autosize=False,
width=width,
height=height,
)
return fig
if backend == "matplotlib":
mplwidth: float = width / 66.667
mplheight: float = height / 66.667
df = df.apply(pd.to_numeric)
sns.set_theme(style="white", rc={"figure.figsize": (mplwidth, mplheight)})
ax = sns.heatmap(df, cmap="vlag", cbar=False)
ax.set(xlabel="Position", ylabel="Cell Line")
return ax
raise ValueError("Invalid plotting backend")
[docs]def _create_methylation_diffs_bar_graph(
df: pd.DataFrame, backend: str
) -> Union[go.Figure, plt.Figure]:
"""Generate a graph figure showing bar graph of significant methylation across
cell lines.
Args:
df (pd.DataFrame): dataframe of significant methylation positions
backend (str): which plotting backend to use
Returns:
Union[go.Figure, plt.Figure]: plotly or matplotlib figure object
Raises:
ValueError: invalid plotting backend
"""
data = df.pivot_table(index="position", columns="cellLine", values="ad_stat")
data = data.dropna(axis=1, how="all").count(axis=1).to_frame()
if backend == "plotly":
fig = go.Figure()
fig.add_trace(go.Bar(x=data.index, y=data.values.flatten()))
fig.update_layout(
xaxis_type="linear",
showlegend=False,
title="Significant Methylation Differences",
template="seaborn",
)
fig.update_xaxes(
tickformat="r",
tickangle=45,
nticks=40,
title="Position",
range=[int(data.index.min()), int(data.index.max())],
)
fig.update_yaxes(title="# of significant differences")
fig.update_traces(width=10)
return fig
if backend == "matplotlib":
data = data.set_index(pd.to_numeric(data.index, errors="coerce"))
ax = data.plot(
kind="bar",
legend=False,
figsize=(12, 8),
)
ax.set(xlabel="Position", ylabel="# of significant differences")
return ax
raise ValueError("Invalid plotting backend")
[docs]def _make_stacked_fig(df: pd.DataFrame, backend: str) -> Union[go.Figure, plt.Figure]:
"""Generate a graph figure showing methylated and unmethylated reads across
cell lines.
Args:
df (pd.DataFrame): dataframe of individual read data
backend (str): plotting backend to use
Returns:
Union[go.Figure, plt.Figure]: plotly or matplotlib figure
Raises:
ValueError: invalid plotting backend
"""
if backend == "plotly":
return _make_stacked_plotly_fig(df)
if backend == "matplotlib":
return _make_stacked_mpl_fig(df)
raise ValueError("Invalid plotting backend")
[docs]def _make_binary(data: Optional[List[int]]) -> List[int]:
if isinstance(data, list):
new = [data.count(0), data.count(1)]
else:
new = [0, 0]
return new
[docs]def _make_methyl_df(df: pd.DataFrame, row: str) -> pd.DataFrame:
new_df = pd.DataFrame(
{each[0]: each[1].values[0] for each in df.loc[row].to_frame().iterrows()}
)
new_df = new_df.rename(index={0: "unmethylated", 1: "methylated"})
return new_df
[docs]def _make_stacked_plotly_fig(df: pd.DataFrame) -> go.Figure:
"""Generate a graph figure showing methylated and unmethylated reads across
cell lines.
Args:
df (pd.DataFrame): dataframe of individual read data
Returns:
go.Figure: plotly figure
"""
def make_it_all(df: pd.DataFrame) -> Dict[str, go.Figure]:
def make_methyl_bar(df: pd.DataFrame) -> go.Figure:
fig = px.bar(
df.T,
height=20,
width=800,
color_discrete_sequence=["white", "black"],
labels={"value": "reads", "variable": "status"},
)
fig.update_layout(showlegend=False, margin=dict(l=0, r=0, t=0, b=0))
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)
return fig
df2 = df.applymap(_make_binary)
fig_dict = {}
for each in df2.index:
methyl_df = _make_methyl_df(df2, each)
new_fig = make_methyl_bar(methyl_df)
fig_dict[each] = new_fig
return fig_dict
fig_dict = make_it_all(df)
this_figure = sp.make_subplots(
rows=len(fig_dict),
cols=2,
column_widths=[0.1, 0.9],
horizontal_spacing=0,
vertical_spacing=0.005,
)
for i, (key, each) in enumerate(fig_dict.items()):
this_figure.append_trace(each["data"][0], row=i + 1, col=2)
this_figure.append_trace(each["data"][1], row=i + 1, col=2)
this_figure.add_annotation(
text=key,
xref="x",
yref="y",
x=1,
y=1,
showarrow=False,
font=dict(
size=12,
),
row=i + 1,
col=1,
)
this_figure.update_layout(
showlegend=False,
margin=dict(l=0, r=0, t=40, b=0),
barmode="stack",
width=800,
height=(len(fig_dict) * 40) + 40,
template="ggplot2",
paper_bgcolor="rgba(132,132,132,1)",
title_text="Methylated and Unmethylated Reads by Cell Line",
title={"font": {"color": "white"}},
)
this_figure.update_xaxes(visible=False)
this_figure.update_yaxes(visible=False)
return this_figure
[docs]def _make_stacked_mpl_fig(df: pd.DataFrame) -> plt.Figure:
"""Generate a graph figure showing methylated and unmethylated reads across
cell lines.
Args:
df (pd.DataFrame): dataframe of individual read data
Returns:
plt.Figure: matplotlib figure
"""
df2 = df.applymap(_make_binary)
df_dict = {}
for each in df2.index:
methyl_df = _make_methyl_df(df2, each)
df_dict[each] = methyl_df
new_df = pd.DataFrame()
for k, v in df_dict.items():
int_df = v.T.reset_index()
int_df["cell"] = k
new_df = pd.concat([new_df, int_df])
sns.set_theme(style="dark")
fig, ax = plt.subplots(
len(new_df["cell"].unique()),
1,
sharex="all",
sharey="all",
squeeze=False,
)
for i, each in enumerate(new_df["cell"].unique()):
tmp_df = new_df[new_df["cell"] == each].loc[
:, ["index", "unmethylated", "methylated"]
]
ax[i] = tmp_df.plot(
kind="bar",
legend=False,
width=1,
stacked=True,
color=["white", "black"],
xticks=[],
title="",
yticks=[],
xlabel="",
ylabel="",
figsize=(12, 0.5),
ax=ax[i],
)
ax[i].set_ylabel(each, rotation=0)
ax[i].yaxis.set_label_coords(-0.05, 0.3)
fig.set_size_inches(12, 0.6 * len(new_df["cell"].unique()))
plt.subplots_adjust(wspace=0, hspace=0)
fig.suptitle("Methylation of reads per cell line", y=1.02)
plt.tight_layout(pad=0)
return fig