Source code for doespy.etl.steps.colcross.subplots.box

from doespy.etl.steps.colcross.base import BasePlotConfig, BaseSubplotConfig
from doespy.etl.steps.colcross.components import (
    ColsForEach,
    Metric,
)
from doespy.etl.steps.colcross.subplots.bar import calc_positions
from doespy.etl.steps.colcross.subplots.box_hooks import BoxHooks
from matplotlib import pyplot as plt
import pandas as pd
from typing import Dict, List, Optional, Tuple, Union

from doespy.design.etl_design import MyETLBaseModel
from dataclasses import dataclass

from pydantic import Field

import gossip


[docs]class GroupedBoxplotChart(MyETLBaseModel): group_foreach: ColsForEach = Field(default_factory=ColsForEach.empty) """Generate an indiviudal group (of boxes) for each unique combination of values found within the given columns. If missing, then only a single group is formed (i.e., no groups). .. code-block:: yaml :caption: Example group_foreach: cols: [col1, col2] # create a group for each unique combination of values in col1 and col2 jp_except: "(col1 == 'A') && (col2 == 'B')" # optional: exclude specific combinations """ box_foreach: ColsForEach """Within a group (of boxes), generate a box for each unique combination of values found within the given columns. .. code-block:: yaml :caption: Example box_foreach: cols: [col3, col4] # create a box for each unique combination of values in col3 and col4 # optional: exclude specific combinations (can also use cols from group_foreach, fig_foreach, etc.) jp_except: "(col1 == 'A') && (col3 == 'B')" """ part_foreach: ColsForEach = Field(default_factory=ColsForEach.empty) """Create multiple boxes in the same x-axis position for each unique combination of values found within the given columns. In addition, the columns of the ``Metric`` (i.e., $metrics$) will also result in multiple parts. If missing, only the ``Metric`` columns will be used. If the ``Metric`` only has a single column and ``part_foreach`` is missing, then a single box (per-x-axis position) is created. .. code-block:: yaml :caption: Example # the columns under metrics also result in parts part_foreach: cols: [col5, col6] # create a box part for each unique combination of values in col5 and col6 # optional: exclude specific combinations (can also use cols from group_foreach, box_foreach, etc.) jp_except: "(col1 == 'A') && (col5 == 'B')" """ box_width: float = 0.6 """The width of each box.""" box_padding: float = 0.0 """The space between boxes within a group.""" group_padding: float = 0.1 """The space between groups.""" @dataclass class Position: """:meta private:""" group_center_pos: float box_left_pos: float box_center_pos: float def get_cols(self): """:meta private:""" return ( self.group_foreach.get_cols() + self.box_foreach.get_cols() + self.part_foreach.get_cols() ) def plot( self, ax: plt.Axes, df1: pd.DataFrame, data_id: Dict[str, str], metric: Metric, subplot_config: BaseSubplotConfig, plot_config: BasePlotConfig, ): """:meta private:""" group_ticks = set() box_ticks = set() gossip.trigger( BoxHooks.SubplotPre, ax=ax, df_subplot=df1, subplot_id=data_id, metric=metric, plot_config=plot_config, subplot_config=subplot_config, chart=self, ) for part_values, position, box_group_id, box_id, box_part_id in self.for_each( df1, metric=metric, subplot_id=data_id ): full_id = {**data_id, **box_group_id, **box_id, **box_part_id} box_config = subplot_config.artist_config(full_id, plot_config) gossip.trigger( BoxHooks.ArtistPre, ax=ax, part_values=part_values, position=position, box_id=full_id, box_config=box_config, metric=metric, plot_config=plot_config, subplot_config=subplot_config, chart=self, ) label = box_config.pop("label", None) ax.boxplot( x=part_values, positions=[position.box_center_pos], widths=[self.box_width], labels=[label], **box_config, ) gossip.trigger( BoxHooks.ArtistPost, ax=ax, part_values=part_values, position=position, box_id=full_id, box_config=box_config, metric=metric, plot_config=plot_config, subplot_config=subplot_config, chart=self, ) # keep track of the labels for each artist if self.group_foreach.label is not None: group_lbl = self.group_foreach.label.apply( {**data_id, **box_group_id}, subplot_config=subplot_config, info="group_label", ) group_ticks.add((position.group_center_pos, group_lbl)) if self.box_foreach.label is not None: box_lbl = self.box_foreach.label.apply( {**data_id, **box_group_id, **box_id}, subplot_config=subplot_config, info="box_label", ) box_ticks.add((position.box_center_pos, box_lbl)) gossip.trigger( BoxHooks.SubplotPost, ax=ax, df_subplot=df1, subplot_id=data_id, metric=metric, plot_config=plot_config, subplot_config=subplot_config, chart=self, group_ticks=group_ticks, box_ticks=box_ticks, ) def for_each(self, df1: pd.DataFrame, metric: Metric, subplot_id: Dict[str, str]): """:meta private:""" assert ( metric.error_cols is None ), f"BoxplotChart: cannot have error cols in {metric=}" box_left_pos, box_center_pos, group_center_pos = calc_positions( group_foreach=self.group_foreach, inner_foreach=self.box_foreach, df1=df1, subplot_id=subplot_id, inner_width=self.box_width, inner_padding=self.box_padding, group_padding=self.group_padding, ) i_box = 0 for i_group, df_box_group, box_group_id in self.group_foreach.for_each( df1, parent_id=subplot_id ): print(f" group_id={dict(**subplot_id, **box_group_id)}") for _i_box, df_box, box_id in self.box_foreach.for_each( df_box_group, parent_id={**subplot_id, **box_group_id} ): for _i_part, df_box_part, part_id in self.part_foreach.for_each( df_box, parent_id={**subplot_id, **box_group_id, **box_id} ): for value_col in metric.value_cols: box_part_id = {**part_id, "value_col": value_col} part_values = df_box_part[value_col] position = GroupedBoxplotChart.Position( group_center_pos=group_center_pos[i_group], box_left_pos=box_left_pos[i_box], box_center_pos=box_center_pos[i_box], ) yield part_values, position, box_group_id, box_id, box_part_id i_box += 1