Source code for lux.vislib.altair.Histogram

#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from lux.vislib.altair.AltairChart import AltairChart
import altair as alt
import math

# Turn off Altair's max-rows check so charts built from larger datasets are not
# rejected. NOTE: this runs at import time and changes Altair's global
# data-transformer options for the whole process.
alt.data_transformers.disable_max_rows()


class Histogram(AltairChart):
    """
    Histogram is a subclass of AltairChart that renders a histogram.
    All rendering properties for histograms are set here.

    See Also
    --------
    altair-viz.github.io
    """

    def __init__(self, vis):
        """Delegate construction to ``AltairChart`` with the given ``vis``."""
        super().__init__(vis)

    def __repr__(self):
        return f"Histogram <{str(self.vis)}>"

    def initialize_chart(self):
        """
        Build the Altair bar chart for pre-binned data and record the
        equivalent Altair source code in ``self.code``.

        The first measure attribute of ``self.vis`` (records excluded) decides
        which axis carries the bins. ``self.data`` gains an ``<attribute>_end``
        column holding the right (or upper) edge of every bar.

        Side effects
        ------------
        * ``self.tooltip`` is set to ``False``.
        * A string ``msr_attr.attribute`` has every ``.`` removed in place.
        * ``self.data`` is replaced by its sanitized copy plus the end column.
        * ``self.code`` is extended with the code that recreates the chart.

        Returns
        -------
        alt.Chart
            The histogram chart.

        Raises
        ------
        ValueError
            If the measure is bound to a channel other than ``"x"`` or ``"y"``.
        """
        self.tooltip = False
        measure = self.vis.get_attr_by_data_model("measure", exclude_record=True)[0]
        msr_attr = self.vis.get_attr_by_channel(measure.channel)[0]

        # Abbreviate long attribute names for the axis title: first 10
        # characters, an ellipsis, then the last 7 characters.
        msr_attr_abv = str(msr_attr.attribute)
        if len(msr_attr_abv) > 17:
            msr_attr_abv = msr_attr_abv[:10] + "..." + msr_attr_abv[-7:]

        # The min/max lookup must happen BEFORE the attribute name is rewritten
        # below, because it is keyed by the unmodified name.
        x_min, x_max = self.vis.min_max[msr_attr.attribute]
        x_range = abs(x_max - x_min)

        # Drop dots from the column name (presumably because Vega-Lite reads a
        # dot in a field name as nested access -- TODO confirm).
        if isinstance(msr_attr.attribute, str):
            msr_attr.attribute = msr_attr.attribute.replace(".", "")

        markbar = compute_bin_width(self.data[msr_attr.attribute])
        # Distance between the first two bin positions.
        step = abs(self.data[msr_attr.attribute][1] - self.data[msr_attr.attribute][0])

        # When the computed bar is narrower than 1/24 of the value range, fall
        # back to a square-root choice: sqrt(number of source rows) bins, with
        # the resulting width scaled by 1.38 (empirical factor; its origin is
        # not documented in this file).
        if markbar < (x_range / 24):
            bin_count = len(self.vis._source) ** (1 / 2)
            markbar = 1.38 * x_range / bin_count

        self.data = AltairChart.sanitize_dataframe(self.data)
        end_attr_abv = str(msr_attr.attribute) + "_end"
        self.data[end_attr_abv] = self.data[str(msr_attr.attribute)] + markbar

        axis_title = f"{msr_attr_abv} (binned)"
        if msr_attr.attribute == " ":
            axis_title = "Series (binned)"

        if measure.channel == "x":
            chart = (
                alt.Chart(self.data)
                .mark_bar()
                .encode(
                    x=alt.X(
                        str(msr_attr.attribute),
                        title=axis_title,
                        bin=alt.Bin(binned=True, step=step),
                        type=msr_attr.data_type,
                        axis=alt.Axis(title=axis_title),
                        scale=alt.Scale(domain=[x_min, x_max]),
                    ),
                    x2=end_attr_abv,
                    y=alt.Y("Number of Records", type="quantitative"),
                )
            )
        elif measure.channel == "y":
            chart = (
                alt.Chart(self.data)
                .mark_bar()
                .encode(
                    x=alt.X("Number of Records", type="quantitative"),
                    y=alt.Y(
                        str(msr_attr.attribute),
                        title=axis_title,
                        bin=alt.Bin(binned=True, step=step),
                        type=msr_attr.data_type,
                        axis=alt.Axis(title=axis_title),
                    ),
                    y2=end_attr_abv,
                )
            )
        else:
            # Previously this case fell through and failed at ``return chart``
            # with an UnboundLocalError; fail with an explicit message instead.
            raise ValueError(
                "Histogram requires the measure on the 'x' or 'y' channel, "
                f"got {measure.channel!r}"
            )

        #####################################
        ## Constructing Altair Code String ##
        #####################################

        self.code += "import altair as alt\n"
        self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
        # The snippet is assembled line by line so that it starts at column 0
        # and stays valid Python when appended after the lines above.
        if measure.channel == "x":
            self.code += (
                "\n"
                "chart = alt.Chart(visData).mark_bar().encode(\n"
                f"    x=alt.X('{msr_attr.attribute}', title='{axis_title}',bin=alt.Bin(binned=True, step={step}), type='{msr_attr.data_type}', axis=alt.Axis(labelOverlap=True, title='{axis_title}'), scale=alt.Scale(domain=({x_min}, {x_max}))),\n"
                f"    x2='{end_attr_abv}',\n"
                '    y=alt.Y("Number of Records", type="quantitative")\n'
                ")\n"
            )
        else:
            self.code += (
                "\n"
                "chart = alt.Chart(visData).mark_bar().encode(\n"
                f"    y=alt.Y('{msr_attr.attribute}', title='{axis_title}', bin=alt.Bin(binned=True, step={step}), type='{msr_attr.data_type}', axis=alt.Axis(labelOverlap=True, title='{axis_title}')),\n"
                f"    y2='{end_attr_abv}',\n"
                '    x=alt.X("Number of Records", type="quantitative")\n'
                ")\n"
            )
        return chart
def compute_bin_width(series):
    """
    Helper function that derives a histogram bin width from the interquartile
    range of ``series``, modelled on the Freedman-Diaconis rule.

    Source: https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule

    Parameters
    ----------
    series : array-like
        Numeric values; converted with ``np.asarray``.

    Returns
    -------
    float
        The computed width, rounded to two decimal places.

    Notes
    -----
    The scaling term is ``(n ** -1) / 3`` (that is, ``1 / (3 * n)``), which is
    how Python parses ``n ** -1 / 3``. This differs from the ``n ** (-1 / 3)``
    of the textbook rule. It is kept unchanged here because
    ``Histogram.initialize_chart`` uses the returned value directly as the
    bar width.
    """
    import numpy as np

    values = np.asarray(series)
    upper_quartile, lower_quartile = np.percentile(values, [75, 25])
    spread = upper_quartile - lower_quartile
    # Explicit parentheses: exponentiation binds tighter than division.
    shrink = (values.size ** -1) / 3
    raw_width = 2 * spread * shrink
    return round(raw_width * 3.5, 2)