Source code for lux.action.univariate

#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from lux.interestingness.interestingness import interestingness
from lux.vis.VisList import VisList
import lux
from lux.utils import utils


[docs]def univariate(ldf, *args): """ Generates bar chart distributions of different attributes in the dataframe. Parameters ---------- ldf : lux.core.frame LuxDataFrame with underspecified intent. data_type_constraint: str Controls the type of distribution chart that will be rendered. Returns ------- recommendations : Dict[str,obj] object with a collection of visualizations that result from the Distribution action. """ import numpy as np if len(args) == 0: data_type_constraint = "quantitative" else: data_type_constraint = args[0][0] filter_specs = utils.get_filter_specs(ldf._intent) ignore_rec_flag = False if data_type_constraint == "quantitative": possible_attributes = [ c for c in ldf.columns if ldf.data_type_lookup[c] == "quantitative" and ldf.cardinality[c] > 5 and c != "Number of Records" ] intent = [lux.Clause(possible_attributes)] intent.extend(filter_specs) recommendation = { "action": "Distribution", "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.", } # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) if len(ldf) < 5: ignore_rec_flag = True elif data_type_constraint == "nominal": intent = [lux.Clause("?", data_type="nominal")] intent.extend(filter_specs) recommendation = { "action": "Occurrence", "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.", } elif data_type_constraint == "temporal": intent = [lux.Clause("?", data_type="temporal")] intent.extend(filter_specs) recommendation = { "action": "Temporal", "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.", } # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) if len(ldf) < 3: ignore_rec_flag = True if ignore_rec_flag: recommendation["collection"] = [] return recommendation vlist = VisList(intent, ldf) for vis in vlist: vis.score = interestingness(vis, ldf) # vlist = vlist.topK(15) # Basic visualizations should not be capped vlist.sort() recommendation["collection"] = vlist return recommendation