# Source code for lux.action.filter

#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import lux
from lux.interestingness.interestingness import interestingness
from lux.vis.Vis import Vis
from lux.vis.VisList import VisList
from lux.processor.Compiler import Compiler
from lux.utils import utils
from lux.utils.utils import get_filter_specs


def add_filter(ldf):
    """
    Build the "Filter" recommendation: the current intent re-drawn under alternative filters.

    The filter clauses found in ``ldf._intent`` decide which candidates are generated:

    * exactly one filter on a ``"nominal"`` attribute: one vis for every other
      unique value of that attribute;
    * exactly one filter on a ``"quantitative"`` attribute: a single vis that
      uses the complementary inequality (``>`` <-> ``<=``, ``<`` <-> ``>=``);
    * exactly one filter on an attribute of any other data type: no candidates
      (the returned collection is empty);
    * any other number of filters (none, or more than one): one vis per unique
      value of every column whose cardinality is strictly between 1 and 30 and
      that is not already one of the intent's attribute clauses.

    Independently of the above, when the current vis is a single line chart and
    the intent contains a filter, the result is replaced by a "Similarity"
    recommendation built from the intent with its last clause swapped for a
    clause over all values of the last filter's attribute.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
            Keys: "action", "description", "long_description", "collection".
    """
    # Complement of each supported inequality. "=" and "!=" are deliberately absent.
    # TODO: need to support case where fltr_op is "=" --> auto-binned ranges
    complementary_ops = {">": "<=", "<": ">=", ">=": "<", "<=": ">"}

    filters = utils.get_filter_specs(ldf._intent)
    column_spec = utils.get_attrs_specs(ldf._intent)
    output = []

    def spec_with(clause):
        # Every candidate starts from a fresh copy of the attribute clauses so
        # that the candidates never share (and mutate) one spec list.
        spec = column_spec.copy()
        spec.append(clause)
        return spec

    if len(filters) == 1:
        fltr = filters[0]
        attr_type = ldf.data_type[fltr.attribute]
        # Bound up-front so that a filter on an attribute that is neither nominal
        # nor quantitative yields an empty "Filter" recommendation instead of an
        # UnboundLocalError further down.
        recommendation = {
            "action": "Filter",
            "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
            "long_description": f"Swap out the filter value for {fltr.attribute} to other possible values, while keeping all else the same. Visualizations are ranked based on interestingness",
        }
        if attr_type == "nominal":
            # One vis per unique value other than the one already filtered on.
            current_values = [fltr.value]
            for val in ldf.unique_values[fltr.attribute]:
                if val not in current_values:
                    output.append(Vis(spec_with(lux.Clause(attribute=fltr.attribute, value=val))))
        elif attr_type == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
                "long_description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
            }
            new_op = complementary_ops.get(fltr.filter_op)
            # Without a complementary operator there is nothing meaningful to
            # show; previously a Clause with filter_op=None was created here.
            if new_op is not None:
                new_filter = lux.Clause(
                    attribute=fltr.attribute,
                    filter_op=new_op,
                    value=fltr.value,
                )
                output.append(Vis(spec_with(new_filter), score=1))
    else:
        # Reached with no filter at all, and also with two or more filters. In
        # both cases the candidates are the attribute clauses plus exactly one
        # new equality filter.
        intended_attrs = ", ".join(
            str(clause.attribute)
            for clause in ldf._intent
            if clause.value == "" and clause.attribute != "Record"
        )
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
            "long_description": "Adding any filter while keeping the attributes on the x and y axes fixed. Visualizations are ranked based on interestingness",
        }
        column_spec_attr = [clause.attribute for clause in column_spec]
        for col in ldf.columns:
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if 1 < ldf.cardinality[col] < 30 and col not in column_spec_attr:
                for val in ldf.unique_values[col]:
                    new_filter = lux.Clause(attribute=col, filter_op="=", value=val)
                    output.append(Vis(spec_with(new_filter)))

    if (
        ldf.current_vis is not None
        and len(ldf.current_vis) == 1
        and ldf.current_vis[0].mark == "line"
    ):
        intent_filters = get_filter_specs(ldf.intent)
        if len(intent_filters) > 0:
            recommendation = {
                "action": "Similarity",
                "description": "Show other charts that are visually similar to the Current vis.",
                "long_description": "Show other charts that are visually similar to the Current vis.",
            }
            last = intent_filters[-1]
            # From here on `output` holds intent clauses rather than Vis objects:
            # the intent minus its last clause ...
            output = ldf.intent.copy()[0:-1]
            # ... plus a clause carrying every value of the filtered attribute.
            arr = ldf[last.attribute].unique().tolist()
            output.append(lux.Clause(last.attribute, last.attribute, arr))

    vlist = VisList(output, ldf)
    # Scores are computed on a second VisList built from the same input, as in
    # the original code. NOTE(review): presumably this shields `vlist` from
    # whatever interestingness() does to its argument -- confirm before merging
    # the two lists.
    vlist_copy = VisList(output, ldf)
    for i in range(len(vlist_copy)):
        vlist[i].score = interestingness(vlist_copy[i], ldf)
    vlist.sort()
    vlist = vlist.showK()
    if recommendation["action"] == "Similarity":
        # Drop the top-ranked entry. NOTE(review): presumably that entry is the
        # current vis itself -- confirm.
        recommendation["collection"] = vlist[1:]
    else:
        recommendation["collection"] = vlist
    return recommendation