Source code for lux.processor.Parser

#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from lux.vis.Clause import Clause
from lux.core.frame import LuxDataFrame
from typing import List, Union


class Parser:
    """
    The parser takes in the user's input specifications (with string `description` fields),
    then generates the Lux internal specification through lux.Clause.
    """

    @staticmethod
    def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
        """
        Given the string description from a list of input Clauses (intent),
        assign the appropriate clause.attribute, clause.filter_op, and clause.value.

        Each entry of `intent` is first normalized into a lux.Clause:

        - a nested list becomes one Clause whose attribute is the list of its
          string members (non-string members are dropped);
        - a Clause is kept as-is (the same object, not a copy);
        - a string containing "=" becomes a filter Clause (``attribute=value``,
          where ``|`` separates alternative values);
        - any other string becomes an attribute Clause (``|`` separates
          alternative attributes);
        - anything else is wrapped as ``Clause(attribute=entry)``.

        Afterwards, every Clause with a truthy `description` has that description
        interpreted and written back onto the Clause in place.

        Parameters
        ----------
        intent : List[Clause]
            Underspecified list of lux.Clause objects.

        Returns
        -------
        List[Clause]
            Parsed list of lux.Clause objects.

        Raises
        ------
        TypeError
            If `intent` is not a list.
        """
        if not isinstance(intent, list):
            raise TypeError(
                "Input intent must be a list consisting of string descriptions or lux.Clause objects."
                "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html"
            )
        import re

        new_context = []
        # checks for and converts users' string inputs into lux specifications
        for clause in intent:
            if isinstance(clause, list):
                # TODO: Move validation check (names exist as columns) to Validator
                attributes = [v for v in clause if isinstance(v, str)]
                new_context.append(Clause(attribute=attributes))
            elif isinstance(clause, Clause):
                new_context.append(clause)
            elif isinstance(clause, str):
                if "=" in clause:
                    # case where user specifies a filter: split on the first "="
                    var, _, raw_value = clause.partition("=")
                    # TODO: Move validation check (values/columns exist) to Validator
                    value = raw_value.split("|") if "|" in clause else raw_value
                    new_context.append(Clause(attribute=var, filter_op="=", value=value))
                else:
                    # case where user specifies a variable
                    # TODO: Move validation check (names exist as columns) to Validator
                    attribute = clause.split("|") if "|" in clause else clause
                    new_context.append(Clause(attribute=attribute))
            else:
                new_context.append(Clause(attribute=clause))

        for clause in new_context:
            description = clause.description
            if not description:
                continue
            # TODO: Move validation check to Validator
            if isinstance(description, str):
                if any(char in (">", "<", "=") for char in description):
                    # The description embeds a filter: parse it and assign
                    # clause.attribute, clause.filter_op and clause.value.
                    # Two-character operators must precede their one-character
                    # prefixes: `re` tries alternatives left to right, so with
                    # ">" listed first ">=" could never match.
                    clause.filter_op = re.findall(r"/.*/|>=|<=|!=|>|<|=", description)[0]
                    # maxsplit=1 keeps a value that itself contains the operator intact.
                    attribute, value = description.split(clause.filter_op, 1)
                    clause.attribute = attribute
                    clause.value = value
                    # Plain integer/decimal literals are stored as floats.
                    if re.match(r"^-?\d+(?:\.\d+)?$", clause.value):
                        clause.value = float(clause.value)
                else:
                    clause.attribute = description
            elif isinstance(description, list):
                clause.attribute = description
            else:
                # then it is probably a value
                clause.value = description
        return new_context