Source code for lux.processor.Parser

#  Copyright 2019-2020 The Lux Authors.
# 
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from lux.vis.Clause import Clause
from lux.core.frame import LuxDataFrame
from typing import List, Union
class Parser:
    """
    The parser takes in the user's input specifications (with string
    `description` fields), then generates the Lux internal specification
    through lux.Clause.
    """

    @staticmethod
    def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
        """
        Given the string description from a list of input Clauses (intent),
        assign the appropriate clause.attribute, clause.filter_op, and
        clause.value.

        Each element of `intent` is handled according to its type:

        * a list        -> one Clause whose attribute is the list of its
                           string members (non-string members are dropped);
        * a string      -> one Clause built from the shorthand syntax
                           ("A", "A|B", "A=x" or "A=x|y");
        * a lux.Clause  -> kept as is.

        Elements of any other type are skipped. Afterwards every clause with
        a non-empty `description` has that description resolved into
        `attribute` / `filter_op` / `value` in place.

        Parameters
        ----------
        intent : List[Clause]
            Underspecified list of lux.Clause objects.

        Returns
        -------
        List[Clause]
            Parsed list of lux.Clause objects.

        Raises
        ------
        TypeError
            If `intent` is not a list.
        """
        if not isinstance(intent, list):
            raise TypeError(
                "Input intent must be a list consisting of string descriptions or lux.Clause objects."
                "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html"
            )

        # Convert the users' string/list inputs into lux specifications.
        parsed = []
        for item in intent:
            if isinstance(item, list):
                # TODO: Move validation check (membership in the dataframe
                # columns) to Validator.
                parsed.append(Clause(attribute=[v for v in item if isinstance(v, str)]))
            elif isinstance(item, str):
                parsed.append(Parser._clause_from_string(item))
            elif isinstance(item, Clause):
                parsed.append(item)
            # Anything else is deliberately ignored, as before.

        for clause in parsed:
            Parser._resolve_description(clause)
        return parsed

    @staticmethod
    def _clause_from_string(text: str) -> Clause:
        """
        Build a Clause from a shorthand string.

        A string containing "=" is a filter: everything before the first "="
        is the attribute and the remainder is the value. Otherwise the string
        names one attribute, or several separated by "|".

        NOTE(review): only "=" is recognised on this path, so a string such
        as "A>=5" yields attribute "A>" with filter_op "=". Use a Clause with
        a `description` for the other comparison operators.
        """
        # TODO: Move validation check to Validator.
        if "=" in text:
            attribute, _, raw_value = text.partition("=")
            # The "|" test is made on the whole string (not just the value
            # part), which keeps the historical behaviour.
            value = raw_value.split("|") if "|" in text else raw_value
            return Clause(attribute=attribute, filter_op="=", value=value)

        attribute = text.split("|") if "|" in text else text
        return Clause(attribute=attribute)

    @staticmethod
    def _resolve_description(clause: Clause) -> None:
        """
        Fill in attribute / filter_op / value from `clause.description`.

        A string description containing a comparison operator is split into
        attribute, operator and value; a purely numeric value is converted to
        float. Any other string or list description is used as the attribute.
        Clauses with an empty description are left untouched.
        """
        import re

        description = clause.description
        if not description:
            return

        # TODO: Move validation check to Validator.
        if isinstance(description, str):
            if any(op in description for op in (">", "<", "=")):
                # Two-character operators must be listed before their
                # one-character prefixes: regex alternation takes the first
                # branch that matches, so ">" would otherwise shadow ">=".
                operator = re.findall(r"/.*/|>=|<=|!=|>|<|=", description)[0]
                # Split once only, so a value that itself contains the
                # operator text is not truncated.
                attribute, _, value = description.partition(operator)
                clause.filter_op = operator
                clause.attribute = attribute
                clause.value = value
                if re.match(r"^-?\d+(?:\.\d+)?$", value):
                    clause.value = float(value)
            else:
                clause.attribute = description
        elif isinstance(description, list):
            clause.attribute = description
# ldf._intent = intent