Source code for lux.processor.Validator

#  Copyright 2019-2020 The Lux Authors.
# 
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

# from ..luxDataFrame.LuxDataframe import LuxDataFrame
from lux.core.frame import LuxDataFrame
from lux.vis.Clause import Clause
from typing import List
from lux.utils.date_utils import is_datetime_series,is_datetime_string
import warnings
[docs]class Validator: ''' Contains methods for validating lux.Clause objects in the intent. '''
[docs] def __init__(self): self.name = "Validator"
def __repr__(self): return f"<Validator>"
[docs] @staticmethod def validate_intent(intent: List[Clause], ldf:LuxDataFrame) -> None: """ Validates input specifications from the user to find inconsistencies and errors. Parameters ---------- ldf : lux.core.frame LuxDataFrame with underspecified intent. Returns ------- None Raises ------ ValueError Ensures input intent are consistent with DataFrame content. """ def validate_clause(clause): if not((clause.attribute and clause.attribute == "?") or (clause.value and clause.value=="?")): if isinstance(clause.attribute,list): for attr in clause.attribute: if attr not in list(ldf.columns): warnings.warn(f"The input attribute '{attr}' does not exist in the DataFrame.") else: if (clause.attribute!="Record"): #we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation if (clause.attribute and not is_datetime_string(clause.attribute)): if not clause.attribute in list(ldf.columns): warnings.warn(f"The input attribute '{clause.attribute}' does not exist in the DataFrame.") if (clause.value and clause.attribute and clause.filter_op=="="): series = ldf[clause.attribute] if (not is_datetime_series(series)): if isinstance(clause.value, list): vals = clause.value else: vals = [clause.value] for val in vals: if (val not in series.values):#(not series.str.contains(val).any()): warnings.warn(f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame.") for clause in intent: if type(clause) is list: for s in clause: validate_clause(s) else: validate_clause(clause)