Source code for lux._config.config

"""
This config file was largely borrowed from Pandas config.py set_action functionality.
For more resources, see https://github.com/pandas-dev/pandas/blob/master/pandas/_config
"""
from collections import namedtuple
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
import lux
import warnings
from lux.utils.tracing_utils import LuxTracer
import os
from lux._config.template import postgres_template, mysql_template

# Immutable record describing one registered action: its name, the callable
# that produces recommendations, an optional display predicate, and any
# extra positional arguments supplied at registration time.
RegisteredOption = namedtuple(
    "RegisteredOption",
    ["name", "action", "display_condition", "args"],
)


class Config:
    """
    Class for Lux configurations applied globally across entire session.

    Most options are exposed as validated properties: assigning a value of
    the wrong type emits a warning (``stacklevel=2``, so it points at the
    caller) and leaves the previous value untouched.
    """

    def __init__(self):
        self._default_display = "pandas"
        self.plotting_style = None
        self.SQLconnection = ""
        self.executor = None
        # holds registered option metadata
        self.actions: Dict[str, RegisteredOption] = {}
        # flags whether or not an action has been registered or removed and
        # should be re-rendered by frame.py
        self.update_actions: Dict[str, bool] = {}
        self.update_actions["flag"] = False
        self._plotting_backend = "vegalite"
        self._plotting_scale = 1
        self._topk = 15
        self._number_of_bars = 10  # max no of bars displayed (rest shown as "+ k more")
        self._label_len = 25  # max length of x and y axis labels
        self._sort = "descending"
        self._pandas_fallback = True
        self._interestingness_fallback = True
        self.heatmap_bin_size = 40
        self.tracer_relevant_lines = []
        self.tracer = LuxTracer()
        self.query_templates = {}
        self.handle_quotes = True
        #####################################
        #### Optimization Configurations ####
        #####################################
        self._sampling_start = 100000
        self._sampling_cap = 1000000
        self._sampling_flag = True
        self._heatmap_flag = True
        self._heatmap_start = 5000
        self.lazy_maintain = True
        self.early_pruning = True
        self.early_pruning_sample_cap = 30000
        # Apply sampling only if the dataset is 150% larger than the sample cap
        self.early_pruning_sample_start = self.early_pruning_sample_cap * 1.5
        self.streaming = False
        self.render_widget = True

    @property
    def number_of_bars(self):
        """Number of bars in output bar charts; rest are not displayed."""
        return self._number_of_bars

    @number_of_bars.setter
    def number_of_bars(self, k: int) -> None:
        """
        Parameters
        ----------
        k : int
            Number of bars in output bar charts; rest are not displayed
        """
        # Exact type check on purpose: bool is a subclass of int and is rejected.
        if type(k) is int:
            self._number_of_bars = k
        else:
            warnings.warn(
                "The number of bars must be an integer.",
                stacklevel=2,
            )

    @property
    def label_len(self):
        """Maximum length of string axis labels."""
        return self._label_len

    @label_len.setter
    def label_len(self, l: int) -> None:
        """
        Parameters
        ----------
        l : int
            Maximum length of string axis labels
        """
        if type(l) is int:
            self._label_len = l
        else:
            warnings.warn(
                "The maximum length must be an integer.",
                stacklevel=2,
            )

    @property
    def topk(self):
        """Number of visualizations shown per action, or False for no limit."""
        return self._topk

    @topk.setter
    def topk(self, k: Union[int, bool]):
        """
        Setting parameter to display top k visualizations in each action

        Parameters
        ----------
        k : Union[int,bool]
            False: if display all visualizations (no top-k)
            k: number of visualizations to display
        """
        # bool is a subclass of int, so a single isinstance check covers both.
        if isinstance(k, int):
            self._topk = k
        else:
            warnings.warn(
                "Parameter to lux.config.topk must be an integer or a boolean.",
                stacklevel=2,
            )

    @property
    def sort(self):
        """Sort order of each action: "none", "ascending" or "descending"."""
        return self._sort

    @sort.setter
    def sort(self, flag: Union[str]):
        """
        Setting parameter to determine sort order of each action

        Parameters
        ----------
        flag : Union[str]
            "none", "ascending","descending"
            No sorting, sort by ascending order, sort by descending order
        """
        # Normalise only after confirming we have a string; calling .lower()
        # first would raise AttributeError on non-strings instead of warning.
        normalized = flag.lower() if isinstance(flag, str) else None
        if normalized in ("none", "ascending", "descending"):
            self._sort = normalized
        else:
            warnings.warn(
                "Parameter to lux.config.sort must be one of the following: 'none', 'ascending', or 'descending'.",
                stacklevel=2,
            )

    @property
    def pandas_fallback(self):
        """Flag controlling fallback to default Pandas when an error occurs."""
        return self._pandas_fallback

    @pandas_fallback.setter
    def pandas_fallback(self, fallback: bool) -> None:
        """
        Parameters
        ----------
        fallback : bool
            If an error occurs, whether or not to raise an exception or fallback to default Pandas.
        """
        if type(fallback) is bool:
            self._pandas_fallback = fallback
        else:
            warnings.warn(
                "The flag for Pandas fallback must be a boolean.",
                stacklevel=2,
            )

    @property
    def interestingness_fallback(self):
        """Flag controlling fallback when calculating interestingness fails."""
        return self._interestingness_fallback

    @interestingness_fallback.setter
    def interestingness_fallback(self, fallback: bool) -> None:
        """
        Parameters
        ----------
        fallback : bool
            If an error occurs while calculating interestingness, whether or not to raise an exception or fallback to default Pandas.
        """
        if type(fallback) is bool:
            self._interestingness_fallback = fallback
        else:
            warnings.warn(
                "The flag for interestingness fallback must be a boolean.",
                stacklevel=2,
            )

    @property
    def sampling_cap(self):
        """Cap on the number of rows to sample (>= ``sampling_start``)."""
        return self._sampling_cap

    @sampling_cap.setter
    def sampling_cap(self, sample_number: int) -> None:
        """
        Parameters
        ----------
        sample_number : int
            Cap on the number of rows to sample. Must be larger than _sampling_start

        Raises
        ------
        AssertionError
            If ``sample_number`` is smaller than the current sampling start.
        """
        if type(sample_number) is int:
            # Raised explicitly (rather than via `assert`) so the check is
            # not stripped when Python runs with -O.
            if sample_number < self._sampling_start:
                raise AssertionError(
                    "sampling_cap must be greater than or equal to sampling_start"
                )
            self._sampling_cap = sample_number
        else:
            warnings.warn(
                "The cap on the number samples must be an integer.",
                stacklevel=2,
            )

    @property
    def sampling_start(self):
        """Number of rows required to begin sampling (<= ``sampling_cap``)."""
        return self._sampling_start

    @sampling_start.setter
    def sampling_start(self, sample_number: int) -> None:
        """
        Parameters
        ----------
        sample_number : int
            Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap

        Raises
        ------
        AssertionError
            If ``sample_number`` is larger than the current sampling cap.
        """
        if type(sample_number) is int:
            # Raised explicitly (rather than via `assert`) so the check is
            # not stripped when Python runs with -O.
            if sample_number > self._sampling_cap:
                raise AssertionError(
                    "sampling_start must be smaller than or equal to sampling_cap"
                )
            self._sampling_start = sample_number
        else:
            warnings.warn(
                "The sampling starting point must be an integer.",
                stacklevel=2,
            )

    @property
    def sampling(self):
        """Whether or not sampling will occur."""
        return self._sampling_flag

    @sampling.setter
    def sampling(self, sample_flag: bool) -> None:
        """
        Parameters
        ----------
        sample_flag : bool
            Whether or not sampling will occur.
        """
        if type(sample_flag) is bool:
            self._sampling_flag = sample_flag
        else:
            warnings.warn(
                "The flag for sampling must be a boolean.",
                stacklevel=2,
            )

    @property
    def heatmap(self):
        """Whether or not a heatmap will be used instead of a scatter plot."""
        return self._heatmap_flag

    @heatmap.setter
    def heatmap(self, heatmap_flag: bool) -> None:
        """
        Parameters
        ----------
        heatmap_flag : bool
            Whether or not a heatmap will be used instead of a scatter plot.
        """
        if type(heatmap_flag) is bool:
            self._heatmap_flag = heatmap_flag
        else:
            warnings.warn(
                "The flag for enabling/disabling heatmaps must be a boolean.",
                stacklevel=2,
            )

    @property
    def default_display(self):
        """Default widget display: either "lux" or "pandas"."""
        return self._default_display

    @default_display.setter
    def default_display(self, type: str) -> None:
        """
        Set the widget display to show Pandas by default or Lux by default

        Parameters
        ----------
        type : str
            Default display type, can take either the string `lux` or `pandas` (regardless of capitalization)
        """
        lowered = type.lower()
        if lowered == "lux":
            self._default_display = "lux"
        elif lowered == "pandas":
            self._default_display = "pandas"
        else:
            warnings.warn(
                "Unsupported display type. Default display option should either be `lux` or `pandas`.",
                stacklevel=2,
            )

    @property
    def plotting_backend(self):
        """Internal backend identifier: "vegalite" or "matplotlib_svg"."""
        return self._plotting_backend

    @plotting_backend.setter
    def plotting_backend(self, type: str) -> None:
        """
        Set the widget display to show Vegalite by default or Matplotlib by default

        Parameters
        ----------
        type : str
            Default display type, can take either the string `vegalite` or `matplotlib` (regardless of capitalization)
        """
        lowered = type.lower()
        if lowered in ("vegalite", "altair"):
            # "altair" is accepted as an alias for the vegalite backend.
            self._plotting_backend = "vegalite"
        elif lowered == "matplotlib":
            self._plotting_backend = "matplotlib_svg"
        else:
            warnings.warn(
                "Unsupported plotting backend. Lux currently only support 'altair', 'vegalite', or 'matplotlib'",
                stacklevel=2,
            )

    @property
    def plotting_scale(self):
        """Scale factor for charts displayed in Lux."""
        return self._plotting_scale

    @plotting_scale.setter
    def plotting_scale(self, scale: float) -> None:
        """
        Set the scale factor for charts displayed in Lux.

        Parameters
        ----------
        scale : float (default = 1.0)
            Positive scaling factor; integers are converted to float.
        """
        if isinstance(scale, int):
            scale = float(scale)
        if isinstance(scale, float) and scale > 0:
            self._plotting_scale = scale
        else:
            warnings.warn(
                "Scaling factor for charts must be a positive float.",
                stacklevel=2,
            )

    def _get_action(self, pat: str, silent: bool = False):
        """Look up the action registered under ``pat``; ``silent`` is unused."""
        # NOTE(review): this reads the module-level `lux.actions`, not
        # `self.actions` -- confirm that attribute still exists on the package.
        return lux.actions[pat]

    def register_action(
        self,
        name: str = "",
        action: Callable[[Any], Any] = None,
        display_condition: Optional[Callable[[Any], Any]] = None,
        *args,
    ) -> None:
        """
        Registers the provided action globally in lux

        Parameters
        ----------
        name : str
            the name of the action
        action : Callable[[Any], Any]
            the function used to generate the recommendations
        display_condition : Callable[[Any], Any]
            the function to check whether or not the function should be applied
        args: Any
            any additional arguments the function may require

        Raises
        ------
        ValueError
            If a truthy ``action`` or ``display_condition`` is not callable.
        """
        # Falsy values (e.g. None) skip validation and are stored as given.
        if action and not callable(action):
            raise ValueError("Action must be a callable")
        if display_condition and not callable(display_condition):
            raise ValueError("Display condition must be a callable")
        self.actions[name] = RegisteredOption(
            name=name, action=action, display_condition=display_condition, args=args
        )
        self.update_actions["flag"] = True

    def remove_action(self, name: str = "") -> None:
        """
        Removes the provided action globally in lux

        Parameters
        ----------
        name : str
            the name of the action to remove

        Raises
        ------
        ValueError
            If no action has been registered under ``name``.
        """
        if name not in self.actions:
            raise ValueError(f"Option '{name}' has not been registered")

        del self.actions[name]
        self.update_actions["flag"] = True

    def set_SQL_connection(self, connection):
        """
        Sets SQL connection to a database

        Parameters:
            connection : SQLAlchemy connectable, str, or sqlite3 connection
                For more information, `see here <https://docs.sqlalchemy.org/en/13/core/connections.html>`__
        """
        self.set_executor_type("SQL")
        self.SQLconnection = connection

    @staticmethod
    def _parse_query_template(query_template) -> Dict[str, str]:
        """Parse ``key:value`` lines from template text (str) or a file path."""
        if isinstance(query_template, str):
            lines = query_template.split("\n")
        else:
            with open(query_template) as f:
                lines = f.readlines()

        parsed = {}
        for line in lines:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            # Split on the first colon only so values may contain colons.
            key, val = line.split(":", 1)
            parsed[key] = val.strip()
        return parsed

    def read_query_template(self, query_template):
        """
        Load SQL query templates and switch to a fresh SQL executor.

        Parameters
        ----------
        query_template : str or path-like
            A ``str`` is treated as the template text itself, one
            ``key:value`` pair per line; any other value is opened as a file
            containing the same format.

        Raises
        ------
        ValueError
            If a non-blank line contains no ``:`` separator.
        """
        from lux.executor.SQLExecutor import SQLExecutor

        self.query_templates = self._parse_query_template(query_template)
        self.executor = SQLExecutor()

    def set_executor_type(self, exe):
        """
        Select the execution backend.

        Parameters
        ----------
        exe : str
            "SQL" installs a SQL executor and loads the Postgres query
            templates; "Pandas" installs a Pandas executor and clears the
            stored SQL connection.

        Raises
        ------
        ValueError
            If ``exe`` is neither "Pandas" nor "SQL".
        """
        if exe == "SQL":
            from lux.executor.SQLExecutor import SQLExecutor

            self.executor = SQLExecutor()
            self.read_query_template(postgres_template)
        elif exe == "Pandas":
            from lux.executor.PandasExecutor import PandasExecutor

            self.SQLconnection = ""
            self.executor = PandasExecutor()
        else:
            raise ValueError("Executor type must be either 'Pandas' or 'SQL'")
def warning_format(message, category, filename, lineno, file=None, line=None):
    """
    Render a warning as ``<filename>:<lineno>: <CategoryName>:<message>``.

    The signature mirrors ``warnings.formatwarning``; ``file`` and ``line``
    are accepted but not used in the output.
    """
    template = "%s:%s: %s:%s\n"
    fields = (filename, lineno, category.__name__, message)
    return template % fields