# Copyright 2019-2020 The Lux Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Any
import lux
import pandas as pd
import numpy as np
timedelta_re = re.compile(r"^timedelta64\[\w+\]$")
[docs]def is_timedelta64_series(series: pd.Series) -> bool:
"""
Check if the Series object is of timedelta64 type
Parameters
----------
series : pd.Series
Returns
-------
is_date: bool
"""
return pd.api.types.is_timedelta64_dtype(series)
[docs]def timedelta64_to_float_seconds(series: pd.Series) -> pd.Series:
"""
Convert a timedelta64 Series to a float Series in seconds
Parameters
----------
series : pd.Series
Returns
-------
series: pd.Series
"""
return series.view(np.int64) / 1_000_000_000
[docs]def compute_date_granularity(date_column: pd.core.series.Series):
"""
Given a temporal column (pandas.core.series.Series), finds out the granularity of dates.
Example
----------
['2018-01-01', '2019-01-02', '2018-01-03'] -> "day"
['2018-01-01', '2019-02-01', '2018-03-01'] -> "month"
['2018-01-01', '2019-01-01', '2020-01-01'] -> "year"
Parameters
----------
date_column: pandas.core.series.Series
Column series with datetime type
Returns
-------
field: str
A str specifying the granularity of dates for the inspected temporal column
"""
# supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html)
# corresponding to Pandas timescales
date_fields = ["day", "month", "year", "dayofweek"]
date_index = pd.DatetimeIndex(date_column)
for field in date_fields:
# can be changed to sum(getattr(date_index, field)) != 0
if hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1:
return field
return "year" # if none, then return year by default
[docs]def is_datetime_series(series: pd.Series) -> bool:
"""
Check if the Series object is of datetime type
Parameters
----------
series : pd.Series
Returns
-------
is_date: bool
"""
return pd.api.types.is_datetime64_any_dtype(series) or pd.api.types.is_period_dtype(series)
[docs]def is_datetime_string(string: str) -> bool:
"""
Check if the string is date-like.
Parameters
----------
string : str
Returns
-------
is_date: bool
"""
from dateutil.parser import parse
try:
parse(string)
return True
except ValueError:
return False