import logging
import time
import warnings
from typing import Any, Dict, List, Optional
import pandas as pd
from collab.foraging import toolkit as ftk
from collab.foraging.toolkit.local_windows import generate_local_windows
from collab.foraging.toolkit.utils import dataObject
# Configure the root logger at import time: INFO level, messages prefixed with a timestamp.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
# Module-level logger used below to report how long each predictor/score computation took.
derivation_logger = logging.getLogger(__name__)
def _generate_DF_from_nestedList(df_list: List[List[pd.DataFrame]]) -> pd.DataFrame:
"""
A helper function that concatenates a nested list of DataFrames into a single, flattened DataFrame.
List elements that are `None` are automatically discarded.
:param df_list: nested list of DataFrames, e.g., computed predictor DataFrames that are grouped
by forager_index and time
:return: flattened DataFrame
"""
return pd.concat(
[pd.concat(df, axis=0) for df in df_list], axis=0
) # this automatically ignores None elements!
def _generate_combined_DF(
    predictors_and_scores: Dict[str, List[List[pd.DataFrame]]],
    dropna: Optional[bool] = True,
    add_scaled_values: Optional[bool] = False,
) -> pd.DataFrame:
    """
    A helper function that takes a dictionary of computed predictors/scores (as nested lists of DataFrames),
    and returns a single, flattened DataFrame, containing each predictor/score as a column.

    Each entry is flattened with `_generate_DF_from_nestedList`, and the resulting DataFrames are
    inner-merged one after another on the columns they have in common.

    :param predictors_and_scores: dictionary of computed predictors/scores
    :param dropna: set to `True` to drop NaN elements from final DataFrame
        (a warning is emitted if any rows were dropped)
    :param add_scaled_values: set to `True` to scale the predictor/score columns and
        add the values as additional columns (`<name>_scaled`) in final DataFrame
    :return: final, flattened DataFrame containing all computed predictors as columns
    :raises ValueError: if `predictors_and_scores` is empty
    """
    if not predictors_and_scores:
        # Fail with an explicit message instead of an opaque IndexError further down.
        raise ValueError(
            "`predictors_and_scores` is empty: at least one predictor or score is required."
        )

    flattened = [
        _generate_DF_from_nestedList(nested)
        for nested in predictors_and_scores.values()
    ]

    combinedDF = flattened[0]
    for next_df in flattened[1:]:
        # no `on=` given: pandas merges on all columns shared by both frames
        combinedDF = combinedDF.merge(next_df, how="inner")

    if dropna:
        og_frames = len(combinedDF)
        combinedDF = combinedDF.dropna()
        dropped_frames = og_frames - len(combinedDF)
        if dropped_frames:
            warnings.warn(
                f"\nDropped {dropped_frames}/{og_frames} frames from `derivedDF` due to NaN values.\n"
                "Missing values can arise when computations depend on next/previous step positions\n"
                "that are unavailable. See documentation of the corresponding predictor/score generating\n"
                "functions for more information.\n"
            )

    # scale predictor/score columns to [0, 1] via min-max scaling
    if add_scaled_values:
        for key in predictors_and_scores:
            column_min = combinedDF[key].min()
            column_max = combinedDF[key].max()
            # NOTE: a constant column has zero range, so its scaled values come out as NaN (0/0)
            combinedDF[f"{key}_scaled"] = (combinedDF[key] - column_min) / (
                column_max - column_min
            )

    return combinedDF
def derive_predictors_and_scores(
    foragers_object: dataObject,
    local_windows_kwargs: Dict[str, Any],
    predictor_kwargs: Dict[str, Dict[str, Any]],
    score_kwargs: Dict[str, Dict[str, Any]],
    dropna: Optional[bool] = True,
    add_scaled_values: Optional[bool] = False,
) -> pd.DataFrame:
    """
    A function that calculates a chosen set of predictors and scores for data by inferring their names from
    keys in `predictor_kwargs` & `score_kwargs`, and dynamically calling the corresponding functions.

    The chosen parameters, the local windows, the raw derived quantities and the final DataFrame
    are all saved as attributes on `foragers_object` (`local_windows_kwargs`, `predictor_kwargs`,
    `score_kwargs`, `local_windows`, `derived_quantities`, `derivedDF`).

    :param foragers_object: instance of dataObject class containing the trajectory data of foragers.
    :param local_windows_kwargs: dictionary of keyword arguments for `generate_local_windows` function.
    :param predictor_kwargs: nested dictionary of keyword arguments for predictors to be computed.
        Keys of predictor_kwargs set the name of the predictor to be computed.
        The predictor name can have underscores, however, the substring before the first underscore must correspond
        to the name of a predictor type in Collab. Thus, we can have multiple versions of the same predictor type
        (with different parameters) by naming them as follows:
        predictor_kwargs = {
            "proximity_10" : {"optimal_dist":10, "decay":1, ...},
            "proximity_20" : {"optimal_dist":20, "decay":2, ...},
            "proximity_w_constraint" : {...,"interaction_constraint" : constraint_function,
                                        "interaction_constraint_params": {...}}
        }
    :param score_kwargs: nested dictionary of keyword arguments for outcome variables
        ("scores") to be computed. The substring before the first underscore in dictionary keys must
        correspond to the name of a score type in Collab, same as in `predictor_kwargs`
        score_kwargs = {
            "nextStep_linear" : {"nonlinearity_exponent" : 1},
            "nextStep_squared" : {"nonlinearity_exponent" : 2},
        }
    :param dropna: set to `True` to drop NaN elements from the final DataFrame
    :param add_scaled_values: set to `True` to compute scaled predictor scores
        and add them as additional columns in final DataFrame
    :return: final, flattened DataFrame containing all computed predictors as columns
    :raises AttributeError: if a key's type prefix does not match any
        `generate_<type>_predictor` / `generate_<type>_score` function in the toolkit
    """
    # save chosen parameters to object (done first, before any generating function is called)
    foragers_object.local_windows_kwargs = local_windows_kwargs
    foragers_object.predictor_kwargs = predictor_kwargs
    foragers_object.score_kwargs = score_kwargs

    # generate local_windows and add to object
    foragers_object.local_windows = generate_local_windows(foragers_object)

    # Predictors are computed first, then scores. Both share one dictionary, so a score
    # with the same name as a predictor would overwrite that predictor's entry.
    derived_quantities = {}
    for kind, requested in (("predictor", predictor_kwargs), ("score", score_kwargs)):
        for quantity_name in requested:
            quantity_type = quantity_name.split("_")[0]
            function_name = f"generate_{quantity_type}_{kind}"
            try:
                generate_function = getattr(ftk, function_name)
            except AttributeError as err:
                raise AttributeError(
                    f"Unknown {kind} type '{quantity_type}' (inferred from key '{quantity_name}'): "
                    f"`{function_name}` was not found in the foraging toolkit."
                ) from err

            # perf_counter is monotonic, so the elapsed time is immune to system clock changes
            start = time.perf_counter()
            derived_quantities[quantity_name] = generate_function(
                foragers_object, quantity_name
            )
            elapsed = time.perf_counter() - start
            derivation_logger.info(
                "%s completed in %.2f seconds.", quantity_name, elapsed
            )

    # save to object
    foragers_object.derived_quantities = derived_quantities

    # generate combined DF and save to object
    derivedDF = _generate_combined_DF(derived_quantities, dropna, add_scaled_values)
    foragers_object.derivedDF = derivedDF

    return derivedDF
def get_list_of_predictors() -> List[str]:
    """
    A function that returns a list of all available predictors in Collab.

    Predictors are discovered by name: every attribute of the foraging toolkit called
    `generate_<type>_predictor` contributes `<type>` to the result.

    :return: list of predictor type names, in the order given by `dir()` on the toolkit
    """
    prefix, suffix = "generate_", "_predictor"
    # Strip the fixed prefix/suffix rather than taking `split("_")[1]`, which would
    # truncate any type name that itself contains an underscore.
    return [
        name[len(prefix) : -len(suffix)]
        for name in dir(ftk)
        if name.startswith(prefix)
        and name.endswith(suffix)
        and len(name) > len(prefix) + len(suffix)  # require a non-empty type name
    ]
def get_list_of_scores() -> List[str]:
    """
    A function that returns a list of all available scores in Collab.

    Scores are discovered by name: every attribute of the foraging toolkit called
    `generate_<type>_score` contributes `<type>` to the result.

    :return: list of score type names, in the order given by `dir()` on the toolkit
    """
    prefix, suffix = "generate_", "_score"
    # Strip the fixed prefix/suffix rather than taking `split("_")[1]`, which would
    # truncate any type name that itself contains an underscore.
    return [
        name[len(prefix) : -len(suffix)]
        for name in dir(ftk)
        if name.startswith(prefix)
        and name.endswith(suffix)
        and len(name) > len(prefix) + len(suffix)  # require a non-empty type name
    ]