Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prediction type to return the mean, variance, and mode #21

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 96 additions & 6 deletions lightgbmlss/distributions/distribution_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import stats

from typing import Any, Dict, Optional, List, Tuple
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -334,12 +335,89 @@ def draw_samples(self,
dist_samples = dist_samples.astype(int)

return dist_samples

def get_moments(self,
                predt_params: pd.DataFrame,
                inference: str = "none",
                n_samples: int = 1000,
                seed: int = 123
                ) -> Optional[pd.DataFrame]:
    """
    Function that returns moments (mean, variance, mode) of a predicted distribution.

    Arguments
    ---------
    predt_params: pd.DataFrame
        pd.DataFrame with predicted distributional parameters, one row per observation and
        one column per entry of ``self.distribution_arg_names`` (in that order).
    inference: str
        Type of inference from drawn samples:
            - "none" (default) Will return only the exact, implemented moments.
            - "missing" Will infer moments for missing implementations by drawing samples.
            - "all" Will infer all moments by drawing samples.
    n_samples: int
        Number of samples to draw from the predicted response distribution. Only used when
        ``inference`` is "missing" or "all".
    seed: int
        Manual seed passed to ``torch.manual_seed`` before sampling.

    Returns
    -------
    pred_moments: pd.DataFrame or None
        DataFrame with (a subset of) the columns "mean", "variance" and "mode" of the predicted
        response distribution. ``None`` is returned when ``self.tau`` is set, or when no moment
        could be computed at all.

    Raises
    ------
    ValueError
        If ``inference`` is not one of "none", "missing" or "all".
    """
    # No moments are computed when self.tau is set; this mirrors the original contract.
    if self.tau is not None:
        return None

    if inference not in ("none", "missing", "all"):
        raise ValueError(
            f"Invalid inference type '{inference}'. Please choose from 'none', 'missing' or 'all'."
        )

    pred_params = torch.tensor(predt_params.values)
    dist_kwargs = dict(zip(self.distribution_arg_names, pred_params.T))
    dist_pred = self.distribution(**dist_kwargs)
    pred_moments = pd.DataFrame()

    dist_samples = None
    if inference != "none":
        torch.manual_seed(seed)
        n_obs = pred_params.shape[0]
        # Reshape explicitly instead of squeeze(): squeeze() drops the observation axis for a
        # single row (or the sample axis for n_samples == 1) and breaks the axis=1 reductions.
        dist_samples = (
            dist_pred.sample((n_samples,)).detach().numpy().reshape(n_samples, n_obs).T
        )

    # Sample-based estimators, each mapping an (n_obs, n_samples) array to an (n_obs,) array.
    # np.ravel makes the mode 1-D regardless of SciPy's version-dependent keepdims default.
    sample_estimators = {
        "mean": lambda samples: np.mean(samples, axis=1),
        "variance": lambda samples: np.var(samples, axis=1),
        "mode": lambda samples: np.ravel(stats.mode(samples, axis=1).mode),
    }

    if inference == "all":
        for moment_name, estimator in sample_estimators.items():
            pred_moments[moment_name] = estimator(dist_samples)
        return pred_moments

    for moment_name, estimator in sample_estimators.items():
        try:
            exact_moment = getattr(dist_pred, moment_name)
        except NotImplementedError:
            # The distribution has no closed form for this moment; fall back to samples
            # only when the caller explicitly asked for it.
            if inference == "missing":
                pred_moments[moment_name] = estimator(dist_samples)
        else:
            pred_moments[moment_name] = exact_moment.detach().numpy()

    if pred_moments.shape[1] == 0:
        return None
    return pred_moments

def predict_dist(self,
booster: lgb.Booster,
data: pd.DataFrame,
start_values: np.ndarray,
pred_type: str = "parameters",
moments_inference: str = "none",
n_samples: int = 1000,
quantiles: list = [0.1, 0.5, 0.9],
seed: str = 123
Expand All @@ -361,6 +439,12 @@ def predict_dist(self,
- "quantiles" calculates the quantiles from the predicted distribution.
- "parameters" returns the predicted distributional parameters.
- "expectiles" returns the predicted expectiles.
- "moments" returns the mean, variance, and (if implemented) mode.
moments_inference: str
Type of inference to use if the prediction type is "moments":
- "none" (default) Will return only the exact, implemented moments.
- "missing" Will infer moments for missing implementations by drawing samples.
- "all" Will infer all moments by drawing samples.
n_samples : int
Number of samples to draw from the predicted distribution.
quantiles : List[float]
Expand Down Expand Up @@ -398,18 +482,24 @@ def predict_dist(self,
dist_params_predt = pd.DataFrame(dist_params_predt)
dist_params_predt.columns = self.param_dict.keys()

# Draw samples from predicted response distribution
pred_samples_df = self.draw_samples(predt_params=dist_params_predt,
n_samples=n_samples,
seed=seed)

if pred_type == "parameters":
return dist_params_predt

elif pred_type == "expectiles":
return dist_params_predt

elif pred_type == "moments":
return self.get_moments(predt_params=dist_params_predt,
inference=moments_inference,
n_samples=n_samples,
seed=seed)

# Draw samples from predicted response distribution
pred_samples_df = self.draw_samples(predt_params=dist_params_predt,
n_samples=n_samples,
seed=seed)

elif pred_type == "samples":
if pred_type == "samples":
return pred_samples_df

elif pred_type == "quantiles":
Expand Down
3 changes: 3 additions & 0 deletions lightgbmlss/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def predict(self,
pred_type: str = "parameters",
n_samples: int = 1000,
quantiles: list = [0.1, 0.5, 0.9],
moments_inference: str = "none",
seed: str = 123):
"""
Function that predicts from the trained model.
Expand All @@ -450,6 +451,7 @@ def predict(self,
- "quantiles" calculates the quantiles from the predicted distribution.
- "parameters" returns the predicted distributional parameters.
- "expectiles" returns the predicted expectiles.
- "moments" returns the mean, variance, and (if implemented) mode.
n_samples : int
Number of samples to draw from the predicted distribution.
quantiles : List[float]
Expand All @@ -470,6 +472,7 @@ def predict(self,
pred_type=pred_type,
n_samples=n_samples,
quantiles=quantiles,
moments_inference=moments_inference,
seed=seed)

return predt_df
Expand Down