Skip to content

Commit

Permalink
Add overflow behavior handling in binning nodes
Browse files Browse the repository at this point in the history
Closes #5
  • Loading branch information
nsmith- committed Feb 16, 2021
1 parent ed40f31 commit 4324229
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 5 deletions.
5 changes: 5 additions & 0 deletions data/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def build_discrbinning(sf):
build_formula(sf[(sf["discrMin"] >= lo) & (sf["discrMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "clamp",
}
)

Expand All @@ -66,6 +67,7 @@ def build_ptbinning(sf):
build_discrbinning(sf[(sf["ptMin"] >= lo) & (sf["ptMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "clamp",
}
)

Expand All @@ -81,6 +83,7 @@ def build_etabinning(sf):
build_ptbinning(sf[(sf["etaMin"] >= lo) & (sf["etaMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "error",
}
)

Expand Down Expand Up @@ -183,6 +186,7 @@ def build_pts(sf):
"input": "pt",
"edges": edges,
"content": content,
"flow": "clamp",
}
)

Expand All @@ -207,6 +211,7 @@ def build_etas(sf):
"input": "eta",
"edges": edges,
"content": content,
"flow": "error",
}
)

Expand Down
7 changes: 7 additions & 0 deletions include/correction.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ class Formula {
double eval_ast(const Ast& ast, const std::vector<double>& variables) const;
};

// common internal for Binning and MultiBinning
// Selects what a binning node does when an input value falls outside its edges:
//   value - return the node's stored default_value_ content
//   clamp - fall back to the nearest edge bin (first bin below range, last bin above)
//   error - throw std::runtime_error
enum class _FlowBehavior {value, clamp, error};

class Binning {
public:
Binning(const rapidjson::Value& json, const std::vector<Variable>& inputs);
Expand All @@ -93,6 +96,8 @@ class Binning {
private:
std::vector<std::tuple<double, Content>> bins_;
size_t variableIdx_;
_FlowBehavior flow_;
std::unique_ptr<const Content> default_value_;
};

class MultiBinning {
Expand All @@ -105,6 +110,8 @@ class MultiBinning {
// variableIdx, stride, edges
std::vector<std::tuple<size_t, size_t, std::vector<double>>> axes_;
std::vector<Content> content_;
_FlowBehavior flow_;
std::unique_ptr<const Content> default_value_;
};

class Category {
Expand Down
61 changes: 56 additions & 5 deletions src/correction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,22 +326,47 @@ Binning::Binning(const rapidjson::Value& json, const std::vector<Variable>& inpu
}
bins_.reserve(edges.size());
// first bin is a dummy content node (represents the edge search returning underflow)
// TODO: good spot to put overflow default behavior
bins_.push_back({*edges.begin(), 0.});
for (size_t i=0; i < content.Size(); ++i) {
bins_.push_back({edges[i + 1], resolve_content(content[i], inputs)});
}
variableIdx_ = find_variable_index(json["input"], inputs);
if ( json["flow"] == "clamp" ) {
flow_ = _FlowBehavior::clamp;
}
else if ( json["flow"] == "error" ) {
flow_ = _FlowBehavior::error;
}
else { // Content node
flow_ = _FlowBehavior::value;
default_value_ = std::make_unique<const Content>(resolve_content(json["flow"], inputs));
}
}

// Select the content node of the bin that contains this node's input variable.
//
// bins_ layout: bins_[0] is a placeholder holding the lowest edge, and
// bins_[i + 1] pairs the upper edge of bin i with that bin's content.
// Bins are half-open, edges[i] <= x < edges[i + 1], matching the schema.
//
// Out-of-range inputs (x < first edge, or x >= last edge) follow flow_:
//   value -> returns *default_value_
//   error -> throws std::runtime_error
//   clamp -> returns the first (underflow) or last (overflow) bin's content
//
// Throws std::bad_variant_access if the input variable does not hold a double.
const Content& Binning::child(const std::vector<Variable::Type>& values) const {
  const double value = std::get<double>(values[variableIdx_]);
  // upper_bound yields the first entry whose edge is strictly greater than
  // value, so a value equal to a lower edge belongs to the bin it opens.
  // (lower_bound would make bins right-closed and reject value == edges[0].)
  auto it = std::upper_bound(
    std::begin(bins_), std::end(bins_), value,
    [](double v, const auto& bin) { return v < std::get<0>(bin); }
  );
  const bool below = ( it == std::begin(bins_) );
  const bool above = ( it == std::end(bins_) );
  if ( below || above ) {
    if ( flow_ == _FlowBehavior::value ) {
      return *default_value_;
    }
    if ( flow_ == _FlowBehavior::error ) {
      const std::string prefix = below
        ? "Index below bounds in Binning for input "
        : "Index above bounds in Binning for input ";
      throw std::runtime_error(prefix + std::to_string(variableIdx_) + " value: " + std::to_string(value));
    }
    // clamp: step off the placeholder onto the first real bin, or back from
    // end() onto the last real bin
    if ( below ) {
      ++it;
    }
    else {
      --it;
    }
  }
  return std::get<1>(*it);
}
Expand Down Expand Up @@ -373,6 +398,16 @@ MultiBinning::MultiBinning(const rapidjson::Value& json, const std::vector<Varia
if ( content_.size() != stride ) {
throw std::runtime_error("Inconsistency in MultiBinning: number of content nodes does not match binning");
}
if ( json["flow"] == "clamp" ) {
flow_ = _FlowBehavior::clamp;
}
else if ( json["flow"] == "error" ) {
flow_ = _FlowBehavior::error;
}
else { // Content node
flow_ = _FlowBehavior::value;
default_value_ = std::make_unique<const Content>(resolve_content(json["flow"], inputs));
}
}

const Content& MultiBinning::child(const std::vector<Variable::Type>& values) const {
Expand All @@ -381,10 +416,26 @@ const Content& MultiBinning::child(const std::vector<Variable::Type>& values) co
double value = std::get<double>(values[variableIdx]);
auto it = std::lower_bound(std::begin(edges), std::end(edges), value);
if ( it == std::begin(edges) ) {
throw std::runtime_error("Index below bounds in MultiBinning for input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
if ( flow_ == _FlowBehavior::value ) {
return *default_value_;
}
else if ( flow_ == _FlowBehavior::error ) {
throw std::runtime_error("Index below bounds in MultiBinning for input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
}
else { // clamp
it++;
}
}
else if ( it == std::end(edges) ) {
throw std::runtime_error("Index above bounds in MultiBinning input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
if ( flow_ == _FlowBehavior::value ) {
return *default_value_;
}
else if ( flow_ == _FlowBehavior::error ) {
throw std::runtime_error("Index above bounds in MultiBinning input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
}
else { // clamp
it--;
}
}
size_t localidx = std::distance(std::begin(edges), it) - 1;
idx += localidx * stride;
Expand Down
2 changes: 2 additions & 0 deletions src/correctionlib/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def build_data(
else build_data(value, axes[i:], variables[i:])
for value in flatten_to(values, i - 1)
],
"flow": "error", # TODO: can also produce overflow guard bins and clamp
}
)
return Binning.parse_obj(
Expand All @@ -103,6 +104,7 @@ def build_data(
else build_data(value, axes[1:], variables[1:])
for value in values
],
"flow": "error", # TODO: can also produce overflow guard bins and clamp
}
)

Expand Down
4 changes: 4 additions & 0 deletions src/correctionlib/schemav2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class Binning(Model):
edges: List[float]
"Edges of the binning, where edges[i] <= x < edges[i+1] => f(x, ...) = content[i](...)"
content: List[Content]
flow: Union[Content, Literal["clamp", "error"]]
"Overflow behavior for out-of-bounds values"


class MultiBinning(Model):
Expand All @@ -56,6 +58,8 @@ class MultiBinning(Model):
to the element at i0 in dimension 0, i1 in dimension 1, etc. and d0 = len(edges[0]), etc.
"""
content: List[Content]
flow: Union[Content, Literal["clamp", "error"]]
"Overflow behavior for out-of-bounds values"


class CategoryItem(Model):
Expand Down
1 change: 1 addition & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def wrap(*corrs):
"nodetype": "binning",
"input": "pt",
"edges": [0, 20, 40],
"flow": "error",
"content": [
schema.Category.parse_obj(
{
Expand Down

0 comments on commit 4324229

Please sign in to comment.