Skip to content

Commit

Permalink
Merge pull request #92 from kortschak/names
Browse files Browse the repository at this point in the history
Rename Check-ish -> Contains-ish
  • Loading branch information
barakmich committed Jul 31, 2014
2 parents d54cd6a + 1606e98 commit cb177aa
Show file tree
Hide file tree
Showing 20 changed files with 142 additions and 143 deletions.
2 changes: 1 addition & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ An important failure of MQL before was that it was never well-specified. Let's n
### New Iterators

#### Limit Iterator
The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Check(), and a limit on NextResult()
The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Contains(), and a limit on NextResult()

## Medium Term

Expand Down
4 changes: 2 additions & 2 deletions cayley_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ var benchmarkQueries = []struct {

// This is more of an optimization problem that will get better over time. This takes a lot
// of wrong turns on the walk down to what is ultimately the name, but top AND has it easy
// as it has a fixed ID. Exercises Check().
// as it has a fixed ID. Exercises Contains().
{
message: "the helpless checker",
long: true,
Expand Down Expand Up @@ -383,7 +383,7 @@ func BenchmarkVeryLargeSetsSmallIntersection(b *testing.B) {
runBench(2, b)
}

func BenchmarkHelplessChecker(b *testing.B) {
func BenchmarkHelplessContainsChecker(b *testing.B) {
runBench(3, b)
}

Expand Down
23 changes: 11 additions & 12 deletions graph/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,8 @@ type Iterator interface {
// from the bottom up.
NextResult() bool

// Check(), given a value, returns whether or not that value is within the set
// held by this iterator.
Check(Value) bool
// Contains returns whether the value is within the set held by the iterator.
Contains(Value) bool

// Start iteration from the beginning
Reset()
Expand Down Expand Up @@ -161,9 +160,9 @@ type FixedIterator interface {
}

type IteratorStats struct {
CheckCost int64
NextCost int64
Size int64
ContainsCost int64
NextCost int64
Size int64
}

// Type enumerates the set of Iterator types.
Expand Down Expand Up @@ -229,20 +228,20 @@ func (t Type) String() string {
return types[t]
}

// Utility logging functions for when an iterator gets called Next upon, or Check upon, as
// Utility logging functions for when an iterator gets called Next upon, or Contains upon, as
// well as what they return. Highly useful for tracing the execution path of a query.
func CheckLogIn(it Iterator, val Value) {
func ContainsLogIn(it Iterator, val Value) {
if glog.V(4) {
glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type().String()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK CONTAINS %d", strings.ToUpper(it.Type().String()), it.UID(), val)
}
}

func CheckLogOut(it Iterator, val Value, good bool) bool {
func ContainsLogOut(it Iterator, val Value, good bool) bool {
if glog.V(4) {
if good {
glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK CONTAINS %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val)
} else {
glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK CONTAINS %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val)
}
}
return good
Expand Down
16 changes: 8 additions & 8 deletions graph/iterator/all_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,16 @@ func (it *Int64) Size() (int64, bool) {
return Size, true
}

// Check() for an Int64 is merely seeing if the passed value is
// Contains() for an Int64 is merely seeing if the passed value is
// within the range, assuming the value is an int64.
func (it *Int64) Check(tsv graph.Value) bool {
graph.CheckLogIn(it, tsv)
func (it *Int64) Contains(tsv graph.Value) bool {
graph.ContainsLogIn(it, tsv)
v := tsv.(int64)
if it.min <= v && v <= it.max {
it.result = v
return graph.CheckLogOut(it, v, true)
return graph.ContainsLogOut(it, v, true)
}
return graph.CheckLogOut(it, v, false)
return graph.ContainsLogOut(it, v, false)
}

// The type of this iterator is an "all". This is important, as it puts it in
Expand All @@ -150,8 +150,8 @@ func (it *Int64) Optimize() (graph.Iterator, bool) { return it, false }
func (it *Int64) Stats() graph.IteratorStats {
s, _ := it.Size()
return graph.IteratorStats{
CheckCost: 1,
NextCost: 1,
Size: s,
ContainsCost: 1,
NextCost: 1,
Size: s,
}
}
34 changes: 17 additions & 17 deletions graph/iterator/and_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
//
// It accomplishes this in one of two ways. If it is a Next()ed iterator (that
// is, it is a top level iterator, or on the "Next() path"), then it will Next()
// its primary iterator (helpfully, and.primary_it) and Check() the resultant
// its primary iterator (helpfully, and.primary_it) and Contains() the resultant
// value against its other iterators. If it matches all of them, then it
// returns that value. Otherwise, it repeats the process.
//
// If it's on a Check() path, it merely Check()s every iterator, and returns the
// If it's on a Contains() path, it merely calls Contains() on every iterator, and returns the
// logical AND of each result.

package iterator
Expand Down Expand Up @@ -86,7 +86,7 @@ func (it *And) Clone() graph.Iterator {
and.AddSubIterator(sub.Clone())
}
if it.checkList != nil {
and.optimizeCheck()
and.optimizeContains()
}
return and
}
Expand Down Expand Up @@ -164,7 +164,7 @@ func (it *And) Next() (graph.Value, bool) {
if !exists {
return graph.NextLogOut(it, nil, false)
}
if it.checkSubIts(curr) {
if it.subItsContain(curr) {
it.result = curr
return graph.NextLogOut(it, curr, true)
}
Expand All @@ -177,47 +177,47 @@ func (it *And) Result() graph.Value {
}

// Checks a value against the non-primary iterators, in order.
func (it *And) checkSubIts(val graph.Value) bool {
func (it *And) subItsContain(val graph.Value) bool {
var subIsGood = true
for _, sub := range it.internalIterators {
subIsGood = sub.Check(val)
subIsGood = sub.Contains(val)
if !subIsGood {
break
}
}
return subIsGood
}

func (it *And) checkCheckList(val graph.Value) bool {
func (it *And) checkContainsList(val graph.Value) bool {
ok := true
for _, c := range it.checkList {
ok = c.Check(val)
ok = c.Contains(val)
if !ok {
break
}
}
if ok {
it.result = val
}
return graph.CheckLogOut(it, val, ok)
return graph.ContainsLogOut(it, val, ok)
}

// Check a value against the entire iterator, in order.
func (it *And) Check(val graph.Value) bool {
graph.CheckLogIn(it, val)
func (it *And) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val)
if it.checkList != nil {
return it.checkCheckList(val)
return it.checkContainsList(val)
}
mainGood := it.primaryIt.Check(val)
mainGood := it.primaryIt.Contains(val)
if !mainGood {
return graph.CheckLogOut(it, val, false)
return graph.ContainsLogOut(it, val, false)
}
othersGood := it.checkSubIts(val)
othersGood := it.subItsContain(val)
if !othersGood {
return graph.CheckLogOut(it, val, false)
return graph.ContainsLogOut(it, val, false)
}
it.result = val
return graph.CheckLogOut(it, val, true)
return graph.ContainsLogOut(it, val, true)
}

// Returns the approximate size of the And iterator. Because we're dealing
Expand Down
30 changes: 15 additions & 15 deletions graph/iterator/and_iterator_optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ import (
// In short, tread lightly.

// Optimizes the And, by picking the most efficient way to Next() and
// Check() its subiterators. For SQL fans, this is equivalent to JOIN.
// Contains() its subiterators. For SQL fans, this is equivalent to JOIN.
func (it *And) Optimize() (graph.Iterator, bool) {
// First, let's get the slice of iterators, in order (first one is Next()ed,
// the rest are Check()ed)
// the rest are Contains()ed)
old := it.SubIterators()

// And call Optimize() on our subtree, replacing each one in the order we
Expand Down Expand Up @@ -84,7 +84,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
// Move the tags hanging on us (like any good replacement).
newAnd.tags.CopyFrom(it)

newAnd.optimizeCheck()
newAnd.optimizeContains()

// And close ourselves but not our subiterators -- some may still be alive in
// the new And (they were unchanged upon calling Optimize() on them, at the
Expand Down Expand Up @@ -142,7 +142,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {

// Find the iterator with the projected "best" total cost.
// Total cost is defined as The Next()ed iterator's cost to Next() out
// all of its contents, and to Check() each of those against everyone
// all of its contents, and to Contains() each of those against everyone
// else.
for _, it := range its {
if _, canNext := it.(graph.Nexter); !canNext {
Expand All @@ -159,7 +159,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
continue
}
stats := f.Stats()
cost += stats.CheckCost
cost += stats.ContainsCost
}
cost *= rootStats.Size
if cost < bestCost {
Expand All @@ -169,7 +169,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
}

// TODO(barakmich): Optimization of order need not stop here. Picking a smart
// Check() order based on probability of getting a false Check() first is
// Contains() order based on probability of getting a false Contains() first is
// useful (fail faster).

// Put the best iterator (the one we wish to Next()) at the front...
Expand All @@ -192,12 +192,12 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
type byCost []graph.Iterator

func (c byCost) Len() int { return len(c) }
func (c byCost) Less(i, j int) bool { return c[i].Stats().CheckCost < c[j].Stats().CheckCost }
func (c byCost) Less(i, j int) bool { return c[i].Stats().ContainsCost < c[j].Stats().ContainsCost }
func (c byCost) Swap(i, j int) { c[i], c[j] = c[j], c[i] }

// optimizeCheck(l) creates an alternate check list, containing the same contents
// optimizeContains() creates an alternate check list, containing the same contents
// but with a new ordering, however it wishes.
func (it *And) optimizeCheck() {
func (it *And) optimizeContains() {
// GetSubIterators allocates, so this is currently safe.
// TODO(kortschak) Reuse it.checkList if possible.
// This involves providing GetSubIterators with a slice to fill.
Expand Down Expand Up @@ -298,21 +298,21 @@ func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
// For now, however, it's pretty static.
func (it *And) Stats() graph.IteratorStats {
primaryStats := it.primaryIt.Stats()
CheckCost := primaryStats.CheckCost
ContainsCost := primaryStats.ContainsCost
NextCost := primaryStats.NextCost
Size := primaryStats.Size
for _, sub := range it.internalIterators {
stats := sub.Stats()
NextCost += stats.CheckCost
CheckCost += stats.CheckCost
NextCost += stats.ContainsCost
ContainsCost += stats.ContainsCost
if Size > stats.Size {
Size = stats.Size
}
}
return graph.IteratorStats{
CheckCost: CheckCost,
NextCost: NextCost,
Size: Size,
ContainsCost: ContainsCost,
NextCost: NextCost,
Size: Size,
}

}
16 changes: 8 additions & 8 deletions graph/iterator/fixed_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,18 @@ func (it *Fixed) DebugString(indent int) string {
func (it *Fixed) Type() graph.Type { return graph.Fixed }

// Check if the passed value is equal to one of the values stored in the iterator.
func (it *Fixed) Check(v graph.Value) bool {
func (it *Fixed) Contains(v graph.Value) bool {
// Could be optimized by keeping it sorted or using a better datastructure.
// However, for fixed iterators, which are by definition kind of tiny, this
// isn't a big issue.
graph.CheckLogIn(it, v)
graph.ContainsLogIn(it, v)
for _, x := range it.values {
if it.cmp(x, v) {
it.result = x
return graph.CheckLogOut(it, v, true)
return graph.ContainsLogOut(it, v, true)
}
}
return graph.CheckLogOut(it, v, false)
return graph.ContainsLogOut(it, v, false)
}

// Return the next stored value from the iterator.
Expand Down Expand Up @@ -181,12 +181,12 @@ func (it *Fixed) Size() (int64, bool) {
return int64(len(it.values)), true
}

// Because we currently have to scan the entire list, Next and Check are linear with the
// Because we currently have to scan the entire list, Next and Contains are linear with the
// size. However, a better data structure could remove these limits.
func (it *Fixed) Stats() graph.IteratorStats {
return graph.IteratorStats{
CheckCost: int64(len(it.values)),
NextCost: int64(len(it.values)),
Size: int64(len(it.values)),
ContainsCost: int64(len(it.values)),
NextCost: int64(len(it.values)),
Size: int64(len(it.values)),
}
}
Loading

0 comments on commit cb177aa

Please sign in to comment.