-
Notifications
You must be signed in to change notification settings - Fork 1
/
TestTLBloomFilter.m
150 lines (118 loc) · 6.35 KB
/
TestTLBloomFilter.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
//
// TestTLBloomFilter.m
// TLCommon
//
// Created by Joshua Bleecher Snyder on 11/2/09.
//
// Some of these unit tests inspired by Cassandras'
// See http://spyced.blogspot.com/2009/01/all-you-ever-wanted-to-know-about.html
// and http://github.com/jbellis/cassandra-dev/tree/e284df7536ef32869b87d903a5f92f6a96c84801/test/com/facebook/infrastructure/utils#
#import "TestTLBloomFilter.h"
#import "TLBloomFilter.h"
#import "NSData_TLCommon.h"
#pragma mark -
@interface TestTLBloomFilter ()
@property(nonatomic, retain, readwrite) NSSet *testDataSet;
@end
#pragma mark -
@implementation TestTLBloomFilter
@synthesize testDataSet;
- (void)setUp {
self.testDataSet = [NSSet setWithObjects:
[NSData data],
[@"a" dataUsingEncoding:NSASCIIStringEncoding],
[@"ab" dataUsingEncoding:NSASCIIStringEncoding],
[@"1234" dataUsingEncoding:NSASCIIStringEncoding],
[@"hi" dataUsingEncoding:NSASCIIStringEncoding],
[@"testing" dataUsingEncoding:NSASCIIStringEncoding],
[@"more tests" dataUsingEncoding:NSASCIIStringEncoding],
[@"kllkads" dataUsingEncoding:NSASCIIStringEncoding],
[@"43" dataUsingEncoding:NSASCIIStringEncoding],
[@"aa" dataUsingEncoding:NSASCIIStringEncoding],
nil];
}
- (void)tearDown {
self.testDataSet = nil;
}
- (void)testEmptyBloomFilter {
TLBloomFilter *bloomFilter = [[[TLBloomFilter alloc] initWithLength:10 hashes:5] autorelease];
for(NSData *data in self.testDataSet) {
STAssertFalse([bloomFilter containsData:data], @"Found data %@ in empty Bloom filter", [data UTF8String]);
}
}
- (void)testAddThenCheck {
TLBloomFilter *bloomFilter = [[[TLBloomFilter alloc] initWithLength:100 hashes:5] autorelease];
for(NSData *data in self.testDataSet) {
[bloomFilter addData:data];
}
for(NSData *data in self.testDataSet) {
STAssertTrue([bloomFilter containsData:data], @"Didn't find data %@ in Bloom filter but it was added", [data UTF8String]);
}
}
- (void)testSavingAndReinitializing {
TLBloomFilter *bloomFilter = [[[TLBloomFilter alloc] initWithLength:100 hashes:5] autorelease];
for(NSData *data in self.testDataSet) {
[bloomFilter addData:data];
}
TLBloomFilter *restoredBloomFilter = [[[TLBloomFilter alloc] initWithData:bloomFilter.data hashes:5] autorelease];
for(NSData *data in self.testDataSet) {
STAssertTrue([restoredBloomFilter containsData:data], @"Didn't find data %@ in restored Bloom filter but it was added to original", [data UTF8String]);
}
}
- (void)testOne {
TLBloomFilter *bloomFilter = [[[TLBloomFilter alloc] initWithLength:100 hashes:5] autorelease];
[bloomFilter addData:[@"a" dataUsingEncoding:NSASCIIStringEncoding]];
STAssertFalse([bloomFilter containsData:[@"b" dataUsingEncoding:NSASCIIStringEncoding]], @"Put in 'a', claims 'b' is present");
}
- (void)testNumberOfHashCalculations {
// Correct values from http://hur.st/bloomfilter?n=100&p=0.1
NSUInteger numberOfHashes1 = [TLBloomFilter numberOfHashesToMinimizeFalsePositivesForLength:60 capacity:100];
STAssertEquals(numberOfHashes1, (NSUInteger)3, @"Wrong number of hashes: %i", numberOfHashes1);
NSUInteger numberOfHashes2 = [TLBloomFilter numberOfHashesToMinimizeFalsePositivesForLength:361 capacity:1000];
STAssertEquals(numberOfHashes2, (NSUInteger)2, @"Wrong number of hashes: %i", numberOfHashes2);
}
- (void)testLengthForFalsePositiveRate {
// Correct values from http://hur.st/bloomfilter?n=100&p=0.1
NSUInteger length1 = [TLBloomFilter lengthToAchieveFalsePositiveRate:0.1f forCapacity:100];
STAssertEquals(length1, (NSUInteger)60, @"Wrong length: %i", length1);
NSUInteger length2 = [TLBloomFilter lengthToAchieveFalsePositiveRate:0.25f forCapacity:1000];
STAssertEquals(length2, (NSUInteger)361, @"Wrong length: %i", length2);
NSUInteger verySmallLength = [TLBloomFilter lengthToAchieveFalsePositiveRate:0.49f forCapacity:1];
STAssertEquals(verySmallLength, (NSUInteger)1, @"Wrong length: %i", verySmallLength);
}
- (void)testThatEmptyFiltersThrowExceptions {
STAssertThrows([[[TLBloomFilter alloc] initWithCapacity:0 falsePositiveRate:0.2f] autorelease], @"Empty length should throw exception");
STAssertThrows([[[TLBloomFilter alloc] initWithLength:0 hashes:10] autorelease], @"Empty length should throw exception");
STAssertThrows([[[TLBloomFilter alloc] initWithData:[NSMutableData data] hashes:10] autorelease], @"Empty length should throw exception");
}
#if TL_RUN_SLOW_TESTS
- (void)testFalsePositiveRate {
NSString *allWords = [NSString stringWithContentsOfFile:@"/usr/share/dict/words"
encoding:NSASCIIStringEncoding
error:NULL];
NSArray *wordsExploded = [allWords componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
NSArray *words = [wordsExploded filteredArrayUsingPredicate:[NSPredicate predicateWithFormat:@"self.length > 0"]];
words = [words subarrayWithRange:NSMakeRange(0, 60000)];
NSUInteger numberOfWords = [words count];
NSUInteger numberOfEnteredWords = numberOfWords / 2;
for(float targetFalsePositiveRate = 0.05f; targetFalsePositiveRate < 0.4f; targetFalsePositiveRate += 0.05f) {
TLBloomFilter *bloomFilter = [[[TLBloomFilter alloc] initWithCapacity:numberOfEnteredWords falsePositiveRate:targetFalsePositiveRate] autorelease];
// add even words; test odd words
for(NSUInteger i = 0; i < numberOfEnteredWords; i++) {
NSString *evenWord = [words objectAtIndex:i * 2];
[bloomFilter addData:[evenWord dataUsingEncoding:NSASCIIStringEncoding]];
}
NSUInteger falsePositives = 0;
for(NSUInteger i = 0; i < numberOfEnteredWords; i++) {
NSString *oddWord = [words objectAtIndex:i * 2 + 1];
if([bloomFilter containsData:[oddWord dataUsingEncoding:NSASCIIStringEncoding]]) {
falsePositives++;
}
}
float actualFalsePositiveRate = (float)falsePositives / (float)numberOfEnteredWords;
NSLog(@"afp %f target %f", actualFalsePositiveRate, targetFalsePositiveRate);
STAssertEqualsWithAccuracy(actualFalsePositiveRate, targetFalsePositiveRate, 0.01f, @"Predicted false positive rate not achieved: %f vs %f", actualFalsePositiveRate, targetFalsePositiveRate);
}
}
#endif
@end