-
Notifications
You must be signed in to change notification settings - Fork 0
/
SearchingFiles.java
119 lines (87 loc) · 3.97 KB
/
SearchingFiles.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import java.io.BufferedReader;
import java.nio.file.Paths;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
import java.util.Date;
import java.io.InputStreamReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.analysis.Analyzer;
/**
 * Runs a single query against an existing Lucene index and prints the
 * best-ranked documents to standard output.
 *
 * <p>Queries are parsed with a {@link MultiFieldQueryParser} built over the
 * {@code title} and {@code contents} fields, and scored with either
 * {@link BM25Similarity} or {@link ClassicSimilarity} depending on the
 * ranking model code supplied by the caller.
 */
public class SearchingFiles {

    /** Fields handed to the multi-field query parser. */
    private static final String[] SEARCH_FIELDS = {"title", "contents"};

    /** Maximum number of results requested from the searcher and displayed. */
    private static final int MAX_HITS = 10;

    /**
     * Opens the index at {@code Index_Path}, parses {@code q} and prints up to
     * {@value #MAX_HITS} ranked results.
     *
     * <p>The directory, index reader and analyzer are released when this method
     * returns, whether it completes normally or throws.
     *
     * @param Index_Path    file-system path of the Lucene index directory
     * @param q             query string in the classic Lucene query syntax
     * @param Ranking_Model {@code "OK"} selects BM25, {@code "VS"} selects the
     *                      classic similarity (case-insensitive); any other
     *                      value, including {@code null}, prints an error and
     *                      terminates the JVM with status 0
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code q} is not a valid query
     */
    public static void search(String Index_Path, String q, String Ranking_Model) throws IOException, ParseException {
        // try-with-resources guarantees the reader (and the directory/analyzer, which
        // were previously never closed) are released even when parsing or searching fails.
        try (FSDirectory directory = FSDirectory.open(Paths.get(Index_Path));
             IndexReader reader = DirectoryReader.open(directory);
             Analyzer analyzer = new StandardAnalyzer()) {

            IndexSearcher searcher = new IndexSearcher(reader);

            // Constant-first comparison: a null model falls through to the
            // "invalid" branch instead of raising a NullPointerException.
            if ("OK".equalsIgnoreCase(Ranking_Model)) {
                System.out.println("\nUsing OKAPI BM25 Ranking Model...");
                searcher.setSimilarity(new BM25Similarity());
            } else if ("VS".equalsIgnoreCase(Ranking_Model)) {
                System.out.println("\nUsing Vector Space Model...");
                searcher.setSimilarity(new ClassicSimilarity());
            } else {
                System.out.println("Invalid Ranking Model Selected");
                System.exit(0);
            }

            // One parser instance covers every field in SEARCH_FIELDS.
            MultiFieldQueryParser parser = new MultiFieldQueryParser(SEARCH_FIELDS, analyzer);

            // Deliberately not closed: closing this reader would close System.in
            // for the rest of the process.
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));

            Query query = parser.parse(q);
            System.out.println("\nSearching For: " + q + "\n");

            PerformSearch(in, searcher, query, MAX_HITS);
        }
    }

    /**
     * Executes {@code query} and prints the hit count, the elapsed time and one
     * entry (rank, title, path, score) per returned document.
     *
     * @param in       not read by this method; retained so existing callers
     *                 keep compiling
     * @param searcher searcher to run the query on
     * @param query    parsed query to execute
     * @param Max_Hits maximum number of top-scoring documents to request
     * @throws IOException if the index cannot be read
     */
    public static void PerformSearch(BufferedReader in, IndexSearcher searcher, Query query,
            int Max_Hits) throws IOException {
        // nanoTime is monotonic, so the measurement cannot go negative if the
        // wall clock is adjusted while the search runs.
        long startNanos = System.nanoTime();
        TopDocs results = searcher.search(query, Max_Hits);
        // Truncate to whole milliseconds so the printed value keeps the same
        // resolution as before.
        long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;

        ScoreDoc[] hits = results.scoreDocs;
        // Kept as long: the total is only displayed, so there is no reason to
        // risk an ArithmeticException narrowing it to int.
        long numTotalHits = results.totalHits;
        // The returned array is the authoritative bound for the loop below.
        int shown = hits.length;

        System.out.println("Total " + numTotalHits + " Matching Documents Found in " + (elapsedMillis / 1000.0) + " seconds");
        System.out.println("Showing Top " + shown + "\n");

        for (int i = 0; i < shown; i++) {
            int rank = i + 1;
            Document doc = searcher.doc(hits[i].doc);
            String title = doc.get("title");
            String path = doc.get("path");
            // Widened to double to keep the printed score format unchanged.
            double score = hits[i].score;

            if (path == null) {
                System.out.println(rank + ". " + "No path for this document exists");
                continue;
            }
            System.out.println(rank + ". " + title);
            System.out.println(" Path: " + path);
            System.out.println(" Score: " + score + "\n");
        }
    }
}