-
Notifications
You must be signed in to change notification settings - Fork 13
/
index.js
executable file
·55 lines (39 loc) · 1.35 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env node
var read = require('node-readability'),
sanitizer = require('sanitizer'),
keyword = require('gramophone'),
program = require('commander'),
urlvalue = "",
ngramsvalue;
/**
 * Turn the raw n-gram option value into the list of n-gram sizes to extract.
 *
 * Accepts a number or a comma-separated string such as "2,3". Anything else
 * (missing option, option given without a value, no positive integers found)
 * yields the default sizes [2, 3].
 *
 * @param {*} raw - Value read from the parsed command line.
 * @returns {number[]} Positive integer n-gram sizes.
 */
function parseNgrams(raw) {
  var fallback = [2, 3];
  if (typeof raw !== 'string' && typeof raw !== 'number') {
    return fallback;
  }
  var sizes = String(raw)
    .split(',')
    .map(function (part) {
      return parseInt(part, 10);
    })
    .filter(function (size) {
      // NaN fails this comparison, so unparsable parts are dropped too.
      return size > 0;
    });
  return sizes.length > 0 ? sizes : fallback;
}

/**
 * Build the base name of the output file from the article title.
 *
 * The title comes from a remote page, so path separators are replaced to keep
 * the file inside the current working directory.
 *
 * @param {*} title - Article title as reported by the extractor.
 * @returns {string} Title with "/" and "\" replaced by "_", or "keywords"
 *   when the title is blank.
 */
function toFileName(title) {
  var safe = String(title).replace(/[\/\\]/g, '_');
  return safe.trim() === '' ? 'keywords' : safe;
}

program
  .option('--url, [url]', 'The url')
  .option('--n, [ngrams]', 'Words')
  .parse(process.argv);

// A flag passed without a value is not a usable URL, so insist on a non-empty string.
if (typeof program.url === 'string' && program.url !== '') {
  urlvalue = program.url;
} else {
  // Usage errors go to stderr and produce a non-zero exit status.
  console.error('Please add --url parameter. Something like this: $ keywordsextract --url https://en.wikipedia.org/wiki/Search_engine_optimization');
  process.exit(1);
}

// NOTE(review): the flag is declared as "--n", so commander presumably stores
// its value under `program.n`; `program.ngrams` is still checked first in case
// the installed version names it differently — confirm against that version.
ngramsvalue = parseNgrams(program.ngrams !== undefined ? program.ngrams : program.n);

read(urlvalue, function (err, article) {
  // Without this guard a failed fetch would crash on `article.title` below.
  if (err || !article) {
    console.error(err || new Error('No article could be extracted from ' + urlvalue));
    process.exitCode = 1;
    return;
  }

  var title1 = article.title;
  var total = stripHTML(article.title + " " + article.content);
  var extraction_result = keyword.extract(total, {
    stem: true,
    ngrams: ngramsvalue
  });
  console.log(extraction_result);

  var fs = require('fs');
  var fileName = toFileName(title1) + ".txt";
  // fs.writeFile does not accept a plain array on current Node versions; the
  // comma-joined form is what implicit string coercion of the array produces.
  fs.writeFile(fileName, extraction_result.join(","), function (writeErr) {
    if (writeErr) {
      process.exitCode = 1;
      return console.error(writeErr);
    }
    console.log(fileName + " file was saved!");
  });
});
/**
 * Reduce an HTML fragment to plain text.
 *
 * The markup is first run through `sanitizer.sanitize`, then every remaining
 * tag is removed, runs of two or more line breaks are collapsed into a single
 * newline, and leading/trailing whitespace is trimmed.
 *
 * @param {string} html - Markup to convert.
 * @returns {string} The text content with tags removed.
 */
function stripHTML(html) {
  // Anything between "<" and the nearest ">", including across line breaks.
  var tagPattern = /<(?:.|\n)*?>/gm;
  // Two or more line breaks in a row, each optionally followed by whitespace.
  var blankRunPattern = /(?:(?:\r\n|\r|\n)\s*){2,}/ig;

  // Callback handed to the sanitizer; it returns its argument unchanged
  // (presumably the library's URI policy hook — confirm against its docs).
  function passThrough(value) {
    return value;
  }

  return sanitizer
    .sanitize(html, passThrough)
    .replace(tagPattern, "")
    .replace(blankRunPattern, "\n")
    .trim();
}