-
Notifications
You must be signed in to change notification settings - Fork 0
/
nrdscan.py
119 lines (96 loc) · 5.25 KB
/
nrdscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import argparse
from datetime import datetime, timedelta
import base64
import requests
from zipfile import ZipFile
import os
from thefuzz import fuzz
import csv
class DomainMatch:
    """One match between a reference domain and a newly registered domain.

    Attributes are stored exactly as passed in:
        domain:    the reference domain the match was found for.
        newdomain: the newly registered domain that matched.
        matchtype: label describing how the match was found.
        ratio:     score associated with the match.

    Iterating an instance yields the four values in the order
    domain, newdomain, matchtype, ratio, so an instance can be handed
    directly to anything that consumes a row-like iterable.
    """

    def __init__(self, domain, newdomain, matchtype, ratio):
        self.domain, self.newdomain = domain, newdomain
        self.matchtype, self.ratio = matchtype, ratio

    def __iter__(self):
        # Snapshot the fields in column order and iterate over that snapshot.
        row = (self.domain, self.newdomain, self.matchtype, self.ratio)
        return iter(row)
def getDomainListFromFile(filename):
    """Read a list of domain names from a text file, one name per line.

    Leading and trailing whitespace (including the line terminator) is
    stripped from every line. Lines that are empty after stripping are
    skipped: an empty string is a substring of every string, so an empty
    entry would match every domain in a substring comparison.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with the non-empty, stripped lines in file order.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(filename) as f:
        # Iterate the file lazily instead of materialising all raw lines first.
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]
# ---------------------------------------------------------------------------
# Script body
#
# 1. parse the command line
# 2. create a working directory named after yesterday's date
# 3. download and unpack yesterday's newly-registered-domains archive
# 4. compare every reference domain against every new domain
# 5. print the matches and optionally append them to a CSV file
# 6. optionally remove the working directory again
# ---------------------------------------------------------------------------

# --- 1. command line --------------------------------------------------------
argParser = argparse.ArgumentParser()
argParser.add_argument("-i", "--inputfile", help="file containing a list of domain names. one name per line.", type=str, required=True)
argParser.add_argument("-o", "--outputfile", help="file to write the result to. default is to print to stdout.", type=str)
argParser.add_argument("-fr", "--fuzzratio", help="ratio to use for fuzzy string matching. default is 75. NOTE: set to 0 to disable fuzzy matching.", type=int, default=75)
argParser.add_argument('-c', "--clean", help="clean working directories after execution. default is to leave the downloaded domain lists.", action='store_true')
args = argParser.parse_args()

# --- 2. working directory ---------------------------------------------------
# The archive is addressed by yesterday's date (local time), which also
# names the working directory below the current directory.
yesterdayAsTimeStamp = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
workingdirectory = os.path.join(os.getcwd(), yesterdayAsTimeStamp)

if os.path.exists(workingdirectory):
    # The message promises an abort, so stop here explicitly instead of
    # relying on the layout of the code that follows. A non-zero status
    # tells calling scripts that nothing was scanned.
    print("Directory " + workingdirectory + " already exists. Aborting.")
    raise SystemExit(1)
os.makedirs(workingdirectory)

# --- 3. download and unpack -------------------------------------------------
# Download the list of new domains from https://whoisds.com/newly-registered-domains.
# The URL contains the base64-encoded archive name ("<date>.zip"); the archive
# is stored under that name inside the working directory.
baseurl = "https://www.whoisds.com//whois-database/newly-registered-domains/"
baseurlsuffix = "/nrd"
filename = yesterdayAsTimeStamp + ".zip"
filenameb64 = base64.b64encode(filename.encode('ascii')).decode('ascii')
downloadurl = baseurl + filenameb64 + baseurlsuffix
downloadfilename = os.path.join(workingdirectory, filename)
print("Downloading " + downloadurl + " to " + downloadfilename)
# Without a timeout requests may block forever on a stalled connection.
r = requests.get(downloadurl, allow_redirects=True, timeout=60)
# Fail here with a clear HTTP error instead of saving an error page as
# "<date>.zip" and failing later while unzipping.
r.raise_for_status()
with open(downloadfilename, 'wb') as f:
    f.write(r.content)

# Unzip the archive into the working directory.
with ZipFile(downloadfilename, 'r') as zip_ref:
    zip_ref.extractall(workingdirectory)

# --- 4. matching ------------------------------------------------------------
# The extracted archive is expected to contain "domain-names.txt".
newdomainfile = os.path.join(workingdirectory, "domain-names.txt")
newdomains = getDomainListFromFile(newdomainfile)
mydomains = getDomainListFromFile(args.inputfile)

# Collected DomainMatch objects.
result = []

for mydomain in mydomains:
    # Ignore the TLD: the first label is assumed to be the string to look
    # for (e.g. domainname.tld or domainname.co.tld).
    domain = mydomain.split(".")[0]
    if not domain:
        # A blank entry (or one starting with ".") yields an empty label,
        # and "" is contained in every string: it would report every new
        # domain as a direct match. Skip it.
        continue

    for newdomain in newdomains:
        # Direct match: the label occurs as a whole inside the new domain
        # (e.g. newspaper.com matches mynewspaper.com).
        if domain in newdomain:
            result.append(DomainMatch(mydomain, newdomain, "DirectMatch", 0))
        # Fuzzy match, only tried when there is no direct match and fuzzy
        # matching is enabled (e.g. newspaper.com matches news-paper.com).
        elif args.fuzzratio > 0:
            newdomainpart = newdomain.split(".")[0]
            fuzzyratio = fuzz.ratio(domain, newdomainpart)
            if fuzzyratio >= args.fuzzratio:
                result.append(DomainMatch(mydomain, newdomain, "FuzzyMatch", fuzzyratio))

# --- 5. report --------------------------------------------------------------
if result:
    # Summary line, table header, then one row per match.
    print("Found " + str(len(result)) + " new domains matching " + str(len(mydomains)) + " domains in your reference set:")
    print("{:20} {:20} {:20} {:20}".format("Domain", "NewDomain", "MatchType", "Ratio"))
    print("---------------------------------------------------------------------------------------------------")
    for item in result:
        print("{:20} {:20} {:20} {:20}".format(item.domain, item.newdomain, item.matchtype, item.ratio))
else:
    print("No matches found.")

# Append the result to the output file if one was requested. The CSV header
# is written only when the file does not exist yet, so repeated runs keep
# appending to the same file under a single header.
if args.outputfile:
    writeheader = not os.path.exists(args.outputfile)
    with open(args.outputfile, 'a') as f:
        writer = csv.writer(f, delimiter=',', lineterminator='\n')
        if writeheader:
            writer.writerow(["Domain", "NewDomain", "MatchType", "Ratio"])
        # Each DomainMatch is iterable and yields its four columns.
        writer.writerows(result)

# --- 6. clean up ------------------------------------------------------------
if args.clean:
    # Remove the two known files first; os.rmdir only removes an empty
    # directory.
    os.remove(downloadfilename)
    os.remove(newdomainfile)
    os.rmdir(workingdirectory)