-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial Add of files from bzr repository.
- Loading branch information
Showing
5 changed files
with
736 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,327 @@ | ||
"""script to check if a file matches those in a directory | ||
Marc Graham | ||
Version 0.6.1 - | ||
Improvements: | ||
- Added the Controler class (to do the non-UI functions being done by UI) | ||
Bugs sqaushed: | ||
- Danger detection was not working properly, had to flip around the relpath | ||
- Various NoneType errors causing crashes in exe file | ||
To be improved: | ||
- Do some file operations? | ||
""" | ||
|
||
import hashlib, os | ||
|
||
BLOCKSIZE = 65536 | ||
RECURSE = True | ||
VERBOSE = True | ||
#Really VERBOSE | ||
RVERBOSE = False | ||
|
||
class Controler(object): | ||
"""This class provides the functions and variables that interface between | ||
UI and engine. This was introduced to really pull all the non-UI stuff | ||
from ftf, so that a change in user interface would be possible in the future | ||
It has variables: | ||
self.filesToFind -- A list of files (HashMaker obj) to be looked for (left pane) | ||
Structure is [hM, matches] | ||
self.searchDir -- The directory to be searched (right pane) | ||
self.selectedFile -- The file in filesToFind that we're looking for (?) | ||
self.matches -- The matches corresponding to that file | ||
self.danger -- if the added files are inside within the search dir | ||
""" | ||
|
||
def __init__(self): | ||
self.filesToFind = [] | ||
self.searchDir = None | ||
self.selectedFile = None | ||
self.matches = None | ||
self.danger = False | ||
|
||
def add_file(self, path): | ||
path = self.full_path(path) | ||
if os.path.isfile(path): | ||
exists = False | ||
for f in self.filesToFind: | ||
if f[0].path == path: | ||
exists += 1 | ||
if not exists: | ||
h = HashMaker(path) | ||
self.filesToFind.append([h,None]) | ||
#check that the file is not within the search | ||
if self.searchDir is not None: | ||
if self.is_subdir(self.searchDir.compareDir, path): | ||
self.danger = True | ||
print("Danger!! Searched File is inside the search Dir!") | ||
if VERBOSE: | ||
print(self.searchDir.compareDir, path) | ||
if RVERBOSE: | ||
print("Added "+path) | ||
return True | ||
else : | ||
if VERBOSE: | ||
print("Already exists, "+path) | ||
return False | ||
else: | ||
if VERBOSE: | ||
print("Not a file, moron! "+path) | ||
return False | ||
|
||
def is_subdir(self, path, directory): | ||
"""Checks if we're searching in the same directory""" | ||
path = self.full_path(path) | ||
directory = self.full_path(directory) | ||
#True if it is not a subdir | ||
notSub = None | ||
#if the drive letter is not the same it can't be in the same path | ||
#relpath will crash if they are on different drives. | ||
if RVERBOSE : | ||
print(path[0], directory[0]) | ||
if path[0] != directory[0]: | ||
notSub = True | ||
else: | ||
relative = os.path.normpath(os.path.relpath(directory, path)) | ||
notSub = relative.startswith(os.pardir + os.sep) | ||
#a .. will not be made if the file is in the base dir | ||
if (self.full_path(os.path.dirname(path))==directory): | ||
notSub = False | ||
if VERBOSE: | ||
print(self.full_path(os.path.dirname(path)) + | ||
" is " + directory) | ||
if notSub: | ||
return False | ||
else: | ||
if VERBOSE: | ||
print("Danger... ") | ||
return True | ||
|
||
def get_matches(self, path): | ||
"""Returns matches to a certain path""" | ||
if self.searchDir is not None: | ||
path = self.full_path(path) | ||
for f in self.filesToFind: | ||
if path == f[0].path: | ||
f[1] = None | ||
matches = self.searchDir.check(f[0]) | ||
f[1] = matches[:] | ||
return f[1] | ||
|
||
|
||
def clear_matches(self): | ||
"""Clears the matches (like for when changing search directories""" | ||
for f in self.filesToFind: | ||
f[1] = None | ||
|
||
def set_search_dir(self, dir): | ||
"""Sets the directory to be searched, taking input from the UI | ||
Keyword Argument: | ||
dir -- The path to the directory to be searched | ||
""" | ||
self.danger = False | ||
path = self.full_path(dir) | ||
if self.searchDir is not None: | ||
if RVERBOSE: | ||
print("Setting new search dir "+path) | ||
self.searchDir = FileTree(path) | ||
if len(self.filesToFind) != 0: | ||
for f in self.filesToFind: | ||
if self.is_subdir(self.searchDir.compareDir, f[0].path): | ||
print("Danger! Searched File is in the search Dir!") | ||
if VERBOSE: | ||
print((self.searchDir.compareDir, f[0].path)) | ||
self.danger = True | ||
|
||
else : | ||
if VERBOSE: | ||
print("Resetting search dir to: "+path) | ||
self.searchDir = None | ||
self.clear_matches() | ||
self.searchDir = FileTree(path) | ||
if len(self.filesToFind) != 0: | ||
if VERBOSE: | ||
print("Checking for auto-inclusions...") | ||
for f in self.filesToFind: | ||
print(self.searchDir.compareDir, f[0].path) | ||
if self.is_subdir(self.searchDir.compareDir, f[0].path): | ||
print("Danger!!! Searched File is in the search Dir!") | ||
if VERBOSE: | ||
print((self.searchDir.compareDir, f[0].path)) | ||
self.danger = True | ||
|
||
|
||
def clear_left(self): | ||
"""Resets the files to be found to as new conditions""" | ||
self.filesToFind = [] | ||
self.selectedFile = None | ||
self.matches = None | ||
|
||
def clear_right(self): | ||
self.clear_matches() | ||
self.searchDir = None | ||
|
||
def full_path(self, path): | ||
path = os.path.expandvars(os.path.normpath(path)) | ||
return path | ||
|
||
|
||
class HashMaker(object): | ||
"""This class defines the basic storage unit for files which may or may not be hashed | ||
Keyword Argument: | ||
path -- the path to the file (directory only) | ||
name -- the name of the file at that location | ||
shortHash -- able to specify the hash of the first BLOCKSIZE characters | ||
fullHash -- able to specify the full hash of the file if you've got it | ||
matchMe -- a list of matching HashMaker objects (might remove this) | ||
""" | ||
|
||
def __init__(self, path, shortHash=None, fullHash=None): | ||
self.path = path | ||
self.name = os.path.basename(path) | ||
self.size = os.path.getsize(self.path) | ||
self.sHash = shortHash | ||
self.fHash = fullHash | ||
self.matchMe = [] | ||
|
||
def read_hash(self,block): | ||
""" | ||
Reads the hash of the file up to the block | ||
Keyword Arguments: | ||
block -- the number of bytes to read, 0 if all | ||
""" | ||
ifile = self.path | ||
block = int(block) | ||
hasher = hashlib.md5() | ||
with open(ifile, 'rb') as source: | ||
if block == 0 : | ||
buf = source.read() | ||
else: | ||
buf = source.read(block) | ||
hasher.update(buf) | ||
return(hasher.hexdigest()) | ||
|
||
def set_short(self): | ||
"""Sets the hash of the first BLOCKSIZE bytes of the file""" | ||
if self.sHash is None : | ||
self.sHash = self.read_hash(BLOCKSIZE) | ||
if BLOCKSIZE > self.size: | ||
self.fHash = self.sHash | ||
|
||
def set_full(self): | ||
"""Sets the full hash of the file""" | ||
if self.fHash is None: | ||
self.fHash = self.read_hash(0) | ||
|
||
def say_me(self): | ||
"""Returns a tuple with pathname, size, sHash, fHash""" | ||
a = (self.path, self.size, self.sHash, self.fHash) | ||
return a | ||
|
||
|
||
class FileTree(object): | ||
""" | ||
This class stores the file tree which is basically a list of HashMaker objects | ||
Keyword Argument: | ||
compareDir -- Specifies which is the directory to be searched | ||
""" | ||
def __init__(self, compareDir): | ||
self.compareDir = os.path.normpath(compareDir) | ||
self.fileList = [] | ||
self.sizedList = [] | ||
self.sHList = [] | ||
self.fHList = [] | ||
self.get_files(self.compareDir) | ||
|
||
def get_files(self, cD): | ||
"""Recursive method reads all the files in the given directory""" | ||
for f in os.listdir(cD): | ||
fpath = self.full_path(os.path.join(cD,f)) | ||
if os.path.isfile(fpath): | ||
self.fileList.append(HashMaker(fpath)) | ||
elif os.path.isdir(fpath) : | ||
#if RECURSE is set to true we'll dig right down | ||
if RECURSE: | ||
#method calls itself on the subdir by joining the path and the subdir | ||
self.get_files(fpath) | ||
if VERBOSE : | ||
print(len(self.fileList)) | ||
|
||
def trim_by_size(self, chksize): | ||
"""Writes to sizedList a list with all the files having the right size""" | ||
self.sizedList = [] | ||
for f in self.fileList: | ||
if f.size == chksize: | ||
self.sizedList.append(f) | ||
|
||
def trim_by_sHash(self, chksHash): | ||
"""Writes to sHList all the files matching chksHash in the sizedList""" | ||
self.sHList = [] | ||
for f in self.sizedList: | ||
f.set_short() | ||
if f.sHash == chksHash: | ||
self.sHList.append(f) | ||
|
||
def trim_by_fHash(self, chkfHash): | ||
"""Writes to fHList all the files matching chksHash in the sHList""" | ||
self.fHList = [] | ||
for f in self.sHList : | ||
f.set_full() | ||
if f.fHash == chkfHash: | ||
self.fHList.append(f) | ||
|
||
def re_init(self): | ||
"""Re-vanillas the File Tree without deleting or re-seeking""" | ||
self.sizedList = [] | ||
self.sHList = [] | ||
self.fHList = [] | ||
|
||
def check(self, matchFile): | ||
mD = matchFile | ||
returnList = [] | ||
#mD will be a single HashMaker object | ||
self.re_init() | ||
self.trim_by_size(mD.size) | ||
if VERBOSE: | ||
print(str(len(self.sizedList))+" of " + | ||
str(len(self.fileList)) + " matched on size...") | ||
if len(self.sizedList) > 0: | ||
mD.set_short() | ||
self.trim_by_sHash(mD.sHash) | ||
if VERBOSE: | ||
print(str(len(self.sHList))+ " of " + | ||
str(len(self.sizedList)) + " matched on short hash") | ||
#if the trimmed short hash list has matches then continue and | ||
#trim by full hash | ||
if len(self.sHList) > 0: | ||
mD.set_full() | ||
self.trim_by_fHash(mD.fHash) | ||
if VERBOSE: | ||
print(str(len(self.fHList))+ " of " + | ||
str(len(self.sHList)) + " matched on full hash") | ||
else: | ||
#return no matches on semi-hash | ||
print("No Matches on semi-Hash") | ||
else: | ||
#return no matches on size | ||
print("No Matches on Size") | ||
for ff in self.fHList: | ||
if ff.path != mD.path: | ||
returnList.append(ff) | ||
else: | ||
print("Omitting self-detection match") | ||
return returnList | ||
|
||
def full_path(self, path): | ||
path = os.path.expandvars(os.path.normpath(path)) | ||
return path | ||
|
||
|
||
__author__ = "Marc Graham" | ||
__copyright__ = "Copyright 2013" | ||
|
||
|
Oops, something went wrong.