# NOTE(review): this text was recovered from a whitespace-collapsed git patch
# that added ftfengine.py, ftfui.py and setup.py together with two binary
# artifacts ("Find these files.pptx" and
# "FindTheseFiles-0.91-win32/build/findthesefiles0.9.1.zip").  The binary
# payloads cannot be represented as text and are only recorded here.
#
# ---------------------------------------------------------------------------
# ftfengine.py
# ---------------------------------------------------------------------------
"""Script to check if a file matches those in a directory.

Marc Graham

Version 0.6.1

Improvements:
 - Added the Controler class (to do the non-UI functions being done by UI)
Bugs squashed:
 - Danger detection was not working properly, had to flip around the relpath
 - Various NoneType errors causing crashes in exe file
To be improved:
 - Do some file operations?
"""

import hashlib
import os

# Number of leading bytes covered by the "short" hash; also used as the chunk
# size when a whole file is hashed.
BLOCKSIZE = 65536
# Descend into sub-directories when a FileTree is built.
RECURSE = True
VERBOSE = True
# Really VERBOSE
RVERBOSE = False


class Controler(object):
    """Interface between the UI and the matching engine.

    This class holds all non-UI state so that the user interface can be
    replaced without touching the engine.

    Attributes:
        filesToFind -- list of ``[HashMaker, matches]`` pairs (left pane);
            ``matches`` is None until a search has been run for that file
        searchDir -- FileTree of the directory to be searched (right pane),
            or None when no directory is loaded
        selectedFile -- only initialised and reset by this class
        matches -- only initialised and reset by this class
        danger -- True when a file to find lies inside the search directory
    """

    def __init__(self):
        self.filesToFind = []
        self.searchDir = None
        self.selectedFile = None
        self.matches = None
        self.danger = False

    def add_file(self, path):
        """Register ``path`` as a file to look for.

        Returns True when the file was added, False when ``path`` is not a
        regular file or is already registered.  Sets ``self.danger`` when the
        file lies inside the current search directory.
        """
        path = self.full_path(path)
        if not os.path.isfile(path):
            if VERBOSE:
                print("Not a file, moron! " + path)
            return False
        if any(entry[0].path == path for entry in self.filesToFind):
            if VERBOSE:
                print("Already exists, " + path)
            return False

        self.filesToFind.append([HashMaker(path), None])
        # Check that the file is not within the search directory.
        if self.searchDir is not None and self.is_subdir(
                self.searchDir.compareDir, path):
            self.danger = True
            print("Danger!! Searched File is inside the search Dir!")
            if VERBOSE:
                print(self.searchDir.compareDir, path)
        if RVERBOSE:
            print("Added " + path)
        return True

    def is_subdir(self, path, directory):
        """Return True when ``directory`` is ``path`` itself or lies below it.

        Despite the parameter names, the callers in this class pass the
        search directory as ``path`` and the candidate file as ``directory``.

        Returns False when the two locations are on different drives or when
        either argument is empty (``os.path.relpath`` raises ValueError in
        both cases).
        """
        base = self.full_path(path)
        candidate = self.full_path(directory)
        if RVERBOSE:
            print(base[:1], candidate[:1])
        try:
            relative = os.path.normpath(os.path.relpath(candidate, base))
        except ValueError:
            return False
        # A relative path that is exactly ".." or starts with "../" leaves
        # the base directory.  The bare ".." case must be tested separately
        # because it carries no trailing separator.
        if relative == os.pardir or relative.startswith(os.pardir + os.sep):
            return False
        if VERBOSE:
            print("Danger... ")
        return True

    def get_matches(self, path):
        """Return the files in the search directory that match ``path``.

        The result is also stored next to the file in ``filesToFind``.  An
        empty list is returned when no search directory is loaded or when
        ``path`` has not been registered with add_file, so callers can always
        take ``len()`` of the result.
        """
        if self.searchDir is None:
            return []
        path = self.full_path(path)
        for entry in self.filesToFind:
            if entry[0].path == path:
                entry[1] = list(self.searchDir.check(entry[0]))
                return entry[1]
        return []

    def clear_matches(self):
        """Clear the stored matches (e.g. when changing search directories)."""
        for entry in self.filesToFind:
            entry[1] = None

    def set_search_dir(self, dir):
        """Set the directory to be searched, taking input from the UI.

        Stored matches always refer to the previous directory, so they are
        cleared every time.  ``self.danger`` is recomputed for the files that
        are already registered.

        Keyword Argument:
        dir -- The path to the directory to be searched
        """
        self.danger = False
        path = self.full_path(dir)
        if VERBOSE:
            print("Setting new search dir " + path)
        # Build the tree before touching any state so that a failure while
        # reading the directory leaves the controller unchanged.
        tree = FileTree(path)
        self.clear_matches()
        self.searchDir = tree
        for entry in self.filesToFind:
            if self.is_subdir(tree.compareDir, entry[0].path):
                print("Danger! Searched File is in the search Dir!")
                if VERBOSE:
                    print((tree.compareDir, entry[0].path))
                self.danger = True

    def clear_left(self):
        """Reset the files to be found to the initial state."""
        self.filesToFind = []
        self.selectedFile = None
        self.matches = None

    def clear_right(self):
        """Forget the search directory and every stored match."""
        self.clear_matches()
        self.searchDir = None

    def full_path(self, path):
        """Return ``path`` normalised, with environment variables expanded."""
        return os.path.expandvars(os.path.normpath(path))


class HashMaker(object):
    """Basic storage unit for a file which may or may not have been hashed.

    Keyword Arguments:
    path -- the path to the file, including the file name
    shortHash -- hash of the first BLOCKSIZE bytes, if already known
    fullHash -- hash of the whole file, if already known

    Attributes set by the constructor: ``path``, ``name`` (base name of the
    path), ``size`` (bytes, read from disk), ``sHash``, ``fHash`` and
    ``matchMe`` (an initially empty list).
    """

    def __init__(self, path, shortHash=None, fullHash=None):
        self.path = path
        self.name = os.path.basename(path)
        self.size = os.path.getsize(self.path)
        self.sHash = shortHash
        self.fHash = fullHash
        self.matchMe = []

    def read_hash(self, block):
        """Return the MD5 hex digest of the first ``block`` bytes of the file.

        Keyword Arguments:
        block -- the number of bytes to read; 0 (or a negative number)
                 hashes the whole file
        """
        limit = int(block)
        hasher = hashlib.md5()
        with open(self.path, 'rb') as source:
            if limit <= 0:
                # Stream the file so large files are never held in memory.
                for chunk in iter(lambda: source.read(BLOCKSIZE), b''):
                    hasher.update(chunk)
            else:
                hasher.update(source.read(limit))
        return hasher.hexdigest()

    def set_short(self):
        """Set the hash of the first BLOCKSIZE bytes of the file."""
        if self.sHash is None:
            self.sHash = self.read_hash(BLOCKSIZE)
            # The short hash already covers the whole file when the file is
            # no longer than BLOCKSIZE bytes.
            if BLOCKSIZE >= self.size:
                self.fHash = self.sHash

    def set_full(self):
        """Set the full hash of the file."""
        if self.fHash is None:
            self.fHash = self.read_hash(0)

    def say_me(self):
        """Return a tuple with path, size, sHash, fHash."""
        return (self.path, self.size, self.sHash, self.fHash)
# ---------------------------------------------------------------------------
# ftfengine.py (continued)
# ---------------------------------------------------------------------------
class FileTree(object):
    """Stores the files found below a directory as HashMaker objects.

    Keyword Argument:
    compareDir -- Specifies which is the directory to be searched

    Attributes:
        fileList -- every file found under compareDir
        sizedList, sHList, fHList -- the successive candidate lists built by
            check(): same size, same short hash, same full hash
    """

    def __init__(self, compareDir):
        self.compareDir = os.path.normpath(compareDir)
        self.fileList = []
        self.sizedList = []
        self.sHList = []
        self.fHList = []
        self.get_files(self.compareDir)

    def get_files(self, cD):
        """Recursively collect all the files in the given directory.

        A sub-directory or file that cannot be read is skipped so that one
        bad entry does not abort the whole scan; an unreadable root
        directory still raises OSError.
        """
        try:
            entries = os.listdir(cD)
        except OSError as err:
            if cD == self.compareDir:
                raise
            if VERBOSE:
                print("Skipping unreadable directory " + cD + ": " + str(err))
            return
        for name in entries:
            fpath = self.full_path(os.path.join(cD, name))
            if os.path.isfile(fpath):
                try:
                    self.fileList.append(HashMaker(fpath))
                except OSError as err:
                    if VERBOSE:
                        print("Skipping unreadable file " + fpath + ": "
                              + str(err))
            elif RECURSE and os.path.isdir(fpath):
                # If RECURSE is set we dig right down into the sub-directory.
                self.get_files(fpath)
        if VERBOSE:
            print(len(self.fileList))

    def trim_by_size(self, chksize):
        """Write to sizedList all the files in fileList of size chksize."""
        self.sizedList = [f for f in self.fileList if f.size == chksize]

    def trim_by_sHash(self, chksHash):
        """Write to sHList all the files in sizedList matching chksHash."""
        self.sHList = []
        for f in self.sizedList:
            f.set_short()
            if f.sHash == chksHash:
                self.sHList.append(f)

    def trim_by_fHash(self, chkfHash):
        """Write to fHList all the files in sHList matching chkfHash."""
        self.fHList = []
        for f in self.sHList:
            f.set_full()
            if f.fHash == chkfHash:
                self.fHList.append(f)

    def re_init(self):
        """Reset the candidate lists without re-reading the directory."""
        self.sizedList = []
        self.sHList = []
        self.fHList = []

    def check(self, matchFile):
        """Return the files in the tree whose content matches ``matchFile``.

        ``matchFile`` is a single HashMaker object.  Candidates are narrowed
        by size, then by short hash, then by full hash, so hashes are only
        computed for files that can still match.  ``matchFile`` itself is
        left out of the result when it lives inside the tree.
        """
        self.re_init()
        self.trim_by_size(matchFile.size)
        if VERBOSE:
            print(str(len(self.sizedList)) + " of "
                  + str(len(self.fileList)) + " matched on size...")
        if not self.sizedList:
            print("No Matches on Size")
            return []

        matchFile.set_short()
        self.trim_by_sHash(matchFile.sHash)
        if VERBOSE:
            print(str(len(self.sHList)) + " of "
                  + str(len(self.sizedList)) + " matched on short hash")
        if not self.sHList:
            print("No Matches on semi-Hash")
            return []

        matchFile.set_full()
        self.trim_by_fHash(matchFile.fHash)
        if VERBOSE:
            print(str(len(self.fHList)) + " of "
                  + str(len(self.sHList)) + " matched on full hash")

        returnList = []
        for found in self.fHList:
            if found.path != matchFile.path:
                returnList.append(found)
            else:
                print("Omitting self-detection match")
        return returnList

    def full_path(self, path):
        """Return ``path`` normalised, with environment variables expanded."""
        return os.path.expandvars(os.path.normpath(path))


__author__ = "Marc Graham"
__copyright__ = "Copyright 2013"


# ---------------------------------------------------------------------------
# ftfui.py
# ---------------------------------------------------------------------------
"""
User Interface for Find These Files
Version 0.9.1
Improvements/Changes Made in 0.9:
 - The lines between UI and engine were blurred, needed to move a lot of the
   calculating functions back into ftfengine
 - Renamed to ftfui
Bugs Squished:
 - Fixed some NoneType errors causing crashes in exe
 - Fixed Danger operation, looks pretty good now
Improvements to come:
 - Need to add some easy functions like Move duplicated files to a folder

"""

import os
import subprocess
import ftfengine as ftfe
import tkinter as tk
from tkinter import messagebox
from tkinter import filedialog as fd
from tkinter import ttk

VERBOSE = False
# Really VERBOSE
RVERBOSE = False


class UIWindow(tk.Tk):
    """Container class for the Tkinter app.

    Attributes:
     - self.parent is the value handed to the constructor (the script below
       passes None)
     - self.file_names is the list of file names picked last
     - self.dir_name is the name of the directory being searched, or None
     - self.lastdir keeps the last directory used in a dialog
     - self.isFileSelected tells whether anything has been added to the
       left tree
     - self.isFolderSelected tells whether a search folder has been chosen
     - self.ctrl is the ftfengine Controler holding all non-UI state
     - self.tree is the left tree (files to find), self.rTree the right
       tree (search directory or matches); item ids are file system paths
    """

    def __init__(self, parent):
        tk.Tk.__init__(self, parent)
        self.parent = parent
        self.file_names = None
        self.dir_name = None
        self.lastdir = None
        self.isFileSelected = False
        self.isFolderSelected = False
        self.ctrl = ftfe.Controler()
        self.initialize()

    def initialize(self):
        """Build the buttons and the two tree views."""
        self.topGroup = tk.LabelFrame(self.parent)
        self.topGroup.pack(pady=5, padx=5, fill='x', expand='no')

        self.leftGroup = tk.LabelFrame(
            self.topGroup, text="These are the files you want find...")
        self.leftGroup.pack(side='left')

        self.addFiles = tk.Button(
            self.leftGroup, text="Add file(s) individually...",
            command=lambda: self.file_pick())
        self.addFiles.pack(side='left', padx=2, pady=5)

        self.addDirLeftB = tk.Button(
            self.leftGroup, text="Add a whole directory",
            command=lambda: self.add_folder_select())
        self.addDirLeftB.pack(side='left', padx=2, pady=5)

        self.clearEntries = tk.Button(
            self.leftGroup, text="Clear list",
            command=lambda: self.clear_left())
        self.clearEntries.pack(side='right', padx=2, pady=5)

        self.rightGroup = tk.LabelFrame(
            self.topGroup, text="This is the directory you want to search...")
        self.rightGroup.pack(side='right')

        self.selSearchDir = tk.Button(
            self.rightGroup, text="Select Search Directory... ",
            command=lambda: self.browse_dir())
        self.selSearchDir.pack(padx=2, pady=5)

        # add a Tree Group
        self.tg = tk.LabelFrame(self.parent, text="Files", height=400,
                                padx=5, pady=5)
        self.tg.pack(padx=5, pady=5, side='bottom', fill='both', expand='yes')

        # add the left treeviewer
        self.tree = ttk.Treeview(self.tg, selectmode="browse",
                                 columns=('Matches',))
        self.tree.pack(fill='y', side='left', padx=5)
        self.tree.heading("#0", text="File")
        self.tree.column("#0", minwidth=200, width=self.max_el_size(),
                         stretch='YES')
        self.tree.heading('Matches', text="Matches")
        self.tree.column("Matches", minwidth=25, stretch='NO', width=40)
        # NOTE(review): the event names arrived as "" and "<>", which Tk
        # rejects; they look like angle-bracket sequences stripped in
        # transit.  Restored from the handler names -- confirm.
        self.tree.bind("<Double-1>", self.on_double_click)
        self.tree.bind("<<TreeviewSelect>>", self.send_selected)

        # make a tree on the right side
        self.rTree = ttk.Treeview(self.tg, selectmode="browse")
        self.rTree.pack(expand='YES', fill='both', side='left', padx=5)
        self.rTree.heading("#0", text="Please load a search directory...")
        self.rTree.column("#0", minwidth=500, width=500, stretch='YES')
        self.rTree.bind("<Double-1>", self.on_double_click2)
        self.rTree.bind("<<TreeviewSelect>>", self.printSames)

    def full_path(self, path):
        """Return ``path`` normalised, with environment variables expanded."""
        return os.path.expandvars(os.path.normpath(path))

    def _walk_items(self, tree, parent=''):
        """Yield every item id below ``parent`` in ``tree``, depth first."""
        for item in tree.get_children(parent):
            yield item
            for child in self._walk_items(tree, item):
                yield child

    def _reveal_in_explorer(self, tree):
        """Show the item selected in ``tree`` in Windows Explorer.

        Does nothing when nothing is selected or when the selected row is
        not an existing path (for example the "No matches." row).
        """
        selected = tree.selection()
        if not selected or not os.path.exists(selected[0]):
            return
        try:
            # An argument list avoids building a shell command line from a
            # file name.
            subprocess.Popen(["explorer.exe", "/select,",
                              os.path.normpath(selected[0])])
        except OSError as err:
            print("Could not start explorer.exe: " + str(err))

    def clear_left(self):
        """Clear all entries on the left and reset; keeps the search dir."""
        # clear left
        self.file_names = None
        self.isFileSelected = False
        self.ctrl.clear_left()
        for i in self.tree.get_children():
            self.tree.delete(i)
        # clear right
        for i in self.rTree.get_children():
            self.rTree.delete(i)

    def browse_dir(self):
        """Ask for a search directory, confirming first if one is loaded."""
        if self.dir_name is None:
            self.right_dir_sel()
        elif messagebox.askyesno(
                "Woah!",
                "Do you want to clear all the search results"
                + " and load a new directory?"):
            self.dir_name = None
            self.ctrl.clear_right()
            for i in self.rTree.get_children():
                self.rTree.delete(i)
            self.right_dir_sel()

    def printSames(self, event):
        """Print the id and text of the item selected in the right tree."""
        selected = self.rTree.selection()
        if not selected:
            # The selection event also fires when the selected row is deleted.
            return
        item = selected[0]
        print("item is ", item)
        print("text is ", self.rTree.item(item, "text"))
        print()

    def max_el_size(self):
        """Return the width to use for the file column of the left tree.

        NOTE(review): the original compared item ids (strings) with an int,
        which raises TypeError as soon as the tree has children.  The length
        of the longest item label is used instead -- confirm the intent.
        """
        longest = 0
        for item in self.tree.get_children():
            longest = max(longest, len(self.tree.item(item, "text")))
        return longest + 400

    def on_double_click(self, event):
        """Reveal the file selected in the left tree in Explorer."""
        self._reveal_in_explorer(self.tree)

    def on_double_click2(self, event):
        """Reveal the file selected in the right tree in Explorer."""
        self._reveal_in_explorer(self.rTree)

    def file_pick(self):
        """
        Opens a popup dialog to select one or more files. Stores those names
        in self.file_names. Sends them to the controler. Sets
        states for isFileSelected.
        """
        picked = self.splitlist(fd.askopenfilenames(
            title='Choose one or more files',
            initialdir=self.lastdir))
        # had to split and re-join due to weird behaviour with mounted drives
        names = [self.full_path(os.path.join(*os.path.split(p)))
                 for p in picked]
        if not names:
            # Dialog cancelled: keep the '.' placeholder the original stored.
            self.file_names = ['.']
            self.isFileSelected = len(self.tree.get_children("")) > 0
            return
        self.file_names = names
        self.lastdir = self.full_path(os.path.dirname(names[0]))
        for f in names:
            if not self.tree.exists(f):
                self.ctrl.add_file(f)
                self.tree.insert('', 'end', f, text=os.path.basename(f))
        self.isFileSelected = True

    def right_dir_sel(self):
        """Ask for the search directory and load it into the right tree."""
        chosen = fd.askdirectory(initialdir=self.lastdir)
        if not chosen:
            self.dir_name = None
            return
        self.dir_name = chosen
        self.rTree.heading("#0", text="Loading Directory")
        self.lastdir = self.full_path(chosen)
        # stops weird stuff happening with mapped drives
        root = self.full_path(os.path.join(*os.path.split(chosen)))
        # Start from an empty tree so the root id cannot collide.
        for i in self.rTree.get_children():
            self.rTree.delete(i)
        self.rTree.insert('', 'end', root, text=root)
        self.ctrl.set_search_dir(root)
        # Clear the match counts on every row, including files that were
        # added through a folder and therefore sit below the top level.
        for item in self._walk_items(self.tree):
            self.tree.item(item, values=("",))
        self.pop_folders(root)
        self.isFolderSelected = True
        self.rTree.heading(
            "#0", text="Showing results for " + os.path.normpath(chosen))

    def pop_folders(self, dirName):
        """Method to add a folder to the RIGHT tree, with hierarchy.

        The item for ``dirName`` must already exist in the right tree.
        Unreadable directories are reported and skipped.
        """
        j = os.path.expandvars(dirName)
        if not os.path.isdir(j):
            print("Not a directory")
            return
        try:
            dirList = os.listdir(j)
        except OSError as err:
            print("Cannot read " + j + ": " + str(err))
            return
        # do dirs first
        for e in dirList:
            full = self.full_path(os.path.join(j, e))
            if os.path.isdir(full):
                self.rTree.insert(j, 'end', full, text=e, tags=('dir',))
                self.pop_folders(full)
        # now do files, no need to add files to a hash list
        for e in dirList:
            full = self.full_path(os.path.join(j, e))
            if os.path.isfile(full):
                self.rTree.insert(j, 'end', full, text=e, tags=('file',))

    def add_folder_select(self):
        """Ask for a directory and add all of its files to the left tree."""
        leftDirName = fd.askdirectory(initialdir=self.lastdir)
        if not leftDirName:
            return
        self.lastdir = self.full_path(leftDirName)
        # make things normalised
        j = self.full_path(os.path.join(*os.path.split(leftDirName)))
        if not self.tree.exists(j):
            self.tree.insert('', 'end', j, text=j)
        self.add_folder(j)

    def add_folder(self, dirName):
        """Method to add a folder to the left tree, with hierarchy.

        The item for ``dirName`` must already exist in the left tree.  An
        entry that is already shown elsewhere in the tree is moved below
        this folder.
        """
        j = self.full_path(dirName)
        if not os.path.isdir(j):
            if VERBOSE:
                print("Not a directory")
            return
        try:
            dirList = os.listdir(j)
        except OSError as err:
            print("Cannot read " + j + ": " + str(err))
            return
        # do dirs first
        for e in dirList:
            full = self.full_path(os.path.join(j, e))
            if os.path.isdir(full):
                if self.tree.exists(full):
                    self.tree.delete(full)
                self.tree.insert(j, 'end', full, text=e, tags=('dir',))
                self.add_folder(full)
        # now do files
        for e in dirList:
            full = self.full_path(os.path.join(j, e))
            if os.path.isfile(full):
                if self.tree.exists(full):
                    if RVERBOSE:
                        print("Adding " + full + " but it already exists")
                    self.tree.delete(full)
                self.tree.insert(j, 'end', full, text=e, tags=('file',))
                self.ctrl.add_file(full)

    def send_selected(self, event):
        """Only does action to right side if a file is selected"""
        selected = self.tree.selection()
        # The selection is empty when the event was caused by a deletion.
        if selected and os.path.isfile(selected[0]):
            self.show_sames(self.full_path(selected[0]))

    def show_sames(self, item):
        """Called by send_selected, this method makes the right side list
        become a tree of matches. Its input is item, which is found on
        the left list.
        """
        if self.dir_name is None:
            if messagebox.askyesno(
                    "No search directory loaded",
                    "Would you like to load a new search directory?"):
                self.browse_dir()
            return
        self.rTree.tag_configure('highlight', foreground='black')
        if VERBOSE:
            print(self.tree.selection())
        # clear last result
        for e in self.rTree.get_children():
            self.rTree.delete(e)
        # find matches (uses ftfe); treat "nothing to report" as no matches
        matchList = self.ctrl.get_matches(item) or []
        if matchList:
            self.do_match(matchList)
        else:
            self.rTree.insert('', 'end', text="No matches.")
        if self.tree.exists(item):
            self.tree.item(item, values=(str(len(matchList)),))

    def do_match(self, matchList):
        """This really just clears the right list, sets up the list of matches
        and palms them off to add_match for insertion
        """
        for i in self.rTree.get_children():
            self.rTree.delete(i)
        root = self.full_path(self.dir_name)
        rootNode = (str(len(matchList)) + " Matches found in... "
                    + str(os.path.normpath(self.dir_name)))
        self.rTree.insert('', 'end', root, text=rootNode)
        for f in matchList:
            self.add_match(f, root)
        self.rTree.tag_configure('idem', foreground='red')

    def add_match(self, match, root):
        """Insert ``match`` below the ``root`` item of the right tree.

        Every directory between ``root`` and the file gets its own item, so
        the matches are displayed as a tree.
        """
        selected = self.tree.selection()
        if selected and match.path == selected[0]:
            print("Major Error returning same path matches")
            return
        # remove the trunk path from the path, then walk its components
        spath = self.rem_bdir(os.path.normpath(root), match.path)
        parts = [p for p in spath.split(os.sep) if p]
        if not parts:
            return
        parent = root
        for name in parts[:-1]:
            node = os.path.join(parent, name)
            if not self.rTree.exists(node):
                self.rTree.insert(parent, 'end', node, text=name)
            parent = node
        leaf = os.path.join(parent, parts[-1])
        if not self.rTree.exists(leaf):
            self.rTree.insert(parent, 'end', leaf, text=parts[-1])
        self.rTree.see(leaf)

    def get_top(self, spath):
        """Return the first component of ``spath``.

        Leading separators are ignored, so both ``<sep>a<sep>b`` and
        ``a<sep>b`` give ``a``.
        """
        parts = [p for p in os.path.normpath(spath).split(os.sep) if p]
        if parts:
            return parts[0]
        return os.path.split(spath)[1]

    def rem_bdir(self, basedir, path):
        """Returns the path without the hanging basedir.

        The result starts with a path separator.  ``os.path.relpath`` raises
        ValueError on Windows when the two paths are on different drives.
        """
        path = os.path.normpath(path)
        basedir = os.path.normpath(basedir)
        relative = os.path.relpath(path, basedir)
        if relative == os.curdir:
            # path and basedir are the same location: nothing is left over.
            return self.full_path("")
        return self.full_path(os.sep + relative)


if __name__ == "__main__":
    app = UIWindow(None)
    app.title('Find these files')
    app.mainloop()


__author__ = "Marc Graham"
__copyright__ = "Copyright 2013"
# Kept in step with the version named in the module docstring.
__version__ = "0.9.1"


# ---------------------------------------------------------------------------
# setup.py
# ---------------------------------------------------------------------------
import sys
from cx_Freeze import setup, Executable

# The GUI base only exists on Windows; elsewhere the default base is used.
base = "Win32GUI" if sys.platform == "win32" else None

setup(
    name="Find These Files",
    # Matches the version stated in ftfui.py and the name of the shipped zip.
    version="0.9.1",
    description="Finds a file in a folder even if the names are different",
    executables=[Executable("ftfui.py", base=base)])