"""Remove duplicate files in a directory and rename the rest to their MD5.

Usage: findDup.py <directory>

Every regular file directly inside <directory> is hashed.  Files whose
content was already seen are deleted; the remaining files are renamed to
``<md5><ext>`` (``<ext>`` is kept only for known image extensions).
"""
import hashlib
import os
import sys

# Extensions that are carried over to the renamed file.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".webp", ".gif", ".jpeg")

# Read size used while hashing so large files are not loaded in one piece.
_CHUNK_SIZE = 1024 * 1024


def getmd5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in binary mode, in chunks, and is always closed.
    Raises OSError (or IOError on old interpreters) if it cannot be read.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _hashed_name(filemd5, name):
    """Return the hash-based file name for *name* (digest + image suffix)."""
    # splitext looks only at the real suffix, so "a.png.gif" yields ".gif"
    # instead of matching both ".png" and ".gif".
    ext = os.path.splitext(name)[1].lower()
    if ext in _IMAGE_EXTENSIONS:
        return filemd5 + ext
    # Unknown extensions are dropped, as the script has always done.
    return filemd5


def main():
    """Deduplicate and rename the files in the directory given as argv[1].

    Sub-directories and other non-regular entries are left untouched.
    Exits with a usage message when no directory argument is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <directory>" % sys.argv[0])

    allfiles = sys.argv[1]
    print("input file:%s" % allfiles)

    # Maps content digest -> current name of the copy that is being kept.
    all_md5 = {}
    # Sorted so that which duplicate survives does not depend on the
    # arbitrary order returned by os.listdir.
    for name in sorted(os.listdir(allfiles)):
        path = os.path.join(allfiles, name)
        if not os.path.isfile(path):
            continue

        filemd5 = getmd5(path)

        if filemd5 in all_md5:
            if all_md5[filemd5] == name:
                # This entry is the kept copy itself (it was recorded while
                # handling an earlier duplicate); never delete it.
                continue
            print("dup file:%s\n" % name)
            print("origin pic:%s\n" % all_md5[filemd5])
            os.remove(path)
            continue

        if filemd5 in name:
            # Already named after its content; nothing to rename.
            all_md5[filemd5] = name
            continue

        new_name = _hashed_name(filemd5, name)
        target = os.path.join(allfiles, new_name)

        if os.path.exists(target):
            # os.rename would silently overwrite the target on POSIX, so
            # inspect it first instead of clobbering another file.
            if os.path.isfile(target) and getmd5(target) == filemd5:
                # Same content already stored under the hashed name:
                # the current file is the duplicate.
                print("dup file:%s\n" % name)
                print("origin pic:%s\n" % new_name)
                os.remove(path)
                all_md5[filemd5] = new_name
            else:
                # The name is taken by different content; keep this file
                # under its current name rather than destroying the other.
                print("skip rename, target exists:%s\n" % new_name)
                all_md5[filemd5] = name
            continue

        os.rename(path, target)
        # Record the post-rename name so later messages point at a file
        # that actually exists.
        all_md5[filemd5] = new_name


if __name__ == '__main__':
    main()