1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
"""Heuristics for deciding whether a file on disk is binary.

Three independent detectors are provided (NUL-byte sniffing, libmagic MIME
lookup, ELF header check); ``is_binary_file`` combines them.
"""
import codecs
import logging
import os
import re
import stat

try:
    import magic
except ImportError:
    # python-magic is third-party; without it the MIME-based detector simply
    # reports False and the other two detectors keep working.
    magic = None

logger = logging.getLogger(__name__)

# Byte-order marks that identify a file as Unicode text even though its
# UTF-16 / UTF-32 payload legitimately contains NUL bytes.
_TEXT_BOMS = (
    codecs.BOM_UTF16_BE,
    codecs.BOM_UTF16_LE,
    codecs.BOM_UTF32_BE,
    codecs.BOM_UTF32_LE,
    codecs.BOM_UTF8,
)

# Number of leading bytes inspected by the NUL-byte heuristic.
_SNIFF_BYTES = 8192

# MIME sub-types treated as binary. The alternatives must be separated by
# ``|``; concatenated into one literal they can never match anything.
_BINARY_MIME_RE = re.compile(
    r'x-executable|x-sharedlib|octet-stream|x-object',
    re.I,
)

# The four identification bytes at the start of every ELF file.
_ELF_MAGIC = b'\x7fELF'


def is_binary_file_1(file_path) -> bool:
    """Detect a binary file by looking for NUL bytes in its first bytes.

    A file that starts with a Unicode byte-order mark is treated as text
    regardless of the NUL bytes its encoding produces.

    :param file_path: file name (including path)
    :return: True if the file looks binary, False otherwise
    :raises OSError: if the file cannot be opened or read
    """
    with open(file_path, 'rb') as file:
        initial_bytes = file.read(_SNIFF_BYTES)

    if initial_bytes.startswith(_TEXT_BOMS):
        return False
    return b'\0' in initial_bytes


def is_binary_file_2(ff) -> bool:
    """Detect a binary file from the MIME type reported by libmagic.

    This detector is best-effort: if python-magic is unavailable or the
    lookup fails for any reason, the file is reported as not binary.

    :param ff: file name (including path)
    :return: True if the MIME type names an executable, shared library,
        object file or generic octet stream; False otherwise
    """
    if magic is None:
        return False
    try:
        magic_mime = magic.from_file(ff, mime=True)
    except Exception:
        # Deliberately broad: a failing lookup must never break the caller.
        logger.debug('libmagic lookup failed for %s', ff, exc_info=True)
        return False
    return _BINARY_MIME_RE.search(magic_mime) is not None


def is_ELFfile(filepath) -> bool:
    """Report whether ``filepath`` is a regular file starting with the ELF magic.

    Symbolic links are followed, so a link to an ELF file counts as ELF.

    :param filepath: file name (including path)
    :return: True if the first four bytes are ``\\x7fELF``; False if they are
        not, or if the path is missing, not a regular file, or unreadable
    """
    if not os.path.exists(filepath):
        logger.info('file path {} doesnot exits'.format(filepath))
        return False

    try:
        if not stat.S_ISREG(os.stat(filepath).st_mode):
            return False
        with open(filepath, 'rb') as f:
            # Compare raw bytes: decoding is unnecessary and would skip the
            # leading 0x7f byte that is part of the signature.
            return f.read(len(_ELF_MAGIC)) == _ELF_MAGIC
    except OSError:
        # The file vanished or is unreadable; treat it like a missing path.
        logger.debug('could not inspect %s', filepath, exc_info=True)
        return False


def is_binary_file(filepath) -> bool:
    """Return True if any of the three detectors considers the file binary.

    The detectors run in order of cost and stop at the first positive result.

    :param filepath: file name (including path)
    :return: True if the file is considered binary, False otherwise
    :raises OSError: if the file cannot be opened for the NUL-byte check
    """
    return (
        is_binary_file_1(filepath)
        or is_binary_file_2(filepath)
        or is_ELFfile(filepath)
    )
|