How to parse the Manifest.mbdb file in an iOS 4.0 iTunes Backup

后端 未结 8 1080
小蘑菇
小蘑菇 2020-12-02 03:45

In iOS 4.0 Apple has redesigned the backup process.

iTunes used to store a list of filenames associated with backup files in the Manifest.plist file, but in iOS 4.

相关标签:
8条回答
  • 2020-12-02 04:21

    This python script is awesome.

    Here's my Ruby version of it, with minor improvements and added search capabilities (for iOS 5).

    # encoding: utf-8
    require 'fileutils'
    require 'digest/sha1'
    
    # Parser for the Manifest.mbdb index of an iTunes backup. Every record of
    # the file is turned into a Hash (domain, filename, mode, timestamps,
    # properties, computed fileID, ...) and kept in @mbdb in file order.
    class ManifestParser
      # Length-prefixed string fields at the start of every record, in order.
      STRING_FIELDS = %i[domain filename linktarget datahash unknown1].freeze
      # Big-endian integer fields that follow the mode, with their byte sizes.
      INT_FIELDS = {
        unknown2: 4, unknown3: 4, userid: 4, groupid: 4,
        mtime: 4, atime: 4, ctime: 4, filelen: 8, flag: 1, numprops: 1
      }.freeze
      private_constant :STRING_FIELDS, :INT_FIELDS

      # * mbdb_filename Path of the Manifest.mbdb file to parse.
      # * verbose When true, records are rendered in a long "ls -l"-like form.
      # Raises RuntimeError when the file lacks the 'mbdb' magic or is truncated.
      def initialize(mbdb_filename, verbose = false)
        @verbose = verbose
        process_mbdb_file(mbdb_filename)
      end

      # Returns the number of records in the Manifest file.
      def record_number
        @mbdb.size
      end

      # Returns a (possibly huge) string with one line per parsed record.
      def to_s
        @mbdb.map { |v| "#{fileinfo_str(v)}\n" }.join
      end

      # Writes one line per parsed record to the given file.
      def to_file(filename)
        File.open(filename, 'w') do |f|
          @mbdb.each do |v|
            f.puts fileinfo_str(v)
          end
        end
      end

      # Copy the backup files to their real path/name.
      # * domain_match Can be a regexp to restrict the files to copy.
      # * filename_match Can be a regexp to restrict the files to copy.
      def rename_files(domain_match = nil, filename_match = nil)
        @mbdb.each do |v|
          next unless v[:type] == '-' # Only rename files.
          next unless domain_match.nil? || v[:domain] =~ domain_match
          next unless filename_match.nil? || v[:filename] =~ filename_match

          dst = "#{v[:domain]}/#{v[:filename]}"
          puts "Creating: #{dst}"
          FileUtils.mkdir_p(File.dirname(dst))
          FileUtils.cp(v[:fileID], dst)
        end
      end

      # Returns the records whose "domain::filename" matches the given regexp.
      def search(regexp)
        @mbdb.select { |v| "#{v[:domain]}::#{v[:filename]}" =~ regexp }
      end

      private

      # Retrieve an integer (big-endian) and new offset from the current offset.
      # Raises RuntimeError when the data ends before intsize bytes were read.
      def getint(data, offset, intsize)
        value = 0
        intsize.times do
          byte = data.getbyte(offset)
          raise 'Unexpected end of MBDB data' if byte.nil?

          value = (value << 8) + byte
          offset += 1
        end
        [value, offset]
      end

      # Retrieve a string and new offset from the current offset into the data.
      # A 0xFFFF length marker denotes a blank string.
      def getstring(data, offset)
        return ['', offset + 2] if data.getbyte(offset) == 0xFF && data.getbyte(offset + 1) == 0xFF

        length, offset = getint(data, offset, 2) # 2-byte length
        finish = offset + length
        # A slice past the end would silently yield a short (or nil) string.
        raise 'Unexpected end of MBDB data' if finish > data.size

        [data[offset...finish], finish]
      end

      # Reads the whole file in binary mode and fills @mbdb with its records.
      def process_mbdb_file(filename)
        @mbdb = []
        data = File.binread(filename)
        puts "MBDB file read. Size: #{data.size}"
        raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'

        offset = 4
        offset += 2 # value x05 x00, not sure what this is
        while offset < data.size
          fileinfo, offset = read_record(data, offset)
          # We add the file to the list of files.
          @mbdb << fileinfo
        end
        @mbdb
      end

      # Decodes the record starting at offset; returns [record Hash, next offset].
      def read_record(data, offset)
        fileinfo = {}
        fileinfo[:start_offset] = offset
        STRING_FIELDS.each do |key|
          fileinfo[key], offset = getstring(data, offset)
        end
        fileinfo[:mode], offset = getint(data, offset, 2)
        fileinfo[:type] = file_type(fileinfo[:mode])
        INT_FIELDS.each do |key, size|
          fileinfo[key], offset = getint(data, offset, size)
        end
        fileinfo[:properties] = {}
        fileinfo[:numprops].times do
          propname, offset = getstring(data, offset)
          propval, offset = getstring(data, offset)
          fileinfo[:properties][propname] = propval
        end
        # Compute the ID of the file: SHA-1 of "domain-filename".
        fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
        fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
        [fileinfo, offset]
      end

      # Maps the type bits of a mode to 'l', '-', 'd', or '?' when unknown.
      def file_type(mode)
        case mode & 0xE000
        when 0xA000 then 'l' # Symlink
        when 0x8000 then '-' # File
        when 0x4000 then 'd' # Dir
        else '?'
        end
      end

      # Renders the nine permission bits of val as an "rwxrwxrwx"-style string.
      def modestr(val)
        [val >> 6, val >> 3, val].map { |bits| permission_triplet(bits) }.join
      end

      # Renders the three low bits as "rwx"/"-". Integer 0 is truthy in Ruby,
      # so every bit has to be compared against zero explicitly.
      def permission_triplet(bits)
        r = (bits & 0x4).zero? ? '-' : 'r'
        w = (bits & 0x2).zero? ? '-' : 'w'
        x = (bits & 0x1).zero? ? '-' : 'x'
        r + w + x
      end

      # Formats one record: short "(fileID)domain::filename" form, or the long
      # listing with mode, owner, size, timestamps and properties when verbose.
      def fileinfo_str(f)
        return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose

        data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]]
        info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data
        info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
        f[:properties].each do |k, v|
          info += " #{k}=#{v.inspect}"
        end
        info
      end
    end
    
    # When run as a script: parse ./Manifest.mbdb verbosely and dump the listing
    # to filenames.txt.
    if $PROGRAM_NAME == __FILE__
      ManifestParser.new('Manifest.mbdb', true).to_file('filenames.txt')
    end
    
    0 讨论(0)
  • 2020-12-02 04:24

    Thanks to galloglass' answer. The code works great with Python 2.7. There is only one thing I want to mention: when reading the Manifest.mbdb file, you should use binary mode. Otherwise, not all of the content is read.

    I also made some minor changes to make the code work with Python 3.4. Here is the code.

    #!/usr/bin/env python
    import sys
    import hashlib
    
    # Map start offset of a record in Manifest.mbdb => SHA-1 hex digest used as
    # its fileID; filled in by process_mbdb_file().
    mbdx = {}
    
    def getint(data, offset, intsize):
        """Decode a big-endian unsigned integer of ``intsize`` bytes.

        ``data`` is indexed byte by byte starting at ``offset``; returns the
        tuple ``(value, offset_just_past_the_integer)``.
        """
        result = 0
        for _ in range(intsize):
            # Shift the accumulated value up one byte and append the next one.
            result = (result << 8) + data[offset]
            offset += 1
        return result, offset
    
    def getstring(data, offset):
        """Read a length-prefixed string at ``offset``.

        Two 0xFF bytes mark a blank string. Otherwise a 2-byte big-endian
        length is followed by that many bytes, which are decoded as latin-1.
        Returns ``(text, offset_just_past_the_string)``.
        """
        if data[offset] == 0xFF and data[offset + 1] == 0xFF:
            return '', offset + 2  # Blank string
        size, start = getint(data, offset, 2)  # 2-byte length
        end = start + size
        return data[start:end].decode(encoding='latin-1'), end
    
    def process_mbdb_file(filename):
        """Parse a Manifest.mbdb file.

        Returns a dict mapping each record's start offset in the file to a dict
        of that record's fields. As a side effect, the module-level ``mbdx``
        dict receives start offset => SHA-1 hex digest of "domain-filename".

        Raises Exception when the file does not start with the "mbdb" magic.
        """
        mbdb = {}  # Map offset of info in this file => file info
        # 'b' is needed to read all content; the context manager closes the handle.
        with open(filename, 'rb') as mbdb_file:
            data = mbdb_file.read()
        # Compare raw bytes: decoding an arbitrary header could raise
        # UnicodeDecodeError instead of the intended error below.
        if data[0:4] != b"mbdb":
            raise Exception("This does not look like an MBDB file")
        offset = 4
        offset = offset + 2  # value x05 x00, not sure what this is
        while offset < len(data):
            fileinfo = {}
            fileinfo['start_offset'] = offset
            fileinfo['domain'], offset = getstring(data, offset)
            fileinfo['filename'], offset = getstring(data, offset)
            fileinfo['linktarget'], offset = getstring(data, offset)
            fileinfo['datahash'], offset = getstring(data, offset)
            fileinfo['unknown1'], offset = getstring(data, offset)
            fileinfo['mode'], offset = getint(data, offset, 2)
            fileinfo['unknown2'], offset = getint(data, offset, 4)
            fileinfo['unknown3'], offset = getint(data, offset, 4)
            fileinfo['userid'], offset = getint(data, offset, 4)
            fileinfo['groupid'], offset = getint(data, offset, 4)
            fileinfo['mtime'], offset = getint(data, offset, 4)
            fileinfo['atime'], offset = getint(data, offset, 4)
            fileinfo['ctime'], offset = getint(data, offset, 4)
            fileinfo['filelen'], offset = getint(data, offset, 8)
            fileinfo['flag'], offset = getint(data, offset, 1)
            fileinfo['numprops'], offset = getint(data, offset, 1)
            fileinfo['properties'] = {}
            for _ in range(fileinfo['numprops']):
                propname, offset = getstring(data, offset)
                propval, offset = getstring(data, offset)
                fileinfo['properties'][propname] = propval
            mbdb[fileinfo['start_offset']] = fileinfo
            fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
            # getstring() decoded the bytes as latin-1, so encode the same way:
            # the digest must cover the original bytes, not a UTF-8 re-encoding.
            digest = hashlib.sha1(fullpath.encode('latin-1'))
            mbdx[fileinfo['start_offset']] = digest.hexdigest()
        return mbdb
    
    def modestr(val):
        """Render the nine low permission bits of ``val`` like ``rwxr-xr-x``."""
        def triplet(bits):
            # One character per bit: the letter when set, '-' when clear.
            return ''.join(letter if bits & mask else '-'
                           for letter, mask in (('r', 0x4), ('w', 0x2), ('x', 0x1)))
        return ''.join(triplet(val >> shift) for shift in (6, 3, 0))
    
    def fileinfo_str(f, verbose=False):
        """Format one file record as text.

        With ``verbose`` false the result is ``(fileID)domain::filename``.
        Otherwise it is an ``ls -l``-like line with type, permissions, user and
        group ids (hex), length, the three timestamps, the short form, the
        symlink target for links and every extra property as ``name=repr``.
        An unrecognised file type is reported on stderr and shown as ``?``.
        """
        if not verbose:
            return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
        type_bits = f['mode'] & 0xE000
        if type_bits == 0xA000:
            kind = 'l'  # symlink
        elif type_bits == 0x8000:
            kind = '-'  # file
        elif type_bits == 0x4000:
            kind = 'd'  # dir
        else:
            # Python 3: "print >> sys.stderr, ..." raises TypeError; use file=.
            print("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)),
                  file=sys.stderr)
            kind = '?'  # unknown
        info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
                (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
                 f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
        if kind == 'l':
            info = info + ' -> ' + f['linktarget']  # symlink destination
        for name, value in f['properties'].items():  # extra properties
            info = info + ' ' + name + '=' + repr(value)
        return info
    
    # Script entry point: list every record of ./Manifest.mbdb on stdout.
    verbose = True
    if __name__ == '__main__':
        mbdb = process_mbdb_file(
            r"Manifest.mbdb")
        for offset, fileinfo in mbdb.items():
            if offset in mbdx:
                fileinfo['fileID'] = mbdx[offset]
            else:
                fileinfo['fileID'] = "<nofileID>"
                # Python 3: "print >> sys.stderr, ..." raises TypeError; use file=.
                print("No fileID found for %s" % fileinfo_str(fileinfo), file=sys.stderr)
            print(fileinfo_str(fileinfo, verbose))
    
    0 讨论(0)
  • 2020-12-02 04:30

    In iOS 5, the Manifest.mbdx file was eliminated. For the purpose of this article, it was redundant anyway, because the domain and path are in Manifest.mbdb and the ID hash can be generated with SHA1.

    Here is my update of galloglass's code so it works with backups of iOS 5 devices. The only changes are elimination of process_mbdx_file() and addition of a few lines in process_mbdb_file().

    Tested with backups of an iPhone 4S and an iPad 1, both with plenty of apps and files.

    #!/usr/bin/env python
    import sys
    import hashlib
    
    # Map start offset of a record in Manifest.mbdb => SHA-1 hex digest used as
    # its fileID; filled in by process_mbdb_file().
    mbdx = {}
    
    def getint(data, offset, intsize):
        """Decode a big-endian unsigned integer of ``intsize`` characters.

        Returns ``(value, offset_just_past_the_integer)``.
        """
        result = 0
        for _ in range(intsize):
            # Shift the accumulated value up one byte and append the next one.
            result = (result << 8) + ord(data[offset])
            offset += 1
        return result, offset
    
    def getstring(data, offset):
        """Read a length-prefixed string at ``offset``.

        Two 0xFF characters mark a blank string; otherwise a 2-byte big-endian
        length precedes the payload. Returns ``(string, next_offset)``.
        """
        blank_marker = chr(0xFF)
        if data[offset] == blank_marker and data[offset + 1] == blank_marker:
            return '', offset + 2  # Blank string
        size, start = getint(data, offset, 2)  # 2-byte length
        end = start + size
        return data[start:end], end
    
    def process_mbdb_file(filename):
        """Parse a Manifest.mbdb file.

        Returns a dict mapping each record's start offset in the file to a dict
        of that record's fields. As a side effect, the module-level ``mbdx``
        dict receives start offset => SHA-1 hex digest of "domain-filename".

        Raises Exception when the file does not start with the "mbdb" magic.
        """
        mbdb = {} # Map offset of info in this file => file info
        # Binary mode: text mode can translate or cut short the data on some
        # platforms. The context manager also closes the handle promptly.
        with open(filename, 'rb') as mbdb_file:
            data = mbdb_file.read()
        if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
        offset = 4
        offset = offset + 2 # value x05 x00, not sure what this is
        while offset < len(data):
            fileinfo = {}
            fileinfo['start_offset'] = offset
            fileinfo['domain'], offset = getstring(data, offset)
            fileinfo['filename'], offset = getstring(data, offset)
            fileinfo['linktarget'], offset = getstring(data, offset)
            fileinfo['datahash'], offset = getstring(data, offset)
            fileinfo['unknown1'], offset = getstring(data, offset)
            fileinfo['mode'], offset = getint(data, offset, 2)
            fileinfo['unknown2'], offset = getint(data, offset, 4)
            fileinfo['unknown3'], offset = getint(data, offset, 4)
            fileinfo['userid'], offset = getint(data, offset, 4)
            fileinfo['groupid'], offset = getint(data, offset, 4)
            fileinfo['mtime'], offset = getint(data, offset, 4)
            fileinfo['atime'], offset = getint(data, offset, 4)
            fileinfo['ctime'], offset = getint(data, offset, 4)
            fileinfo['filelen'], offset = getint(data, offset, 8)
            fileinfo['flag'], offset = getint(data, offset, 1)
            fileinfo['numprops'], offset = getint(data, offset, 1)
            fileinfo['properties'] = {}
            for _ in range(fileinfo['numprops']):
                propname, offset = getstring(data, offset)
                propval, offset = getstring(data, offset)
                fileinfo['properties'][propname] = propval
            mbdb[fileinfo['start_offset']] = fileinfo
            # The fileID is the SHA-1 of "domain-filename".
            fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
            digest = hashlib.sha1(fullpath)
            mbdx[fileinfo['start_offset']] = digest.hexdigest()
        return mbdb
    
    def modestr(val):
        """Render the nine low permission bits of ``val`` like ``rwxr-xr-x``."""
        # Indexed by the 3-bit value of one owner/group/other triplet.
        triplets = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
        owner = triplets[(val >> 6) & 0x7]
        group = triplets[(val >> 3) & 0x7]
        other = triplets[val & 0x7]
        return owner + group + other
    
    def fileinfo_str(f, verbose=False):
        """Format one file record as text.

        With ``verbose`` false the result is ``(fileID)domain::filename``.
        Otherwise it is an ``ls -l``-like line, followed by the symlink target
        for links and every extra property as ``name=repr``. An unrecognised
        file type is reported on stderr and shown as ``?``.
        """
        if not verbose:
            return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
        type_bits = f['mode'] & 0xE000
        if type_bits == 0xA000:
            kind = 'l' # symlink
        elif type_bits == 0x8000:
            kind = '-' # file
        elif type_bits == 0x4000:
            kind = 'd' # dir
        else:
            sys.stderr.write("Unknown file type %04x for %s\n" % (f['mode'], fileinfo_str(f, False)))
            kind = '?' # unknown
        pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
                  (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
                   f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
        if kind == 'l':
            pieces.append(' -> ' + f['linktarget']) # symlink destination
        for name, value in f['properties'].items(): # extra properties
            pieces.append(' ' + name + '=' + repr(value))
        return ''.join(pieces)
    
    # Script entry point: list every record of ./Manifest.mbdb on stdout.
    verbose = True
    if __name__ == '__main__':
        mbdb = process_mbdb_file("Manifest.mbdb")
        for offset, fileinfo in mbdb.items():
            if offset not in mbdx:
                fileinfo['fileID'] = "<nofileID>"
                sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
            else:
                fileinfo['fileID'] = mbdx[offset]
            sys.stdout.write(fileinfo_str(fileinfo, verbose) + "\n")
    
    0 讨论(0)
  • 2020-12-02 04:33

    For those looking for a Java implementation of a MBDB file reader, there are several out there:

    • "iPhone Analyzer" project (very clean code): http://sourceforge.net/p/iphoneanalyzer/code/HEAD/tree/trunk/library/src/main/java/com/crypticbit/ipa/io/parser/manifest/Mbdb.java

    • "iPhone Stalker" project: https://code.google.com/p/iphonestalker/source/browse/trunk/src/iphonestalker/util/io/MBDBReader.java

    0 讨论(0)
  • 2020-12-02 04:36

    You can find information and a little description of the MBDB/MBDX format here:

    http://code.google.com/p/iphonebackupbrowser/

    This is my application to browse the backup files. I have tried to document the format of the new files that come with iTunes 9.2.

    0 讨论(0)
  • 2020-12-02 04:37

    Thank you, user374559 and reneD -- that code and description are very helpful.

    My stab at some Python to parse and print out the information in a Unix ls-l like format:

    #!/usr/bin/env python
    import sys
    
    def getint(data, offset, intsize):
        """Decode a big-endian unsigned integer of ``intsize`` characters.

        Returns ``(value, offset_just_past_the_integer)``.
        """
        number = 0
        stop = offset + intsize
        while offset < stop:
            # Multiplying by 256 makes room for the next, less significant byte.
            number = number * 256 + ord(data[offset])
            offset += 1
        return number, offset
    
    def getstring(data, offset):
        """Read a length-prefixed string at ``offset``.

        Two 0xFF characters mark a blank string; otherwise a 2-byte big-endian
        length precedes the payload. Returns ``(string, next_offset)``.
        """
        if data[offset] == '\xff' and data[offset + 1] == '\xff':
            return '', offset + 2 # Blank string
        length, body_start = getint(data, offset, 2) # 2-byte length
        body_end = body_start + length
        return data[body_start:body_end], body_end
    
    def process_mbdb_file(filename):
        """Parse a Manifest.mbdb file.

        Returns a dict mapping each record's start offset in the file to a dict
        of that record's fields.

        Raises Exception when the file does not start with the "mbdb" magic.
        """
        mbdb = {} # Map offset of info in this file => file info
        # Binary mode: text mode can translate or cut short the data on some
        # platforms. The context manager also closes the handle promptly.
        with open(filename, 'rb') as mbdb_file:
            data = mbdb_file.read()
        if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
        offset = 4
        offset = offset + 2 # value x05 x00, not sure what this is
        while offset < len(data):
            fileinfo = {}
            fileinfo['start_offset'] = offset
            fileinfo['domain'], offset = getstring(data, offset)
            fileinfo['filename'], offset = getstring(data, offset)
            fileinfo['linktarget'], offset = getstring(data, offset)
            fileinfo['datahash'], offset = getstring(data, offset)
            fileinfo['unknown1'], offset = getstring(data, offset)
            fileinfo['mode'], offset = getint(data, offset, 2)
            fileinfo['unknown2'], offset = getint(data, offset, 4)
            fileinfo['unknown3'], offset = getint(data, offset, 4)
            fileinfo['userid'], offset = getint(data, offset, 4)
            fileinfo['groupid'], offset = getint(data, offset, 4)
            fileinfo['mtime'], offset = getint(data, offset, 4)
            fileinfo['atime'], offset = getint(data, offset, 4)
            fileinfo['ctime'], offset = getint(data, offset, 4)
            fileinfo['filelen'], offset = getint(data, offset, 8)
            fileinfo['flag'], offset = getint(data, offset, 1)
            fileinfo['numprops'], offset = getint(data, offset, 1)
            fileinfo['properties'] = {}
            for _ in range(fileinfo['numprops']):
                propname, offset = getstring(data, offset)
                propval, offset = getstring(data, offset)
                fileinfo['properties'][propname] = propval
            mbdb[fileinfo['start_offset']] = fileinfo
        return mbdb
    
    def process_mbdx_file(filename):
        """Parse a Manifest.mbdx index file.

        Returns a dict mapping the offset of a record in the MBDB file to its
        fileID rendered as a 40-character hex string.

        Raises Exception when the file does not start with the "mbdx" magic.
        """
        mbdx = {} # Map offset of info in the MBDB file => fileID string
        # Binary mode: text mode can translate or cut short the data on some
        # platforms. The context manager also closes the handle promptly.
        with open(filename, 'rb') as mbdx_file:
            data = mbdx_file.read()
        if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file")
        offset = 4
        offset = offset + 2 # value 0x02 0x00, not sure what this is
        # 4-byte count of records; read only to step over it.
        _, offset = getint(data, offset, 4)
        while offset < len(data):
            # 26 byte record, made up of ...
            fileID = data[offset:offset+20] # 20 bytes of fileID
            fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
            offset = offset + 20
            mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
            mbdb_offset = mbdb_offset + 6 # Add 6 to get past prolog
            _, offset = getint(data, offset, 2) # 2-byte mode field, not used here
            mbdx[mbdb_offset] = fileID_string
        return mbdx
    
    def modestr(val):
        """Render the nine low permission bits of ``val`` like ``rwxr-xr-x``."""
        letters = 'rwxrwxrwx'
        # Position i of the output corresponds to bit 0x100 >> i of val.
        return ''.join(letters[i] if val & (0x100 >> i) else '-' for i in range(9))
    
    def fileinfo_str(f, verbose=False):
        """Format one file record as text.

        With ``verbose`` false the result is ``(fileID)domain::filename``.
        Otherwise it is an ``ls -l``-like line, followed by the symlink target
        for links and every extra property as ``name=repr``. An unrecognised
        file type is reported on stderr and shown as ``?``.
        """
        if not verbose:
            return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
        # Type bits of the mode => one-letter type (symlink, file, dir).
        known_kinds = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}
        kind = known_kinds.get(f['mode'] & 0xE000)
        if kind is None:
            sys.stderr.write("Unknown file type %04x for %s\n" % (f['mode'], fileinfo_str(f, False)))
            kind = '?' # unknown
        line = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
                (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
                 f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
        if kind == 'l':
            line += ' -> ' + f['linktarget'] # symlink destination
        for name, value in f['properties'].items(): # extra properties
            line += ' ' + name + '=' + repr(value)
        return line
    
    # Script entry point: join ./Manifest.mbdb with ./Manifest.mbdx and list
    # every record on stdout.
    verbose = True
    if __name__ == '__main__':
        mbdb = process_mbdb_file("Manifest.mbdb")
        mbdx = process_mbdx_file("Manifest.mbdx")
        for offset, fileinfo in mbdb.items():
            if offset not in mbdx:
                fileinfo['fileID'] = "<nofileID>"
                sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
            else:
                fileinfo['fileID'] = mbdx[offset]
            sys.stdout.write(fileinfo_str(fileinfo, verbose) + "\n")
    
    0 讨论(0)
提交回复
热议问题