0

Switched from struct.unpack (the module-level function) to Struct.unpack (the class method); the Struct class pre-compiles the format string, which reduces per-call parsing costs. Also coalesced a few separate unpack calls into a single compound unpack call.
Moved the functionality for getting a list of valid chunks out of get_chunk_info and into a new get_chunks method.
This commit is contained in:
Xon
2011-03-23 16:44:27 +08:00
parent c1b7b12592
commit dbdd5d0fc8

741
nbt.py
View File

@@ -1,356 +1,385 @@
# This file is part of the Minecraft Overviewer. # This file is part of the Minecraft Overviewer.
# #
# Minecraft Overviewer is free software: you can redistribute it and/or # Minecraft Overviewer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published # modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License, or (at # by the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version. # your option) any later version.
# #
# Minecraft Overviewer is distributed in the hope that it will be useful, # Minecraft Overviewer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. # Public License for more details.
# #
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with the Overviewer. If not, see <http://www.gnu.org/licenses/>. # with the Overviewer. If not, see <http://www.gnu.org/licenses/>.
import gzip, zlib import gzip, zlib
import struct import struct
import StringIO import StringIO
import os import os
# decorator to handle filename or object as first parameter # decorator to handle filename or object as first parameter
def _file_loader(func): def _file_loader(func):
def wrapper(fileobj, *args): def wrapper(fileobj, *args):
if isinstance(fileobj, basestring): if isinstance(fileobj, basestring):
if not os.path.isfile(fileobj): if not os.path.isfile(fileobj):
return None return None
# Is actually a filename # Is actually a filename
fileobj = open(fileobj, 'rb') fileobj = open(fileobj, 'rb',4096)
return func(fileobj, *args) return func(fileobj, *args)
return wrapper return wrapper
@_file_loader @_file_loader
def load(fileobj): def load(fileobj):
return NBTFileReader(fileobj).read_all() return NBTFileReader(fileobj).read_all()
def load_from_region(filename, x, y): def load_from_region(filename, x, y):
nbt = load_region(filename).load_chunk(x, y) nbt = load_region(filename).load_chunk(x, y)
if nbt is None: if nbt is None:
return None ## return none. I think this is how we should indicate missing chunks return None ## return none. I think this is how we should indicate missing chunks
#raise IOError("No such chunk in region: (%i, %i)" % (x, y)) #raise IOError("No such chunk in region: (%i, %i)" % (x, y))
return nbt.read_all() return nbt.read_all()
def load_region(filename): def load_region(filename):
return MCRFileReader(filename) return MCRFileReader(filename)
class NBTFileReader(object):
def __init__(self, fileobj, is_gzip=True): # compile the unpacker's into a classes
if is_gzip: _byte = struct.Struct("b")
self._file = gzip.GzipFile(fileobj=fileobj, mode='rb') _short = struct.Struct(">h")
else: _int = struct.Struct(">i")
# pure zlib stream -- maybe later replace this with _long = struct.Struct(">q")
# a custom zlib file object? _float = struct.Struct(">f")
data = zlib.decompress(fileobj.read()) _double = struct.Struct(">d")
self._file = StringIO.StringIO(data)
_24bit_int = struct.Struct("B B B")
# These private methods read the payload only of the following types _unsigned_byte = struct.Struct("B")
def _read_tag_end(self): _unsigned_int = struct.Struct(">I")
# Nothing to read _chunk_header = struct.Struct(">I B")
return 0
class NBTFileReader(object):
def _read_tag_byte(self): def __init__(self, fileobj, is_gzip=True):
byte = self._file.read(1) if is_gzip:
return struct.unpack("b", byte)[0] self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
else:
def _read_tag_short(self): # pure zlib stream -- maybe later replace this with
bytes = self._file.read(2) # a custom zlib file object?
return struct.unpack(">h", bytes)[0] data = zlib.decompress(fileobj.read())
self._file = StringIO.StringIO(data)
def _read_tag_int(self):
bytes = self._file.read(4) # These private methods read the payload only of the following types
return struct.unpack(">i", bytes)[0] def _read_tag_end(self):
# Nothing to read
def _read_tag_long(self): return 0
bytes = self._file.read(8)
return struct.unpack(">q", bytes)[0] def _read_tag_byte(self):
byte = self._file.read(1)
def _read_tag_float(self): return _byte.unpack(byte)[0]
bytes = self._file.read(4)
return struct.unpack(">f", bytes)[0] def _read_tag_short(self):
bytes = self._file.read(2)
def _read_tag_double(self): global _short
bytes = self._file.read(8) return _short.unpack(bytes)[0]
return struct.unpack(">d", bytes)[0]
def _read_tag_int(self):
def _read_tag_byte_array(self): bytes = self._file.read(4)
length = self._read_tag_int() global _int
bytes = self._file.read(length) return _int.unpack(bytes)[0]
return bytes
def _read_tag_long(self):
def _read_tag_string(self): bytes = self._file.read(8)
length = self._read_tag_short() global _long
return _long.unpack(bytes)[0]
# Read the string
string = self._file.read(length) def _read_tag_float(self):
bytes = self._file.read(4)
# decode it and return global _float
return string.decode("UTF-8") return _float.unpack(bytes)[0]
def _read_tag_list(self): def _read_tag_double(self):
tagid = self._read_tag_byte() bytes = self._file.read(8)
length = self._read_tag_int() global _double
return _double.unpack(bytes)[0]
read_tagmap = {
0: self._read_tag_end, def _read_tag_byte_array(self):
1: self._read_tag_byte, length = self._read_tag_int()
2: self._read_tag_short, bytes = self._file.read(length)
3: self._read_tag_int, return bytes
4: self._read_tag_long,
5: self._read_tag_float, def _read_tag_string(self):
6: self._read_tag_double, length = self._read_tag_short()
7: self._read_tag_byte_array,
8: self._read_tag_string, # Read the string
9: self._read_tag_list, string = self._file.read(length)
10:self._read_tag_compound,
} # decode it and return
return string.decode("UTF-8")
read_method = read_tagmap[tagid]
l = [] def _read_tag_list(self):
for _ in xrange(length): tagid = self._read_tag_byte()
l.append(read_method()) length = self._read_tag_int()
return l
read_tagmap = {
def _read_tag_compound(self): 0: self._read_tag_end,
# Build a dictionary of all the tag names mapping to their payloads 1: self._read_tag_byte,
tags = {} 2: self._read_tag_short,
while True: 3: self._read_tag_int,
# Read a tag 4: self._read_tag_long,
tagtype = ord(self._file.read(1)) 5: self._read_tag_float,
6: self._read_tag_double,
if tagtype == 0: 7: self._read_tag_byte_array,
break 8: self._read_tag_string,
9: self._read_tag_list,
name = self._read_tag_string() 10:self._read_tag_compound,
read_tagmap = { }
0: self._read_tag_end,
1: self._read_tag_byte, read_method = read_tagmap[tagid]
2: self._read_tag_short, l = []
3: self._read_tag_int, for _ in xrange(length):
4: self._read_tag_long, l.append(read_method())
5: self._read_tag_float, return l
6: self._read_tag_double,
7: self._read_tag_byte_array, def _read_tag_compound(self):
8: self._read_tag_string, # Build a dictionary of all the tag names mapping to their payloads
9: self._read_tag_list, tags = {}
10:self._read_tag_compound, while True:
} # Read a tag
payload = read_tagmap[tagtype]() tagtype = ord(self._file.read(1))
tags[name] = payload if tagtype == 0:
break
return tags
name = self._read_tag_string()
read_tagmap = {
0: self._read_tag_end,
def read_all(self): 1: self._read_tag_byte,
"""Reads the entire file and returns (name, payload) 2: self._read_tag_short,
name is the name of the root tag, and payload is a dictionary mapping 3: self._read_tag_int,
names to their payloads 4: self._read_tag_long,
5: self._read_tag_float,
""" 6: self._read_tag_double,
# Read tag type 7: self._read_tag_byte_array,
tagtype = ord(self._file.read(1)) 8: self._read_tag_string,
if tagtype != 10: 9: self._read_tag_list,
raise Exception("Expected a tag compound") 10:self._read_tag_compound,
}
# Read the tag name payload = read_tagmap[tagtype]()
name = self._read_tag_string()
tags[name] = payload
payload = self._read_tag_compound()
return tags
return name, payload
# For reference, the MCR format is outlined at def read_all(self):
# <http://www.minecraftwiki.net/wiki/Beta_Level_Format> """Reads the entire file and returns (name, payload)
class MCRFileReader(object): name is the name of the root tag, and payload is a dictionary mapping
"""A class for reading chunk region files, as introduced in the names to their payloads
Beta 1.3 update. It provides functions for opening individual
chunks (as instances of NBTFileReader), getting chunk timestamps, """
and for listing chunks contained in the file.""" # Read tag type
tagtype = ord(self._file.read(1))
def __init__(self, filename): if tagtype != 10:
self._file = None raise Exception("Expected a tag compound")
self._filename = filename
# cache used when the entire header tables are read in get_chunks() # Read the tag name
self._locations = None name = self._read_tag_string()
self._timestamps = None
self._chunks = None payload = self._read_tag_compound()
def _read_24bit_int(self): return name, payload
"""Read in a 24-bit, big-endian int, used in the chunk
location table."""
# For reference, the MCR format is outlined at
ret = 0 # <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
bytes = self._file.read(3) class MCRFileReader(object):
for i in xrange(3): """A class for reading chunk region files, as introduced in the
ret = ret << 8 Beta 1.3 update. It provides functions for opening individual
ret += struct.unpack("B", bytes[i])[0] chunks (as instances of NBTFileReader), getting chunk timestamps,
and for listing chunks contained in the file."""
return ret
def __init__(self, filename):
def _read_chunk_location(self, x=None, y=None): self._file = None
"""Read and return the (offset, length) of the given chunk self._filename = filename
coordinate, or None if the requested chunk doesn't exist. x # cache used when the entire header tables are read in get_chunks()
and y must be between 0 and 31, or None. If they are None, self._locations = None
then there will be no file seek before doing the read.""" self._timestamps = None
self._chunks = None
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32): def _read_24bit_int(self):
raise ValueError("Chunk location out of range.") """Read in a 24-bit, big-endian int, used in the chunk
location table."""
# check for a cached value
if self._locations: ret = 0
return self._locations[x + y * 32] bytes = self._file.read(3)
global _24bit_int
# go to the correct entry in the chunk location table bytes = _24bit_int.unpack(bytes)
self._file.seek(4 * (x + y * 32)) for i in xrange(3):
ret = ret << 8
# 3-byte offset in 4KiB sectors ret += bytes[i]
offset_sectors = self._read_24bit_int()
return ret
# 1-byte length in 4KiB sectors, rounded up
byte = self._file.read(1) def _read_chunk_location(self, x=None, y=None):
length_sectors = struct.unpack("B", byte)[0] """Read and return the (offset, length) of the given chunk
coordinate, or None if the requested chunk doesn't exist. x
# check for empty chunks and y must be between 0 and 31, or None. If they are None,
if offset_sectors == 0 or length_sectors == 0: then there will be no file seek before doing the read."""
return None
if x is not None and y is not None:
return (offset_sectors * 4096, length_sectors * 4096) if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
def _read_chunk_timestamp(self, x=None, y=None):
"""Read and return the last modification time of the given # check for a cached value
chunk coordinate. x and y must be between 0 and 31, or if self._locations:
None. If they are, None, then there will be no file seek return self._locations[x + y * 32]
before doing the read."""
# go to the correct entry in the chunk location table
if x is not None and y is not None: self._file.seek(4 * (x + y * 32))
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# 3-byte offset in 4KiB sectors
# check for a cached value offset_sectors = self._read_24bit_int()
if self._timestamps: global _unsigned_byte
return self._timestamps[x + y * 32] # 1-byte length in 4KiB sectors, rounded up
byte = self._file.read(1)
# go to the correct entry in the chunk timestamp table length_sectors = _unsigned_byte.unpack(byte)[0]
self._file.seek(4 * (x + y * 32) + 4096)
# check for empty chunks
bytes = self._file.read(4) if offset_sectors == 0 or length_sectors == 0:
timestamp = struct.unpack(">I", bytes)[0] return None
return timestamp return (offset_sectors * 4096, length_sectors * 4096)
def get_chunk_info(self,closeFile = True): def _read_chunk_timestamp(self, x=None, y=None):
"""Return a list of all chunks contained in this region file, """Read and return the last modification time of the given
as a list of (x, y) coordinate tuples. To load these chunks, chunk coordinate. x and y must be between 0 and 31, or
provide these coordinates to load_chunk().""" None. If they are, None, then there will be no file seek
before doing the read."""
if self._chunks:
return self._chunks if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
if self._file is None: raise ValueError("Chunk location out of range.")
self._file = open(self._filename,'rb');
# check for a cached value
self._chunks = [] if self._timestamps:
self._locations = [] return self._timestamps[x + y * 32]
self._timestamps = []
# go to the correct entry in the chunk timestamp table
# go to the beginning of the file self._file.seek(4 * (x + y * 32) + 4096)
self._file.seek(0)
bytes = self._file.read(4)
# read chunk location table
for y in xrange(32): global _unsigned_int
for x in xrange(32): timestamp = _unsigned_int.unpack(bytes)[0]
location = self._read_chunk_location()
self._locations.append(location) return timestamp
if location:
self._chunks.append((x, y)) def get_chunks(self):
"""Return a list of all chunks contained in this region file,
# read chunk timestamp table as a list of (x, y) coordinate tuples. To load these chunks,
for y in xrange(32): provide these coordinates to load_chunk()."""
for x in xrange(32):
timestamp = self._read_chunk_timestamp() if self._chunks:
self._timestamps.append(timestamp) return self._chunks
if self._locations is None:
if closeFile: self.get_chunk_info()
#free the file object since it isn't safe to be reused in child processes (seek point goes wonky!) self._chunks = filter(None,self._locations)
self._file.close()
self._file = None return self._chunks
return self._chunks
def get_chunk_info(self,closeFile = True):
def get_chunk_timestamp(self, x, y): """Preloads region header information."""
"""Return the given chunk's modification time. If the given
chunk doesn't exist, this number may be nonsense. Like if self._locations:
load_chunk(), this will wrap x and y into the range [0, 31]. return
"""
x = x % 32 if self._file is None:
y = y % 32 self._file = open(self._filename,'rb');
if self._timestamps is None:
self.get_chunk_info() self._chunks = None
return self._timestamps[x + y * 32] self._locations = []
self._timestamps = []
def chunkExists(self, x, y):
"""Determines if a chunk exists without triggering loading of the backend data""" # go to the beginning of the file
x = x % 32 self._file.seek(0)
y = y % 32
if self._locations is None: # read chunk location table
self.get_chunk_info() locations_append = self._locations.append
location = self._locations[x + y * 32] for x, y in [(x,y) for x in xrange(32) for y in xrange(32)]:
return location is not None locations_append(self._read_chunk_location())
def load_chunk(self, x, y): # read chunk timestamp table
"""Return a NBTFileReader instance for the given chunk, or timestamp_append = self._timestamps.append
None if the given chunk doesn't exist in this region file. If for x, y in [(x,y) for x in xrange(32) for y in xrange(32)]:
you provide an x or y not between 0 and 31, it will be timestamp_append(self._read_chunk_timestamp())
modulo'd into this range (x % 32, etc.) This is so you can
provide chunk coordinates in global coordinates, and still if closeFile:
have the chunks load out of regions properly.""" #free the file object since it isn't safe to be reused in child processes (seek point goes wonky!)
x = x % 32 self._file.close()
y = y % 32 self._file = None
if self._locations is None: return
self.get_chunk_info()
def get_chunk_timestamp(self, x, y):
location = self._locations[x + y * 32] """Return the given chunk's modification time. If the given
if location is None: chunk doesn't exist, this number may be nonsense. Like
return None load_chunk(), this will wrap x and y into the range [0, 31].
"""
if self._file is None: x = x % 32
self._file = open(self._filename,'rb'); y = y % 32
# seek to the data if self._timestamps is None:
self._file.seek(location[0]) self.get_chunk_info()
return self._timestamps[x + y * 32]
# read in the chunk data header
bytes = self._file.read(4) def chunkExists(self, x, y):
data_length = struct.unpack(">I", bytes)[0] """Determines if a chunk exists without triggering loading of the backend data"""
bytes = self._file.read(1) x = x % 32
compression = struct.unpack("B", bytes)[0] y = y % 32
if self._locations is None:
# figure out the compression self.get_chunk_info()
is_gzip = True location = self._locations[x + y * 32]
if compression == 1: return location is not None
# gzip -- not used by the official client, but trivial to support here so...
is_gzip = True def load_chunk(self, x, y):
elif compression == 2: """Return a NBTFileReader instance for the given chunk, or
# deflate -- pure zlib stream None if the given chunk doesn't exist in this region file. If
is_gzip = False you provide an x or y not between 0 and 31, it will be
else: modulo'd into this range (x % 32, etc.) This is so you can
# unsupported! provide chunk coordinates in global coordinates, and still
raise Exception("Unsupported chunk compression type: %i" % (compression)) have the chunks load out of regions properly."""
# turn the rest of the data into a StringIO object x = x % 32
# (using data_length - 1, as we already read 1 byte for compression) y = y % 32
data = self._file.read(data_length - 1) if self._locations is None:
data = StringIO.StringIO(data) self.get_chunk_info()
return NBTFileReader(data, is_gzip=is_gzip) location = self._locations[x + y * 32]
if location is None:
return None
if self._file is None:
self._file = open(self._filename,'rb');
# seek to the data
self._file.seek(location[0])
# read in the chunk data header
bytes = self._file.read(5)
data_length,compression = _chunk_header.unpack(bytes)
# figure out the compression
is_gzip = True
if compression == 1:
# gzip -- not used by the official client, but trivial to support here so...
is_gzip = True
elif compression == 2:
# deflate -- pure zlib stream
is_gzip = False
else:
# unsupported!
raise Exception("Unsupported chunk compression type: %i" % (compression))
# turn the rest of the data into a StringIO object
# (using data_length - 1, as we already read 1 byte for compression)
data = self._file.read(data_length - 1)
data = StringIO.StringIO(data)
return NBTFileReader(data, is_gzip=is_gzip)