From dbdd5d0fc87356a42a486128d54fd3900ee9e2cf Mon Sep 17 00:00:00 2001
From: Xon <Xon@localhost>
Date: Wed, 23 Mar 2011 16:44:27 +0800
Subject: [PATCH] Switched from struct.unpack (module) to Struct.unpack
 (class), which precompiles the format string and reduces parsing costs.
 Coalesced a few unpack calls into a compound unpack call. Moved the logic
 that lists valid chunks out of get_chunk_info and into get_chunks.

---
 nbt.py | 741 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 385 insertions(+), 356 deletions(-)

diff --git a/nbt.py b/nbt.py
index 73730a0..5258ffb 100644
--- a/nbt.py
+++ b/nbt.py
@@ -1,356 +1,385 @@
-#    This file is part of the Minecraft Overviewer.
-#
-#    Minecraft Overviewer is free software: you can redistribute it and/or
-#    modify it under the terms of the GNU General Public License as published
-#    by the Free Software Foundation, either version 3 of the License, or (at
-#    your option) any later version.
-#
-#    Minecraft Overviewer is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
-#    Public License for more details.
-#
-#    You should have received a copy of the GNU General Public License along
-#    with the Overviewer.  If not, see <http://www.gnu.org/licenses/>.
-
-import gzip, zlib
-import struct
-import StringIO
-import os
-
-# decorator to handle filename or object as first parameter
-def _file_loader(func):
-    def wrapper(fileobj, *args):
-        if isinstance(fileobj, basestring):
-            if not os.path.isfile(fileobj):
-               return None
-
-            # Is actually a filename
-            fileobj = open(fileobj, 'rb')
-        return func(fileobj, *args)
-    return wrapper
-
-@_file_loader
-def load(fileobj):
-    return NBTFileReader(fileobj).read_all()
-
-def load_from_region(filename, x, y):
-    nbt = load_region(filename).load_chunk(x, y)
-    if nbt is None:
-        return None ## return none.  I think this is who we should indicate missing chunks
-        #raise IOError("No such chunk in region: (%i, %i)" % (x, y))     
-    return nbt.read_all()
-  
-def load_region(filename):            
-    return MCRFileReader(filename)
-  
-class NBTFileReader(object):
-    def __init__(self, fileobj, is_gzip=True):
-        if is_gzip:
-            self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
-        else:
-            # pure zlib stream -- maybe later replace this with
-            # a custom zlib file object?
-            data = zlib.decompress(fileobj.read())
-            self._file = StringIO.StringIO(data)
-
-    # These private methods read the payload only of the following types
-    def _read_tag_end(self):
-        # Nothing to read
-        return 0
-
-    def _read_tag_byte(self):
-        byte = self._file.read(1)
-        return struct.unpack("b", byte)[0]
-    
-    def _read_tag_short(self):
-        bytes = self._file.read(2)
-        return struct.unpack(">h", bytes)[0]
-
-    def _read_tag_int(self):
-        bytes = self._file.read(4)
-        return struct.unpack(">i", bytes)[0]
-
-    def _read_tag_long(self):
-        bytes = self._file.read(8)
-        return struct.unpack(">q", bytes)[0]
-
-    def _read_tag_float(self):
-        bytes = self._file.read(4)
-        return struct.unpack(">f", bytes)[0]
-
-    def _read_tag_double(self):
-        bytes = self._file.read(8)
-        return struct.unpack(">d", bytes)[0]
-
-    def _read_tag_byte_array(self):
-        length = self._read_tag_int()
-        bytes = self._file.read(length)
-        return bytes
-
-    def _read_tag_string(self):
-        length = self._read_tag_short()
-
-        # Read the string
-        string = self._file.read(length)
-
-        # decode it and return
-        return string.decode("UTF-8")
-
-    def _read_tag_list(self):
-        tagid = self._read_tag_byte()
-        length = self._read_tag_int()
-
-        read_tagmap = {
-                0: self._read_tag_end,
-                1: self._read_tag_byte,
-                2: self._read_tag_short,
-                3: self._read_tag_int,
-                4: self._read_tag_long,
-                5: self._read_tag_float,
-                6: self._read_tag_double,
-                7: self._read_tag_byte_array,
-                8: self._read_tag_string,
-                9: self._read_tag_list,
-                10:self._read_tag_compound,
-                }
-
-        read_method = read_tagmap[tagid]
-        l = []
-        for _ in xrange(length):
-            l.append(read_method())
-        return l
-
-    def _read_tag_compound(self):
-        # Build a dictionary of all the tag names mapping to their payloads
-        tags = {}
-        while True:
-            # Read a tag
-            tagtype = ord(self._file.read(1))
-
-            if tagtype == 0:
-                break
-
-            name = self._read_tag_string()
-            read_tagmap = {
-                    0: self._read_tag_end,
-                    1: self._read_tag_byte,
-                    2: self._read_tag_short,
-                    3: self._read_tag_int,
-                    4: self._read_tag_long,
-                    5: self._read_tag_float,
-                    6: self._read_tag_double,
-                    7: self._read_tag_byte_array,
-                    8: self._read_tag_string,
-                    9: self._read_tag_list,
-                    10:self._read_tag_compound,
-                    }
-            payload = read_tagmap[tagtype]()
-            
-            tags[name] = payload
-
-        return tags
-
-
-
-    def read_all(self):
-        """Reads the entire file and returns (name, payload)
-        name is the name of the root tag, and payload is a dictionary mapping
-        names to their payloads
-
-        """
-        # Read tag type
-        tagtype = ord(self._file.read(1))
-        if tagtype != 10:
-            raise Exception("Expected a tag compound")
-
-        # Read the tag name
-        name = self._read_tag_string()
-
-        payload = self._read_tag_compound()
-
-        return name, payload
-
-
-# For reference, the MCR format is outlined at
-# <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
-class MCRFileReader(object):
-    """A class for reading chunk region files, as introduced in the
-    Beta 1.3 update. It provides functions for opening individual
-    chunks (as instances of NBTFileReader), getting chunk timestamps,
-    and for listing chunks contained in the file."""
-    
-    def __init__(self, filename):
-        self._file = None
-        self._filename = filename
-        # cache used when the entire header tables are read in get_chunks()
-        self._locations = None
-        self._timestamps = None
-        self._chunks = None
-    
-    def _read_24bit_int(self):
-        """Read in a 24-bit, big-endian int, used in the chunk
-        location table."""
-        
-        ret = 0
-        bytes = self._file.read(3)
-        for i in xrange(3):
-            ret = ret << 8
-            ret += struct.unpack("B", bytes[i])[0]
-        
-        return ret
-    
-    def _read_chunk_location(self, x=None, y=None):
-        """Read and return the (offset, length) of the given chunk
-        coordinate, or None if the requested chunk doesn't exist. x
-        and y must be between 0 and 31, or None. If they are None,
-        then there will be no file seek before doing the read."""
-        
-        if x is not None and y is not None:
-            if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
-                raise ValueError("Chunk location out of range.")
-            
-            # check for a cached value
-            if self._locations:
-                return self._locations[x + y * 32]
-            
-            # go to the correct entry in the chunk location table
-            self._file.seek(4 * (x + y * 32))
-        
-        # 3-byte offset in 4KiB sectors
-        offset_sectors = self._read_24bit_int()
-        
-        # 1-byte length in 4KiB sectors, rounded up
-        byte = self._file.read(1)
-        length_sectors = struct.unpack("B", byte)[0]
-        
-        # check for empty chunks
-        if offset_sectors == 0 or length_sectors == 0:
-            return None
-        
-        return (offset_sectors * 4096, length_sectors * 4096)
-    
-    def _read_chunk_timestamp(self, x=None, y=None):
-        """Read and return the last modification time of the given
-        chunk coordinate. x and y must be between 0 and 31, or
-        None. If they are, None, then there will be no file seek
-        before doing the read."""
-        
-        if x is not None and y is not None:
-            if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
-                raise ValueError("Chunk location out of range.")
-            
-            # check for a cached value
-            if self._timestamps:
-                return self._timestamps[x + y * 32]
-            
-            # go to the correct entry in the chunk timestamp table
-            self._file.seek(4 * (x + y * 32) + 4096)
-        
-        bytes = self._file.read(4)
-        timestamp = struct.unpack(">I", bytes)[0]
-        
-        return timestamp
-    
-    def get_chunk_info(self,closeFile = True):
-        """Return a list of all chunks contained in this region file,
-        as a list of (x, y) coordinate tuples. To load these chunks,
-        provide these coordinates to load_chunk()."""
-        
-        if self._chunks:
-            return self._chunks
-        
-        if self._file is None:
-            self._file = open(self._filename,'rb');
-
-        self._chunks = []
-        self._locations = []
-        self._timestamps = []
-        
-        # go to the beginning of the file
-        self._file.seek(0)
-        
-        # read chunk location table
-        for y in xrange(32):
-            for x in xrange(32):
-                location = self._read_chunk_location()
-                self._locations.append(location)
-                if location:
-                    self._chunks.append((x, y))
-        
-        # read chunk timestamp table
-        for y in xrange(32):
-            for x in xrange(32):
-                timestamp = self._read_chunk_timestamp()
-                self._timestamps.append(timestamp)
-
-        if closeFile:        
-            #free the file object since it isn't safe to be reused in child processes (seek point goes wonky!)
-            self._file.close()
-            self._file =  None
-        return self._chunks
-    
-    def get_chunk_timestamp(self, x, y):
-        """Return the given chunk's modification time. If the given
-        chunk doesn't exist, this number may be nonsense. Like
-        load_chunk(), this will wrap x and y into the range [0, 31].
-        """
-        x = x % 32
-        y = y % 32        
-        if self._timestamps is None:
-            self.get_chunk_info() 
-        return self._timestamps[x + y * 32]   
-    
-    def chunkExists(self, x, y):
-        """Determines if a chunk exists without triggering loading of the backend data"""
-        x = x % 32
-        y = y % 32
-        if self._locations is None:
-            self.get_chunk_info()
-        location = self._locations[x + y * 32]
-        return location is not None        
-
-    def load_chunk(self, x, y):
-        """Return a NBTFileReader instance for the given chunk, or
-        None if the given chunk doesn't exist in this region file. If
-        you provide an x or y not between 0 and 31, it will be
-        modulo'd into this range (x % 32, etc.) This is so you can
-        provide chunk coordinates in global coordinates, and still
-        have the chunks load out of regions properly."""
-        x = x % 32
-        y = y % 32
-        if self._locations is None:
-            self.get_chunk_info()   
-                    
-        location = self._locations[x + y * 32]
-        if location is None:
-            return None
-
-        if self._file is None:
-            self._file = open(self._filename,'rb'); 
-        # seek to the data
-        self._file.seek(location[0])
-        
-        # read in the chunk data header
-        bytes = self._file.read(4)
-        data_length = struct.unpack(">I", bytes)[0]
-        bytes = self._file.read(1)
-        compression = struct.unpack("B", bytes)[0]
-        
-        # figure out the compression
-        is_gzip = True
-        if compression == 1:
-            # gzip -- not used by the official client, but trivial to support here so...
-            is_gzip = True
-        elif compression == 2:
-            # deflate -- pure zlib stream
-            is_gzip = False
-        else:
-            # unsupported!
-            raise Exception("Unsupported chunk compression type: %i" % (compression))
-        # turn the rest of the data into a StringIO object
-        # (using data_length - 1, as we already read 1 byte for compression)
-        data = self._file.read(data_length - 1)
-        data = StringIO.StringIO(data)
-        
-        return NBTFileReader(data, is_gzip=is_gzip)
+#    This file is part of the Minecraft Overviewer.
+#
+#    Minecraft Overviewer is free software: you can redistribute it and/or
+#    modify it under the terms of the GNU General Public License as published
+#    by the Free Software Foundation, either version 3 of the License, or (at
+#    your option) any later version.
+#
+#    Minecraft Overviewer is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+#    Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with the Overviewer.  If not, see <http://www.gnu.org/licenses/>.
+
+import gzip, zlib
+import struct
+import StringIO
+import os
+
+# decorator to handle filename or object as first parameter
+def _file_loader(func):
+    def wrapper(fileobj, *args):
+        if isinstance(fileobj, basestring):
+            if not os.path.isfile(fileobj):
+               return None
+
+            # Is actually a filename
+            fileobj = open(fileobj, 'rb',4096)
+        return func(fileobj, *args)
+    return wrapper
+
+@_file_loader
+def load(fileobj):
+    return NBTFileReader(fileobj).read_all()
+
+def load_from_region(filename, x, y):
+    nbt = load_region(filename).load_chunk(x, y)
+    if nbt is None:
+        return None ## return none.  I think this is how we should indicate missing chunks
+        #raise IOError("No such chunk in region: (%i, %i)" % (x, y))     
+    return nbt.read_all()
+  
+def load_region(filename):            
+    return MCRFileReader(filename)
+  
+  
+# compile the unpackers into Struct objects once, so format strings are not re-parsed on every read
+_byte   = struct.Struct("b")
+_short  = struct.Struct(">h")
+_int    = struct.Struct(">i")
+_long   = struct.Struct(">q")
+_float  = struct.Struct(">f")
+_double = struct.Struct(">d") 
+
+_24bit_int = struct.Struct("B B B")
+_unsigned_byte = struct.Struct("B")
+_unsigned_int = struct.Struct(">I")
+_chunk_header = struct.Struct(">I B")
+ 
+class NBTFileReader(object):
+    def __init__(self, fileobj, is_gzip=True):
+        if is_gzip:
+            self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
+        else:
+            # pure zlib stream -- maybe later replace this with
+            # a custom zlib file object?
+            data = zlib.decompress(fileobj.read())
+            self._file = StringIO.StringIO(data)
+
+    # These private methods read the payload only of the following types
+    def _read_tag_end(self):
+        # Nothing to read
+        return 0
+
+    def _read_tag_byte(self):
+        byte = self._file.read(1)
+        return _byte.unpack(byte)[0]
+    
+    def _read_tag_short(self):
+        bytes = self._file.read(2)
+        global _short
+        return _short.unpack(bytes)[0]
+
+    def _read_tag_int(self):
+        bytes = self._file.read(4)
+        global _int
+        return _int.unpack(bytes)[0]
+
+    def _read_tag_long(self):
+        bytes = self._file.read(8)
+        global _long
+        return _long.unpack(bytes)[0]
+
+    def _read_tag_float(self):
+        bytes = self._file.read(4)
+        global _float
+        return _float.unpack(bytes)[0]
+
+    def _read_tag_double(self):
+        bytes = self._file.read(8)
+        global _double
+        return _double.unpack(bytes)[0]
+
+    def _read_tag_byte_array(self):
+        length = self._read_tag_int()
+        bytes = self._file.read(length)
+        return bytes
+
+    def _read_tag_string(self):
+        length = self._read_tag_short()
+
+        # Read the string
+        string = self._file.read(length)
+
+        # decode it and return
+        return string.decode("UTF-8")
+
+    def _read_tag_list(self):
+        tagid = self._read_tag_byte()
+        length = self._read_tag_int()
+
+        read_tagmap = {
+                0: self._read_tag_end,
+                1: self._read_tag_byte,
+                2: self._read_tag_short,
+                3: self._read_tag_int,
+                4: self._read_tag_long,
+                5: self._read_tag_float,
+                6: self._read_tag_double,
+                7: self._read_tag_byte_array,
+                8: self._read_tag_string,
+                9: self._read_tag_list,
+                10:self._read_tag_compound,
+                }
+
+        read_method = read_tagmap[tagid]
+        l = []
+        for _ in xrange(length):
+            l.append(read_method())
+        return l
+
+    def _read_tag_compound(self):
+        """Read the payload of a compound tag and return it as a dict
+        mapping each child tag's name to its decoded payload.
+
+        Reading stops at the first child of type 0 (the end tag). A
+        tag type missing from the dispatch table raises KeyError."""
+        # built once per compound instead of once per child tag
+        read_tagmap = {
+                0: self._read_tag_end,
+                1: self._read_tag_byte,
+                2: self._read_tag_short,
+                3: self._read_tag_int,
+                4: self._read_tag_long,
+                5: self._read_tag_float,
+                6: self._read_tag_double,
+                7: self._read_tag_byte_array,
+                8: self._read_tag_string,
+                9: self._read_tag_list,
+                10:self._read_tag_compound,
+                }
+        tags = {}
+        while True:
+            # every child starts with a one-byte tag type
+            tagtype = ord(self._file.read(1))
+            if tagtype == 0:
+                break
+            name = self._read_tag_string()
+            tags[name] = read_tagmap[tagtype]()
+        return tags
+
+
+
+    def read_all(self):
+        """Reads the entire file and returns (name, payload)
+        name is the name of the root tag, and payload is a dictionary mapping
+        names to their payloads
+
+        """
+        # Read tag type
+        tagtype = ord(self._file.read(1))
+        if tagtype != 10:
+            raise Exception("Expected a tag compound")
+
+        # Read the tag name
+        name = self._read_tag_string()
+
+        payload = self._read_tag_compound()
+
+        return name, payload
+
+
+# For reference, the MCR format is outlined at
+# <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
+class MCRFileReader(object):
+    """A class for reading chunk region files, as introduced in the
+    Beta 1.3 update. It provides functions for opening individual
+    chunks (as instances of NBTFileReader), getting chunk timestamps,
+    and for listing chunks contained in the file."""
+    
+    def __init__(self, filename):
+        self._file = None
+        self._filename = filename
+        # caches: header tables are filled by get_chunk_info(), _chunks by get_chunks()
+        self._locations = None
+        self._timestamps = None
+        self._chunks = None
+    
+    def _read_24bit_int(self):
+        """Read the next three bytes of the region file and return
+        them as one big-endian unsigned 24-bit integer. This is the
+        sector offset field of a chunk location table entry.
+
+        Raises struct.error if fewer than three bytes are left."""
+        raw = self._file.read(3)
+        # _24bit_int splits the field into three unsigned bytes,
+        # most significant first; no `global` is needed to read a
+        # module-level name.
+        high, middle, low = _24bit_int.unpack(raw)
+
+        return (high << 16) | (middle << 8) | low
+    
+    def _read_chunk_location(self, x=None, y=None):
+        """Read and return the (offset, length) of the given chunk
+        coordinate, or None if the requested chunk doesn't exist. x
+        and y must be between 0 and 31, or None. If they are None,
+        then there will be no file seek before doing the read."""
+        
+        if x is not None and y is not None:
+            if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
+                raise ValueError("Chunk location out of range.")
+            
+            # check for a cached value
+            if self._locations:
+                return self._locations[x + y * 32]
+            
+            # go to the correct entry in the chunk location table
+            self._file.seek(4 * (x + y * 32))
+        
+
+        # 3-byte offset in 4KiB sectors
+        offset_sectors = self._read_24bit_int()
+        global _unsigned_byte
+        # 1-byte length in 4KiB sectors, rounded up
+        byte = self._file.read(1)
+        length_sectors = _unsigned_byte.unpack(byte)[0]
+        
+        # check for empty chunks
+        if offset_sectors == 0 or length_sectors == 0:
+            return None
+        
+        return (offset_sectors * 4096, length_sectors * 4096)
+    
+    def _read_chunk_timestamp(self, x=None, y=None):
+        """Read and return the last modification time of the given
+        chunk coordinate. x and y must be between 0 and 31, or
+        None. If they are, None, then there will be no file seek
+        before doing the read."""
+        
+        if x is not None and y is not None:
+            if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
+                raise ValueError("Chunk location out of range.")
+            
+            # check for a cached value
+            if self._timestamps:
+                return self._timestamps[x + y * 32]
+            
+            # go to the correct entry in the chunk timestamp table
+            self._file.seek(4 * (x + y * 32) + 4096)
+        
+        bytes = self._file.read(4)
+       
+        global _unsigned_int
+        timestamp = _unsigned_int.unpack(bytes)[0]
+        
+        return timestamp
+    
+    def get_chunks(self):
+        """Return a list of all chunks contained in this region file,
+        as a list of (x, y) coordinate tuples. To load these chunks,
+        provide these coordinates to load_chunk()."""
+        if self._chunks:
+            return self._chunks
+        if self._locations is None:
+            self.get_chunk_info()
+        # entry i belongs to chunk (i % 32, i // 32); return coordinates, not raw table entries
+        self._chunks = [(i % 32, i // 32)
+                        for i, loc in enumerate(self._locations) if loc]
+        return self._chunks
+        
+    def get_chunk_info(self,closeFile = True):
+        """Preload the region header into self._locations and
+        self._timestamps.
+
+        Each table gets 1024 entries in file order, so chunk (x, y)
+        is at index x + y * 32. Does nothing if the locations are
+        already loaded. When closeFile is true the file is closed
+        before returning, even if reading the header fails."""
+        if self._locations:
+            return
+
+        if self._file is None:
+            self._file = open(self._filename,'rb')
+        try:
+            # both tables sit back to back at the start of the file
+            self._file.seek(0)
+            # Read into locals and publish only on success, so a failed
+            # read cannot leave half-filled tables cached on the reader.
+            locations = [self._read_chunk_location() for _ in xrange(1024)]
+            timestamps = [self._read_chunk_timestamp() for _ in xrange(1024)]
+        finally:
+            if closeFile:
+                # free the file object since it isn't safe to be reused
+                # in child processes (seek point goes wonky!)
+                self._file.close()
+                self._file = None
+
+        # the chunk list is derived from the locations; drop any stale copy
+        self._chunks = None
+        self._locations = locations
+        self._timestamps = timestamps
+    
+    def get_chunk_timestamp(self, x, y):
+        """Return the given chunk's modification time. If the given
+        chunk doesn't exist, this number may be nonsense. Like
+        load_chunk(), this will wrap x and y into the range [0, 31].
+        """
+        x = x % 32
+        y = y % 32        
+        if self._timestamps is None:
+            self.get_chunk_info() 
+        return self._timestamps[x + y * 32]   
+    
+    def chunkExists(self, x, y):
+        """Determines if a chunk exists without triggering loading of the backend data"""
+        x = x % 32
+        y = y % 32
+        if self._locations is None:
+            self.get_chunk_info()
+        location = self._locations[x + y * 32]
+        return location is not None        
+
+    def load_chunk(self, x, y):
+        """Return a NBTFileReader instance for the given chunk, or
+        None if the given chunk doesn't exist in this region file. If
+        you provide an x or y not between 0 and 31, it will be
+        modulo'd into this range (x % 32, etc.) This is so you can
+        provide chunk coordinates in global coordinates, and still
+        have the chunks load out of regions properly."""
+        x = x % 32
+        y = y % 32
+        if self._locations is None:
+            self.get_chunk_info()   
+                    
+        location = self._locations[x + y * 32]
+        if location is None:
+            return None
+
+        if self._file is None:
+            self._file = open(self._filename,'rb'); 
+        # seek to the data
+        self._file.seek(location[0])
+        
+        # read in the chunk data header
+        bytes = self._file.read(5)        
+        data_length,compression =  _chunk_header.unpack(bytes)
+        
+        # figure out the compression
+        is_gzip = True
+        if compression == 1:
+            # gzip -- not used by the official client, but trivial to support here so...
+            is_gzip = True
+        elif compression == 2:
+            # deflate -- pure zlib stream
+            is_gzip = False
+        else:
+            # unsupported!
+            raise Exception("Unsupported chunk compression type: %i" % (compression))
+        # turn the rest of the data into a StringIO object
+        # (using data_length - 1, as we already read 1 byte for compression)
+        data = self._file.read(data_length - 1)
+        data = StringIO.StringIO(data)
+        
+        return NBTFileReader(data, is_gzip=is_gzip)