From 45dee1aa39f552cec8b701a6d41316ae146a9f2b Mon Sep 17 00:00:00 2001
From: Aaron Griffith <aargri@gmail.com>
Date: Wed, 23 Feb 2011 00:14:27 -0500
Subject: [PATCH] added MCRFileReader class to nbt.py, to handle new region
 files

---
 nbt.py | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 186 insertions(+), 6 deletions(-)

diff --git a/nbt.py b/nbt.py
index 780acfe..3549497 100644
--- a/nbt.py
+++ b/nbt.py
@@ -13,18 +13,61 @@
 #    You should have received a copy of the GNU General Public License along
 #    with the Overviewer.  If not, see <http://www.gnu.org/licenses/>.
 
+import functools
 import gzip
 import struct
+import StringIO
+import zlib
 
+def _file_loader(func):
+    """Decorator to handle a filename or file object as first parameter.
+
+    When the wrapped function is handed a string, it is treated as a
+    path: the file is opened in binary mode, passed along in place of
+    the string, and closed again once func returns or raises. Any
+    other object is passed through untouched and left open, since it
+    belongs to the caller."""
+    @functools.wraps(func)
+    def wrapper(fileobj, *args, **kwargs):
+        if not isinstance(fileobj, basestring):
+            return func(fileobj, *args, **kwargs)
+
+        # Is actually a filename; we opened it, so we must close it
+        opened = open(fileobj, 'rb')
+        try:
+            return func(opened, *args, **kwargs)
+        finally:
+            opened.close()
+    return wrapper
+
+@_file_loader
 def load(fileobj):
-    if isinstance(fileobj, basestring):
-        # Is actually a filename
-        fileobj = open(fileobj, 'rb')
+    """Read a gzipped NBT file (given as a filename or an open file
+    object) and return the result of NBTFileReader.read_all()."""
     return NBTFileReader(fileobj).read_all()
 
+@_file_loader
+def load_from_region(fileobj, x, y):
+    """Read chunk (x, y) from a region file, given as a filename or an
+    open file object, and return the result of read_all() on it.
+    Raises IOError if the region contains no such chunk."""
+    nbt = MCRFileReader(fileobj).load_chunk(x, y)
+    if nbt is None:
+        raise IOError("No such chunk in region: (%i, %i)" % (x, y))
+    return nbt.read_all()
+
 class NBTFileReader(object):
-    def __init__(self, fileobj):
-        self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
+    def __init__(self, fileobj, is_gzip=True):
+        """Wrap fileobj, an open file object holding compressed NBT
+        data. If is_gzip is true (the default) it is read as a gzip
+        stream; otherwise it is decompressed as a pure zlib stream."""
+        if is_gzip:
+            self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
+        else:
+            # pure zlib stream, decompressed in one go -- maybe later
+            # replace this with a custom zlib file object?
+            data = zlib.decompress(fileobj.read())
+            self._file = StringIO.StringIO(data)
 
     # These private methods read the payload only of the following types
     def _read_tag_end(self):
@@ -143,3 +164,177 @@ class NBTFileReader(object):
 
         return name, payload
 
+
+# For reference, the MCR format is outlined at
+# <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
+class MCRFileReader(object):
+    """A class for reading chunk region files, as introduced in the
+    Beta 1.3 update. It provides functions for opening individual
+    chunks (as instances of NBTFileReader), getting chunk timestamps,
+    and for listing chunks contained in the file."""
+
+    def __init__(self, fileobj):
+        """Wrap fileobj, an open file object for a region file. It
+        must support seek() and read(); nothing is read from it
+        until one of the other methods is called."""
+
+        self._file = fileobj
+
+        # caches filled in by get_chunks(); each one stays None until
+        # both header tables have been read in completely
+        self._locations = None
+        self._timestamps = None
+        self._chunks = None
+
+    def _read_24bit_int(self):
+        """Read in a 24-bit, big-endian int, used in the chunk
+        location table."""
+
+        raw = self._file.read(3)
+
+        # struct has no 3-byte type, so pad with a leading zero byte
+        # and decode as a 4-byte big-endian int instead
+        return struct.unpack(">I", "\0" + raw)[0]
+
+    def _read_chunk_location(self, x=None, y=None):
+        """Read and return the (offset, length) of the given chunk
+        coordinate, in bytes, or None if the requested chunk doesn't
+        exist. x and y must be between 0 and 31, or None. If they are
+        None, then there will be no file seek before doing the read.
+        Raises ValueError if x or y is out of range."""
+
+        if x is not None and y is not None:
+            if not (0 <= x < 32 and 0 <= y < 32):
+                raise ValueError("Chunk location out of range.")
+
+            # check for a cached value
+            if self._locations is not None:
+                return self._locations[x + y * 32]
+
+            # go to the correct entry in the chunk location table
+            self._file.seek(4 * (x + y * 32))
+
+        # 3-byte offset in 4KiB sectors
+        offset_sectors = self._read_24bit_int()
+
+        # 1-byte length in 4KiB sectors, rounded up
+        length_sectors = struct.unpack("B", self._file.read(1))[0]
+
+        # check for empty chunks
+        if offset_sectors == 0 or length_sectors == 0:
+            return None
+
+        return (offset_sectors * 4096, length_sectors * 4096)
+
+    def _read_chunk_timestamp(self, x=None, y=None):
+        """Read and return the last modification time of the given
+        chunk coordinate. x and y must be between 0 and 31, or
+        None. If they are None, then there will be no file seek
+        before doing the read. Raises ValueError if x or y is out
+        of range."""
+
+        if x is not None and y is not None:
+            if not (0 <= x < 32 and 0 <= y < 32):
+                raise ValueError("Chunk location out of range.")
+
+            # check for a cached value
+            if self._timestamps is not None:
+                return self._timestamps[x + y * 32]
+
+            # go to the correct entry in the chunk timestamp table,
+            # which starts 4096 bytes into the file
+            self._file.seek(4 * (x + y * 32) + 4096)
+
+        return struct.unpack(">I", self._file.read(4))[0]
+
+    def get_chunks(self):
+        """Return a list of all chunks contained in this region file,
+        as a list of (x, y) coordinate tuples. To load these chunks,
+        provide these coordinates to load_chunk(). The first call
+        reads both header tables and caches them; later calls return
+        the cached list."""
+
+        if self._chunks is not None:
+            return self._chunks
+
+        # build into locals, so a read that fails half way through
+        # never leaves partially filled caches behind
+        chunks = []
+        locations = []
+        timestamps = []
+
+        # go to the beginning of the file
+        self._file.seek(0)
+
+        # read chunk location table
+        for y in xrange(32):
+            for x in xrange(32):
+                location = self._read_chunk_location()
+                locations.append(location)
+                if location:
+                    chunks.append((x, y))
+
+        # read chunk timestamp table
+        for _ in xrange(32 * 32):
+            timestamps.append(self._read_chunk_timestamp())
+
+        self._locations = locations
+        self._timestamps = timestamps
+        self._chunks = chunks
+
+        return self._chunks
+
+    def get_chunk_timestamp(self, x, y):
+        """Return the given chunk's modification time. If the given
+        chunk doesn't exist, this number may be nonsense. Like
+        load_chunk(), this will wrap x and y into the range [0, 31].
+        """
+
+        return self._read_chunk_timestamp(x % 32, y % 32)
+
+    def load_chunk(self, x, y):
+        """Return a NBTFileReader instance for the given chunk, or
+        None if the given chunk doesn't exist in this region file. If
+        you provide an x or y not between 0 and 31, it will be
+        modulo'd into this range (x % 32, etc.) This is so you can
+        provide chunk coordinates in global coordinates, and still
+        have the chunks load out of regions properly. Raises
+        Exception for an unsupported compression type, and IOError
+        if the chunk header gives a data length of zero."""
+
+        location = self._read_chunk_location(x % 32, y % 32)
+        if location is None:
+            return None
+
+        # seek to the data
+        self._file.seek(location[0])
+
+        # read in the chunk data header: a 4-byte big-endian length
+        # followed by a 1-byte compression type
+        header = self._file.read(5)
+        data_length, compression = struct.unpack(">IB", header)
+
+        # figure out the compression
+        if compression == 1:
+            # gzip -- not used by the official client, but trivial to
+            # support here so...
+            is_gzip = True
+        elif compression == 2:
+            # deflate -- pure zlib stream
+            is_gzip = False
+        else:
+            # unsupported!
+            raise Exception("Unsupported chunk compression type: %i"
+                            % (compression,))
+
+        # the length includes the compression byte already read, so
+        # it can never validly be 0 -- and read(-1) below would slurp
+        # in the whole rest of the file
+        if data_length < 1:
+            raise IOError("Invalid chunk data length: %i" % (data_length,))
+
+        # turn the rest of the data into a StringIO object
+        # (using data_length - 1, as we already read 1 byte for compression)
+        data = StringIO.StringIO(self._file.read(data_length - 1))
+
+        return NBTFileReader(data, is_gzip=is_gzip)