0

nbt.py now simply *reads files*, nothing more

This commit is contained in:
Aaron Griffith
2011-12-19 02:03:57 -05:00
parent a078b46274
commit 581ee0906a

View File

@@ -16,57 +16,54 @@
import gzip, zlib import gzip, zlib
import struct import struct
import StringIO import StringIO
import numpy import functools
from functools import wraps
# decorator that turns the first argument from a string into an open file # decorator that turns the first argument from a string into an open file
# handle # handle
def _file_loader(func): def _file_loader(func):
@wraps(func) @functools.wraps(func)
def wrapper(fileobj, *args): def wrapper(fileobj, *args):
if isinstance(fileobj, basestring): if isinstance(fileobj, basestring):
# Is actually a filename # Is actually a filename
fileobj = open(fileobj, 'rb',4096) fileobj = open(fileobj, 'rb', 4096)
return func(fileobj, *args) return func(fileobj, *args)
return wrapper return wrapper
@_file_loader @_file_loader
def load(fileobj): def load(fileobj):
"""Reads in the given file as NBT format, parses it, and returns the """Reads in the given file as NBT format, parses it, and returns the
result result as a (name, data) tuple.
""" """
return NBTFileReader(fileobj).read_all() return NBTFileReader(fileobj).read_all()
def load_from_region(filename, x, y, north_direction): @_file_loader
nbt = load_region(filename, north_direction).load_chunk(x, y) def load_region(fileobj):
if nbt is None: """Reads in the given file as a MCR region, and returns an object
return None ## return none. I think this is how we should indicate missing chunks for accessing the chunks inside."""
#raise IOError("No such chunk in region: (%i, %i)" % (x, y)) return MCRFileReader(fileobj)
return nbt.read_all()
class CorruptNBTError(Exception):
def load_region(filename, north_direction): """An exception raised when the NBTFileReader class encounters
return MCRFileReader(filename, north_direction) something unexpected in an NBT file."""
pass
# compile the unpackers into classes
_byte = struct.Struct("b")
_short = struct.Struct(">h")
_int = struct.Struct(">i")
_long = struct.Struct(">q")
_float = struct.Struct(">f")
_double = struct.Struct(">d")
_24bit_int = struct.Struct("B B B")
_unsigned_byte = struct.Struct("B")
_unsigned_int = struct.Struct(">I")
_chunk_header = struct.Struct(">I B")
class NBTFileReader(object): class NBTFileReader(object):
"""Low level class that reads the Named Binary Tag format used by Minecraft """Low level class that reads the Named Binary Tag format used by Minecraft
""" """
# compile the unpackers into classes
_byte = struct.Struct("b")
_short = struct.Struct(">h")
_int = struct.Struct(">i")
_long = struct.Struct(">q")
_float = struct.Struct(">f")
_double = struct.Struct(">d")
def __init__(self, fileobj, is_gzip=True): def __init__(self, fileobj, is_gzip=True):
"""Create a NBT parsing object with the given file-like
object. Setting is_gzip to False parses the file as a zlib
stream instead."""
if is_gzip: if is_gzip:
self._file = gzip.GzipFile(fileobj=fileobj, mode='rb') self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
else: else:
@@ -75,6 +72,21 @@ class NBTFileReader(object):
data = zlib.decompress(fileobj.read()) data = zlib.decompress(fileobj.read())
self._file = StringIO.StringIO(data) self._file = StringIO.StringIO(data)
# mapping of NBT type ids to functions to read them out
self._read_tagmap = {
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
# These private methods read the payload only of the following types # These private methods read the payload only of the following types
def _read_tag_end(self): def _read_tag_end(self):
# Nothing to read # Nothing to read
@@ -82,32 +94,27 @@ class NBTFileReader(object):
def _read_tag_byte(self): def _read_tag_byte(self):
byte = self._file.read(1) byte = self._file.read(1)
return _byte.unpack(byte)[0] return self._byte.unpack(byte)[0]
def _read_tag_short(self): def _read_tag_short(self):
bytes = self._file.read(2) bytes = self._file.read(2)
global _short return self._short.unpack(bytes)[0]
return _short.unpack(bytes)[0]
def _read_tag_int(self): def _read_tag_int(self):
bytes = self._file.read(4) bytes = self._file.read(4)
global _int return self._int.unpack(bytes)[0]
return _int.unpack(bytes)[0]
def _read_tag_long(self): def _read_tag_long(self):
bytes = self._file.read(8) bytes = self._file.read(8)
global _long return self._long.unpack(bytes)[0]
return _long.unpack(bytes)[0]
def _read_tag_float(self): def _read_tag_float(self):
bytes = self._file.read(4) bytes = self._file.read(4)
global _float return self._float.unpack(bytes)[0]
return _float.unpack(bytes)[0]
def _read_tag_double(self): def _read_tag_double(self):
bytes = self._file.read(8) bytes = self._file.read(8)
global _double return self._double.unpack(bytes)[0]
return _double.unpack(bytes)[0]
def _read_tag_byte_array(self): def _read_tag_byte_array(self):
length = self._read_tag_int() length = self._read_tag_int()
@@ -116,10 +123,8 @@ class NBTFileReader(object):
def _read_tag_string(self): def _read_tag_string(self):
length = self._read_tag_short() length = self._read_tag_short()
# Read the string # Read the string
string = self._file.read(length) string = self._file.read(length)
# decode it and return # decode it and return
return string.decode("UTF-8") return string.decode("UTF-8")
@@ -127,21 +132,7 @@ class NBTFileReader(object):
tagid = self._read_tag_byte() tagid = self._read_tag_byte()
length = self._read_tag_int() length = self._read_tag_int()
read_tagmap = { read_method = self._read_tagmap[tagid]
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
read_method = read_tagmap[tagid]
l = [] l = []
for _ in xrange(length): for _ in xrange(length):
l.append(read_method()) l.append(read_method())
@@ -158,27 +149,11 @@ class NBTFileReader(object):
break break
name = self._read_tag_string() name = self._read_tag_string()
read_tagmap = { payload = self._read_tagmap[tagtype]()
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
payload = read_tagmap[tagtype]()
tags[name] = payload tags[name] = payload
return tags return tags
def read_all(self): def read_all(self):
"""Reads the entire file and returns (name, payload) """Reads the entire file and returns (name, payload)
name is the name of the root tag, and payload is a dictionary mapping name is the name of the root tag, and payload is a dictionary mapping
@@ -186,179 +161,74 @@ class NBTFileReader(object):
""" """
# Read tag type # Read tag type
tagtype = ord(self._file.read(1)) try:
if tagtype != 10: tagtype = ord(self._file.read(1))
raise Exception("Expected a tag compound") if tagtype != 10:
raise Exception("Expected a tag compound")
# Read the tag name
name = self._read_tag_string() # Read the tag name
name = self._read_tag_string()
payload = self._read_tag_compound() payload = self._read_tag_compound()
return name, payload return (name, payload)
except (struct.error, ValueError), e:
raise CorruptNBTError("could not parse nbt: %s" % (str(e),))
class CorruptRegionError(Exception):
"""An exception raised when the MCRFileReader class encounters an
error during region file parsing.
"""
pass
# For reference, the MCR format is outlined at # For reference, the MCR format is outlined at
# <http://www.minecraftwiki.net/wiki/Beta_Level_Format> # <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
class MCRFileReader(object): class MCRFileReader(object):
"""A class for reading chunk region files, as introduced in the """A class for reading chunk region files, as introduced in the
Beta 1.3 update. It provides functions for opening individual Beta 1.3 update. It provides functions for opening individual
chunks (as instances of NBTFileReader), getting chunk timestamps, chunks (as (name, data) tuples), getting chunk timestamps, and for
and for listing chunks contained in the file.""" listing chunks contained in the file.
"""
def __init__(self, filename, north_direction): _table_format = struct.Struct(">1024I")
_chunk_header_format = struct.Struct(">I B")
def __init__(self, fileobj):
"""This creates a region object from the given file-like
object. Chances are you want to use load_region instead."""
self._file = fileobj
# read in the location table
location_data = self._file.read(4096)
if not len(location_data) == 4096:
raise CorruptRegionError("invalid location table")
# read in the timestamp table
timestamp_data = self._file.read(4096)
if not len(timestamp_data) == 4096:
raise CorruptRegionError("invalid timestamp table")
# turn this data into a useful list
self._locations = self._table_format.unpack(location_data)
self._timestamps = self._table_format.unpack(timestamp_data)
def close(self):
"""Close the region file and free any resources associated
with keeping it open. Using this object after closing it
results in undefined behaviour.
"""
self._file.close()
self._file = None self._file = None
self._filename = filename
self.north_direction = north_direction
# cache used when the entire header tables are read in get_chunks()
self._locations = None
self._timestamps = None
self._chunks = None
def get_north_rotations(self):
if self.north_direction == 'upper-left':
return 3
elif self.north_direction == 'upper-right':
return 2
elif self.north_direction == 'lower-right':
return 1
elif self.north_direction == 'lower-left':
return 0
def _read_24bit_int(self):
"""Read in a 24-bit, big-endian int, used in the chunk
location table."""
ret = 0
bytes = self._file.read(3)
global _24bit_int
bytes = _24bit_int.unpack(bytes)
for i in xrange(3):
ret = ret << 8
ret += bytes[i]
return ret
def _read_chunk_location(self, x=None, y=None):
"""Read and return the (offset, length) of the given chunk
coordinate, or None if the requested chunk doesn't exist. x
and y must be between 0 and 31, or None. If they are None,
then there will be no file seek before doing the read."""
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# check for a cached value
if self._locations:
return self._locations[x + y * 32]
# go to the correct entry in the chunk location table
self._file.seek(4 * (x + y * 32))
try:
# 3-byte offset in 4KiB sectors
offset_sectors = self._read_24bit_int()
# 1-byte length in 4KiB sectors, rounded up
global _unsigned_byte
byte = self._file.read(1)
length_sectors = _unsigned_byte.unpack(byte)[0]
except (IndexError, struct.error):
# got a problem somewhere
return None
# check for empty chunks
if offset_sectors == 0 or length_sectors == 0:
return None
return (offset_sectors * 4096, length_sectors * 4096)
def _read_chunk_timestamp(self, x=None, y=None):
"""Read and return the last modification time of the given
chunk coordinate. x and y must be between 0 and 31, or
None. If they are None, then there will be no file seek
before doing the read."""
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# check for a cached value
if self._timestamps:
return self._timestamps[x + y * 32]
# go to the correct entry in the chunk timestamp table
self._file.seek(4 * (x + y * 32) + 4096)
try:
bytes = self._file.read(4)
global _unsigned_int
timestamp = _unsigned_int.unpack(bytes)[0]
except (IndexError, struct.error):
return 0
return timestamp
def openfile(self):
#make sure we clean up
if self._file is None:
self._file = open(self._filename,'rb')
def closefile(self):
#make sure we clean up
if self._file is not None:
self._file.close()
self._file = None
def get_chunks(self): def get_chunks(self):
"""Return a list of all chunks contained in this region file, """Return an iterator of all chunks contained in this region
as a list of (x, y) coordinate tuples. To load these chunks, file, as (x, y) coordinate tuples. To load these chunks,
provide these coordinates to load_chunk().""" provide these coordinates to load_chunk()."""
if self._chunks is not None:
return self._chunks
if self._locations is None:
self.get_chunk_info()
self._chunks = []
for x in xrange(32): for x in xrange(32):
for y in xrange(32): for y in xrange(32):
if self._locations[x + y * 32] is not None: if self._locations[x + y * 32] >> 8 != 0:
self._chunks.append((x,y)) yield (x,y)
return self._chunks
def get_chunk_info(self,closeFile = True):
"""Preloads region header information."""
if self._locations:
return
self.openfile()
self._chunks = None
self._locations = [0]*32*32
self._timestamps = []
# go to the beginning of the file
self._file.seek(0)
# read chunk location table
locations_index = numpy.reshape(numpy.rot90(numpy.reshape(range(32*32),
(32, 32)), -self.get_north_rotations()), -1)
for i in locations_index:
self._locations[i] = self._read_chunk_location()
# read chunk timestamp table
timestamp_append = self._timestamps.append
for _ in xrange(32*32):
timestamp_append(self._read_chunk_timestamp())
self._timestamps = numpy.reshape(numpy.rot90(numpy.reshape(
self._timestamps, (32,32)),self.get_north_rotations()), -1)
if closeFile:
self.closefile()
return
def get_chunk_timestamp(self, x, y): def get_chunk_timestamp(self, x, y):
"""Return the given chunk's modification time. If the given """Return the given chunk's modification time. If the given
chunk doesn't exist, this number may be nonsense. Like chunk doesn't exist, this number may be nonsense. Like
@@ -366,21 +236,16 @@ class MCRFileReader(object):
""" """
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._timestamps is None:
self.get_chunk_info()
return self._timestamps[x + y * 32] return self._timestamps[x + y * 32]
def chunkExists(self, x, y): def chunk_exists(self, x, y):
"""Determines if a chunk exists without triggering loading of the backend data""" """Determines if a chunk exists."""
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._locations is None: return self._locations[x + y * 32] >> 8 != 0
self.get_chunk_info()
location = self._locations[x + y * 32]
return location is not None
def load_chunk(self, x, y,closeFile=True): def load_chunk(self, x, y):
"""Return a NBTFileReader instance for the given chunk, or """Return a (name, data) tuple for the given chunk, or
None if the given chunk doesn't exist in this region file. If None if the given chunk doesn't exist in this region file. If
you provide an x or y not between 0 and 31, it will be you provide an x or y not between 0 and 31, it will be
modulo'd into this range (x % 32, etc.) This is so you can modulo'd into this range (x % 32, etc.) This is so you can
@@ -388,21 +253,21 @@ class MCRFileReader(object):
have the chunks load out of regions properly.""" have the chunks load out of regions properly."""
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._locations is None:
self.get_chunk_info()
location = self._locations[x + y * 32] location = self._locations[x + y * 32]
if location is None: offset = (location >> 8) * 4096;
sectors = location & 0xff;
if offset == 0:
return None return None
self.openfile()
# seek to the data # seek to the data
self._file.seek(location[0]) self._file.seek(offset)
# read in the chunk data header # read in the chunk data header
bytes = self._file.read(5) header = self._file.read(5)
data_length,compression = _chunk_header.unpack(bytes) if len(header) != 5:
raise CorruptChunkError("chunk header is invalid")
data_length, compression = self._chunk_header_format.unpack(header)
# figure out the compression # figure out the compression
is_gzip = True is_gzip = True
@@ -414,12 +279,13 @@ class MCRFileReader(object):
is_gzip = False is_gzip = False
else: else:
# unsupported! # unsupported!
raise Exception("Unsupported chunk compression type: %i" % (compression)) raise CorruptRegionError("unsupported chunk compression type: %i" % (compression))
# turn the rest of the data into a StringIO object # turn the rest of the data into a StringIO object
# (using data_length - 1, as we already read 1 byte for compression) # (using data_length - 1, as we already read 1 byte for compression)
data = self._file.read(data_length - 1) data = self._file.read(data_length - 1)
if len(data) != data_length - 1:
raise CorruptRegionError("chunk length is invalid")
data = StringIO.StringIO(data) data = StringIO.StringIO(data)
if closeFile: return NBTFileReader(data, is_gzip=is_gzip).read_all()
self.closefile()
return NBTFileReader(data, is_gzip=is_gzip)