diff --git a/overviewer_core/nbt.py b/overviewer_core/nbt.py index 97160a0..2d0c946 100644 --- a/overviewer_core/nbt.py +++ b/overviewer_core/nbt.py @@ -16,57 +16,54 @@ import gzip, zlib import struct import StringIO -import numpy -from functools import wraps +import functools # decorator that turns the first argument from a string into an open file # handle def _file_loader(func): - @wraps(func) + @functools.wraps(func) def wrapper(fileobj, *args): if isinstance(fileobj, basestring): # Is actually a filename - fileobj = open(fileobj, 'rb',4096) + fileobj = open(fileobj, 'rb', 4096) return func(fileobj, *args) return wrapper @_file_loader def load(fileobj): """Reads in the given file as NBT format, parses it, and returns the - result - + result as a (name, data) tuple. """ return NBTFileReader(fileobj).read_all() -def load_from_region(filename, x, y, north_direction): - nbt = load_region(filename, north_direction).load_chunk(x, y) - if nbt is None: - return None ## return none. I think this is who we should indicate missing chunks - #raise IOError("No such chunk in region: (%i, %i)" % (x, y)) - return nbt.read_all() - -def load_region(filename, north_direction): - return MCRFileReader(filename, north_direction) - - -# compile the unpacker's into a classes -_byte = struct.Struct("b") -_short = struct.Struct(">h") -_int = struct.Struct(">i") -_long = struct.Struct(">q") -_float = struct.Struct(">f") -_double = struct.Struct(">d") +@_file_loader +def load_region(fileobj): + """Reads in the given file as a MCR region, and returns an object + for accessing the chunks inside.""" + return MCRFileReader(fileobj) + +class CorruptNBTError(Exception): + """An exception raised when the NBTFileReader class encounters + something unexpected in an NBT file.""" + pass -_24bit_int = struct.Struct("B B B") -_unsigned_byte = struct.Struct("B") -_unsigned_int = struct.Struct(">I") -_chunk_header = struct.Struct(">I B") - class NBTFileReader(object): """Low level class that reads the Named Binary Tag format used by Minecraft """ + + # compile the unpacker's into a classes + _byte = struct.Struct("b") + _short = struct.Struct(">h") + _int = struct.Struct(">i") + _long = struct.Struct(">q") + _float = struct.Struct(">f") + _double = struct.Struct(">d") + def __init__(self, fileobj, is_gzip=True): + """Create a NBT parsing object with the given file-like + object. Setting is_gzip to False parses the file as a zlib + stream instead.""" if is_gzip: self._file = gzip.GzipFile(fileobj=fileobj, mode='rb') else: @@ -75,6 +72,21 @@ class NBTFileReader(object): data = zlib.decompress(fileobj.read()) self._file = StringIO.StringIO(data) + # mapping of NBT type ids to functions to read them out + self._read_tagmap = { + 0: self._read_tag_end, + 1: self._read_tag_byte, + 2: self._read_tag_short, + 3: self._read_tag_int, + 4: self._read_tag_long, + 5: self._read_tag_float, + 6: self._read_tag_double, + 7: self._read_tag_byte_array, + 8: self._read_tag_string, + 9: self._read_tag_list, + 10:self._read_tag_compound, + } + # These private methods read the payload only of the following types def _read_tag_end(self): # Nothing to read @@ -82,32 +94,27 @@ class NBTFileReader(object): def _read_tag_byte(self): byte = self._file.read(1) - return _byte.unpack(byte)[0] + return self._byte.unpack(byte)[0] def _read_tag_short(self): bytes = self._file.read(2) - global _short - return _short.unpack(bytes)[0] + return self._short.unpack(bytes)[0] def _read_tag_int(self): bytes = self._file.read(4) - global _int - return _int.unpack(bytes)[0] + return self._int.unpack(bytes)[0] def _read_tag_long(self): bytes = self._file.read(8) - global _long - return _long.unpack(bytes)[0] + return self._long.unpack(bytes)[0] def _read_tag_float(self): bytes = self._file.read(4) - global _float - return _float.unpack(bytes)[0] + return self._float.unpack(bytes)[0] def _read_tag_double(self): bytes = self._file.read(8) - global _double - return _double.unpack(bytes)[0] + return self._double.unpack(bytes)[0] def _read_tag_byte_array(self): length = self._read_tag_int() @@ -116,10 +123,8 @@ class NBTFileReader(object): def _read_tag_string(self): length = self._read_tag_short() - # Read the string string = self._file.read(length) - # decode it and return return string.decode("UTF-8") @@ -127,21 +132,7 @@ class NBTFileReader(object): tagid = self._read_tag_byte() length = self._read_tag_int() - read_tagmap = { - 0: self._read_tag_end, - 1: self._read_tag_byte, - 2: self._read_tag_short, - 3: self._read_tag_int, - 4: self._read_tag_long, - 5: self._read_tag_float, - 6: self._read_tag_double, - 7: self._read_tag_byte_array, - 8: self._read_tag_string, - 9: self._read_tag_list, - 10:self._read_tag_compound, - } - - read_method = read_tagmap[tagid] + read_method = self._read_tagmap[tagid] l = [] for _ in xrange(length): l.append(read_method()) @@ -158,27 +149,11 @@ class NBTFileReader(object): break name = self._read_tag_string() - read_tagmap = { - 0: self._read_tag_end, - 1: self._read_tag_byte, - 2: self._read_tag_short, - 3: self._read_tag_int, - 4: self._read_tag_long, - 5: self._read_tag_float, - 6: self._read_tag_double, - 7: self._read_tag_byte_array, - 8: self._read_tag_string, - 9: self._read_tag_list, - 10:self._read_tag_compound, - } - payload = read_tagmap[tagtype]() - + payload = self._read_tagmap[tagtype]() tags[name] = payload return tags - - - + def read_all(self): """Reads the entire file and returns (name, payload) name is the name of the root tag, and payload is a dictionary mapping @@ -186,179 +161,74 @@ class NBTFileReader(object): """ # Read tag type - tagtype = ord(self._file.read(1)) - if tagtype != 10: - raise Exception("Expected a tag compound") - - # Read the tag name - name = self._read_tag_string() - - payload = self._read_tag_compound() - - return name, payload + try: + tagtype = ord(self._file.read(1)) + if tagtype != 10: + raise Exception("Expected a tag compound") + + # Read the tag name + name = self._read_tag_string() + payload = self._read_tag_compound() + + return (name, payload) + except (struct.error, ValueError), e: + raise CorruptNBTError("could not parse nbt: %s" % (str(e),)) +class CorruptRegionError(Exception): + """An exception raised when the MCRFileReader class encounters an + error during region file parsing. + """ + pass # For reference, the MCR format is outlined at # class MCRFileReader(object): """A class for reading chunk region files, as introduced in the Beta 1.3 update. It provides functions for opening individual - chunks (as instances of NBTFileReader), getting chunk timestamps, - and for listing chunks contained in the file.""" + chunks (as (name, data) tuples), getting chunk timestamps, and for + listing chunks contained in the file. + """ - def __init__(self, filename, north_direction): + _table_format = struct.Struct(">1024I") + _chunk_header_format = struct.Struct(">I B") + + def __init__(self, fileobj): + """This creates a region object from the given file-like + object. Chances are you want to use load_region instead.""" + self._file = fileobj + + # read in the location table + location_data = self._file.read(4096) + if not len(location_data) == 4096: + raise CorruptRegionError("invalid location table") + # read in the timestamp table + timestamp_data = self._file.read(4096) + if not len(timestamp_data) == 4096: + raise CorruptRegionError("invalid timestamp table") + + # turn this data into a useful list + self._locations = self._table_format.unpack(location_data) + self._timestamps = self._table_format.unpack(timestamp_data) + + def close(self): + """Close the region file and free any resources associated + with keeping it open. Using this object after closing it + results in undefined behaviour. + """ + + self._file.close() self._file = None - self._filename = filename - self.north_direction = north_direction - # cache used when the entire header tables are read in get_chunks() - self._locations = None - self._timestamps = None - self._chunks = None - - def get_north_rotations(self): - if self.north_direction == 'upper-left': - return 3 - elif self.north_direction == 'upper-right': - return 2 - elif self.north_direction == 'lower-right': - return 1 - elif self.north_direction == 'lower-left': - return 0 - - def _read_24bit_int(self): - """Read in a 24-bit, big-endian int, used in the chunk - location table.""" - - ret = 0 - bytes = self._file.read(3) - global _24bit_int - bytes = _24bit_int.unpack(bytes) - for i in xrange(3): - ret = ret << 8 - ret += bytes[i] - - return ret - - def _read_chunk_location(self, x=None, y=None): - """Read and return the (offset, length) of the given chunk - coordinate, or None if the requested chunk doesn't exist. x - and y must be between 0 and 31, or None. If they are None, - then there will be no file seek before doing the read.""" - - if x is not None and y is not None: - if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32): - raise ValueError("Chunk location out of range.") - - # check for a cached value - if self._locations: - return self._locations[x + y * 32] - - # go to the correct entry in the chunk location table - self._file.seek(4 * (x + y * 32)) - - try: - # 3-byte offset in 4KiB sectors - offset_sectors = self._read_24bit_int() - - # 1-byte length in 4KiB sectors, rounded up - global _unsigned_byte - byte = self._file.read(1) - length_sectors = _unsigned_byte.unpack(byte)[0] - except (IndexError, struct.error): - # got a problem somewhere - return None - - # check for empty chunks - if offset_sectors == 0 or length_sectors == 0: - return None - - return (offset_sectors * 4096, length_sectors * 4096) - - def _read_chunk_timestamp(self, x=None, y=None): - """Read and return the last modification time of the given - chunk coordinate. x and y must be between 0 and 31, or - None. If they are, None, then there will be no file seek - before doing the read.""" - - if x is not None and y is not None: - if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32): - raise ValueError("Chunk location out of range.") - - # check for a cached value - if self._timestamps: - return self._timestamps[x + y * 32] - - # go to the correct entry in the chunk timestamp table - self._file.seek(4 * (x + y * 32) + 4096) - - try: - bytes = self._file.read(4) - global _unsigned_int - timestamp = _unsigned_int.unpack(bytes)[0] - except (IndexError, struct.error): - return 0 - - return timestamp - - def openfile(self): - #make sure we clean up - if self._file is None: - self._file = open(self._filename,'rb') - - def closefile(self): - #make sure we clean up - if self._file is not None: - self._file.close() - self._file = None def get_chunks(self): - """Return a list of all chunks contained in this region file, - as a list of (x, y) coordinate tuples. To load these chunks, + """Return an iterator of all chunks contained in this region + file, as (x, y) coordinate tuples. To load these chunks, provide these coordinates to load_chunk().""" - if self._chunks is not None: - return self._chunks - if self._locations is None: - self.get_chunk_info() - self._chunks = [] for x in xrange(32): for y in xrange(32): - if self._locations[x + y * 32] is not None: - self._chunks.append((x,y)) - return self._chunks + if self._locations[x + y * 32] >> 8 != 0: + yield (x,y) - def get_chunk_info(self,closeFile = True): - """Preloads region header information.""" - - if self._locations: - return - - self.openfile() - - self._chunks = None - self._locations = [0]*32*32 - self._timestamps = [] - - # go to the beginning of the file - self._file.seek(0) - - # read chunk location table - locations_index = numpy.reshape(numpy.rot90(numpy.reshape(range(32*32), - (32, 32)), -self.get_north_rotations()), -1) - for i in locations_index: - self._locations[i] = self._read_chunk_location() - - # read chunk timestamp table - timestamp_append = self._timestamps.append - for _ in xrange(32*32): - timestamp_append(self._read_chunk_timestamp()) - self._timestamps = numpy.reshape(numpy.rot90(numpy.reshape( - self._timestamps, (32,32)),self.get_north_rotations()), -1) - - if closeFile: - self.closefile() - return - def get_chunk_timestamp(self, x, y): """Return the given chunk's modification time. If the given chunk doesn't exist, this number may be nonsense. Like @@ -366,21 +236,16 @@ class MCRFileReader(object): """ x = x % 32 y = y % 32 - if self._timestamps is None: - self.get_chunk_info() return self._timestamps[x + y * 32] - def chunkExists(self, x, y): - """Determines if a chunk exists without triggering loading of the backend data""" + def chunk_exists(self, x, y): + """Determines if a chunk exists.""" x = x % 32 y = y % 32 - if self._locations is None: - self.get_chunk_info() - location = self._locations[x + y * 32] - return location is not None + return self._locations[x + y * 32] >> 8 != 0 - def load_chunk(self, x, y,closeFile=True): - """Return a NBTFileReader instance for the given chunk, or + def load_chunk(self, x, y): + """Return a (name, data) tuple for the given chunk, or None if the given chunk doesn't exist in this region file. If you provide an x or y not between 0 and 31, it will be modulo'd into this range (x % 32, etc.) This is so you can @@ -388,21 +253,21 @@ class MCRFileReader(object): have the chunks load out of regions properly.""" x = x % 32 y = y % 32 - if self._locations is None: - self.get_chunk_info() - location = self._locations[x + y * 32] - if location is None: + offset = (location >> 8) * 4096; + sectors = location & 0xff; + + if offset == 0: return None - - self.openfile() # seek to the data - self._file.seek(location[0]) + self._file.seek(offset) # read in the chunk data header - bytes = self._file.read(5) - data_length,compression = _chunk_header.unpack(bytes) + header = self._file.read(5) + if len(header) != 5: + raise CorruptChunkError("chunk header is invalid") + data_length, compression = self._chunk_header_format.unpack(header) # figure out the compression is_gzip = True @@ -414,12 +279,13 @@ class MCRFileReader(object): is_gzip = False else: # unsupported! - raise Exception("Unsupported chunk compression type: %i" % (compression)) + raise CorruptRegionError("unsupported chunk compression type: %i" % (compression)) + # turn the rest of the data into a StringIO object # (using data_length - 1, as we already read 1 byte for compression) data = self._file.read(data_length - 1) + if len(data) != data_length - 1: + raise CorruptRegionError("chunk length is invalid") data = StringIO.StringIO(data) - if closeFile: - self.closefile() - return NBTFileReader(data, is_gzip=is_gzip) + return NBTFileReader(data, is_gzip=is_gzip).read_all()