0

nbt.py now simply *reads files*, nothing more

This commit is contained in:
Aaron Griffith
2011-12-19 02:03:57 -05:00
parent a078b46274
commit 581ee0906a

View File

@@ -16,57 +16,54 @@
import gzip, zlib import gzip, zlib
import struct import struct
import StringIO import StringIO
import numpy import functools
from functools import wraps
# decorator that turns the first argument from a string into an open file # decorator that turns the first argument from a string into an open file
# handle # handle
def _file_loader(func): def _file_loader(func):
@wraps(func) @functools.wraps(func)
def wrapper(fileobj, *args): def wrapper(fileobj, *args):
if isinstance(fileobj, basestring): if isinstance(fileobj, basestring):
# Is actually a filename # Is actually a filename
fileobj = open(fileobj, 'rb',4096) fileobj = open(fileobj, 'rb', 4096)
return func(fileobj, *args) return func(fileobj, *args)
return wrapper return wrapper
@_file_loader @_file_loader
def load(fileobj): def load(fileobj):
"""Reads in the given file as NBT format, parses it, and returns the """Reads in the given file as NBT format, parses it, and returns the
result result as a (name, data) tuple.
""" """
return NBTFileReader(fileobj).read_all() return NBTFileReader(fileobj).read_all()
def load_from_region(filename, x, y, north_direction): @_file_loader
nbt = load_region(filename, north_direction).load_chunk(x, y) def load_region(fileobj):
if nbt is None: """Reads in the given file as a MCR region, and returns an object
return None ## return none. I think this is how we should indicate missing chunks for accessing the chunks inside."""
#raise IOError("No such chunk in region: (%i, %i)" % (x, y)) return MCRFileReader(fileobj)
return nbt.read_all()
class CorruptNBTError(Exception):
def load_region(filename, north_direction): """An exception raised when the NBTFileReader class encounters
return MCRFileReader(filename, north_direction) something unexpected in an NBT file."""
pass
# compile the unpackers into classes
_byte = struct.Struct("b")
_short = struct.Struct(">h")
_int = struct.Struct(">i")
_long = struct.Struct(">q")
_float = struct.Struct(">f")
_double = struct.Struct(">d")
_24bit_int = struct.Struct("B B B")
_unsigned_byte = struct.Struct("B")
_unsigned_int = struct.Struct(">I")
_chunk_header = struct.Struct(">I B")
class NBTFileReader(object): class NBTFileReader(object):
"""Low level class that reads the Named Binary Tag format used by Minecraft """Low level class that reads the Named Binary Tag format used by Minecraft
""" """
# compile the unpackers into classes
_byte = struct.Struct("b")
_short = struct.Struct(">h")
_int = struct.Struct(">i")
_long = struct.Struct(">q")
_float = struct.Struct(">f")
_double = struct.Struct(">d")
def __init__(self, fileobj, is_gzip=True): def __init__(self, fileobj, is_gzip=True):
"""Create a NBT parsing object with the given file-like
object. Setting is_gzip to False parses the file as a zlib
stream instead."""
if is_gzip: if is_gzip:
self._file = gzip.GzipFile(fileobj=fileobj, mode='rb') self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
else: else:
@@ -75,6 +72,21 @@ class NBTFileReader(object):
data = zlib.decompress(fileobj.read()) data = zlib.decompress(fileobj.read())
self._file = StringIO.StringIO(data) self._file = StringIO.StringIO(data)
# mapping of NBT type ids to functions to read them out
self._read_tagmap = {
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
# These private methods read the payload only of the following types # These private methods read the payload only of the following types
def _read_tag_end(self): def _read_tag_end(self):
# Nothing to read # Nothing to read
@@ -82,32 +94,27 @@ class NBTFileReader(object):
def _read_tag_byte(self): def _read_tag_byte(self):
byte = self._file.read(1) byte = self._file.read(1)
return _byte.unpack(byte)[0] return self._byte.unpack(byte)[0]
def _read_tag_short(self): def _read_tag_short(self):
bytes = self._file.read(2) bytes = self._file.read(2)
global _short return self._short.unpack(bytes)[0]
return _short.unpack(bytes)[0]
def _read_tag_int(self): def _read_tag_int(self):
bytes = self._file.read(4) bytes = self._file.read(4)
global _int return self._int.unpack(bytes)[0]
return _int.unpack(bytes)[0]
def _read_tag_long(self): def _read_tag_long(self):
bytes = self._file.read(8) bytes = self._file.read(8)
global _long return self._long.unpack(bytes)[0]
return _long.unpack(bytes)[0]
def _read_tag_float(self): def _read_tag_float(self):
bytes = self._file.read(4) bytes = self._file.read(4)
global _float return self._float.unpack(bytes)[0]
return _float.unpack(bytes)[0]
def _read_tag_double(self): def _read_tag_double(self):
bytes = self._file.read(8) bytes = self._file.read(8)
global _double return self._double.unpack(bytes)[0]
return _double.unpack(bytes)[0]
def _read_tag_byte_array(self): def _read_tag_byte_array(self):
length = self._read_tag_int() length = self._read_tag_int()
@@ -116,10 +123,8 @@ class NBTFileReader(object):
def _read_tag_string(self): def _read_tag_string(self):
length = self._read_tag_short() length = self._read_tag_short()
# Read the string # Read the string
string = self._file.read(length) string = self._file.read(length)
# decode it and return # decode it and return
return string.decode("UTF-8") return string.decode("UTF-8")
@@ -127,21 +132,7 @@ class NBTFileReader(object):
tagid = self._read_tag_byte() tagid = self._read_tag_byte()
length = self._read_tag_int() length = self._read_tag_int()
read_tagmap = { read_method = self._read_tagmap[tagid]
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
read_method = read_tagmap[tagid]
l = [] l = []
for _ in xrange(length): for _ in xrange(length):
l.append(read_method()) l.append(read_method())
@@ -158,27 +149,11 @@ class NBTFileReader(object):
break break
name = self._read_tag_string() name = self._read_tag_string()
read_tagmap = { payload = self._read_tagmap[tagtype]()
0: self._read_tag_end,
1: self._read_tag_byte,
2: self._read_tag_short,
3: self._read_tag_int,
4: self._read_tag_long,
5: self._read_tag_float,
6: self._read_tag_double,
7: self._read_tag_byte_array,
8: self._read_tag_string,
9: self._read_tag_list,
10:self._read_tag_compound,
}
payload = read_tagmap[tagtype]()
tags[name] = payload tags[name] = payload
return tags return tags
def read_all(self): def read_all(self):
"""Reads the entire file and returns (name, payload) """Reads the entire file and returns (name, payload)
name is the name of the root tag, and payload is a dictionary mapping name is the name of the root tag, and payload is a dictionary mapping
@@ -186,179 +161,74 @@ class NBTFileReader(object):
""" """
# Read tag type # Read tag type
tagtype = ord(self._file.read(1)) try:
if tagtype != 10: tagtype = ord(self._file.read(1))
raise Exception("Expected a tag compound") if tagtype != 10:
raise Exception("Expected a tag compound")
# Read the tag name
name = self._read_tag_string() # Read the tag name
name = self._read_tag_string()
payload = self._read_tag_compound() payload = self._read_tag_compound()
return name, payload return (name, payload)
except (struct.error, ValueError), e:
raise CorruptNBTError("could not parse nbt: %s" % (str(e),))
class CorruptRegionError(Exception):
"""An exception raised when the MCRFileReader class encounters an
error during region file parsing.
"""
pass
# For reference, the MCR format is outlined at # For reference, the MCR format is outlined at
# <http://www.minecraftwiki.net/wiki/Beta_Level_Format> # <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
class MCRFileReader(object): class MCRFileReader(object):
"""A class for reading chunk region files, as introduced in the """A class for reading chunk region files, as introduced in the
Beta 1.3 update. It provides functions for opening individual Beta 1.3 update. It provides functions for opening individual
chunks (as instances of NBTFileReader), getting chunk timestamps, chunks (as (name, data) tuples), getting chunk timestamps, and for
and for listing chunks contained in the file.""" listing chunks contained in the file.
"""
def __init__(self, filename, north_direction): _table_format = struct.Struct(">1024I")
_chunk_header_format = struct.Struct(">I B")
def __init__(self, fileobj):
"""This creates a region object from the given file-like
object. Chances are you want to use load_region instead."""
self._file = fileobj
# read in the location table
location_data = self._file.read(4096)
if not len(location_data) == 4096:
raise CorruptRegionError("invalid location table")
# read in the timestamp table
timestamp_data = self._file.read(4096)
if not len(timestamp_data) == 4096:
raise CorruptRegionError("invalid timestamp table")
# turn this data into a useful list
self._locations = self._table_format.unpack(location_data)
self._timestamps = self._table_format.unpack(timestamp_data)
def close(self):
"""Close the region file and free any resources associated
with keeping it open. Using this object after closing it
results in undefined behaviour.
"""
self._file.close()
self._file = None self._file = None
self._filename = filename
self.north_direction = north_direction
# cache used when the entire header tables are read in get_chunks()
self._locations = None
self._timestamps = None
self._chunks = None
def get_north_rotations(self):
if self.north_direction == 'upper-left':
return 3
elif self.north_direction == 'upper-right':
return 2
elif self.north_direction == 'lower-right':
return 1
elif self.north_direction == 'lower-left':
return 0
def _read_24bit_int(self):
"""Read in a 24-bit, big-endian int, used in the chunk
location table."""
ret = 0
bytes = self._file.read(3)
global _24bit_int
bytes = _24bit_int.unpack(bytes)
for i in xrange(3):
ret = ret << 8
ret += bytes[i]
return ret
def _read_chunk_location(self, x=None, y=None):
"""Read and return the (offset, length) of the given chunk
coordinate, or None if the requested chunk doesn't exist. x
and y must be between 0 and 31, or None. If they are None,
then there will be no file seek before doing the read."""
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# check for a cached value
if self._locations:
return self._locations[x + y * 32]
# go to the correct entry in the chunk location table
self._file.seek(4 * (x + y * 32))
try:
# 3-byte offset in 4KiB sectors
offset_sectors = self._read_24bit_int()
# 1-byte length in 4KiB sectors, rounded up
global _unsigned_byte
byte = self._file.read(1)
length_sectors = _unsigned_byte.unpack(byte)[0]
except (IndexError, struct.error):
# got a problem somewhere
return None
# check for empty chunks
if offset_sectors == 0 or length_sectors == 0:
return None
return (offset_sectors * 4096, length_sectors * 4096)
def _read_chunk_timestamp(self, x=None, y=None):
"""Read and return the last modification time of the given
chunk coordinate. x and y must be between 0 and 31, or
None. If they are None, then there will be no file seek
before doing the read."""
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# check for a cached value
if self._timestamps:
return self._timestamps[x + y * 32]
# go to the correct entry in the chunk timestamp table
self._file.seek(4 * (x + y * 32) + 4096)
try:
bytes = self._file.read(4)
global _unsigned_int
timestamp = _unsigned_int.unpack(bytes)[0]
except (IndexError, struct.error):
return 0
return timestamp
def openfile(self):
#make sure we clean up
if self._file is None:
self._file = open(self._filename,'rb')
def closefile(self):
#make sure we clean up
if self._file is not None:
self._file.close()
self._file = None
def get_chunks(self): def get_chunks(self):
"""Return a list of all chunks contained in this region file, """Return an iterator of all chunks contained in this region
as a list of (x, y) coordinate tuples. To load these chunks, file, as (x, y) coordinate tuples. To load these chunks,
provide these coordinates to load_chunk().""" provide these coordinates to load_chunk()."""
if self._chunks is not None:
return self._chunks
if self._locations is None:
self.get_chunk_info()
self._chunks = []
for x in xrange(32): for x in xrange(32):
for y in xrange(32): for y in xrange(32):
if self._locations[x + y * 32] is not None: if self._locations[x + y * 32] >> 8 != 0:
self._chunks.append((x,y)) yield (x,y)
return self._chunks
def get_chunk_info(self,closeFile = True):
"""Preloads region header information."""
if self._locations:
return
self.openfile()
self._chunks = None
self._locations = [0]*32*32
self._timestamps = []
# go to the beginning of the file
self._file.seek(0)
# read chunk location table
locations_index = numpy.reshape(numpy.rot90(numpy.reshape(range(32*32),
(32, 32)), -self.get_north_rotations()), -1)
for i in locations_index:
self._locations[i] = self._read_chunk_location()
# read chunk timestamp table
timestamp_append = self._timestamps.append
for _ in xrange(32*32):
timestamp_append(self._read_chunk_timestamp())
self._timestamps = numpy.reshape(numpy.rot90(numpy.reshape(
self._timestamps, (32,32)),self.get_north_rotations()), -1)
if closeFile:
self.closefile()
return
def get_chunk_timestamp(self, x, y): def get_chunk_timestamp(self, x, y):
"""Return the given chunk's modification time. If the given """Return the given chunk's modification time. If the given
chunk doesn't exist, this number may be nonsense. Like chunk doesn't exist, this number may be nonsense. Like
@@ -366,21 +236,16 @@ class MCRFileReader(object):
""" """
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._timestamps is None:
self.get_chunk_info()
return self._timestamps[x + y * 32] return self._timestamps[x + y * 32]
def chunkExists(self, x, y): def chunk_exists(self, x, y):
"""Determines if a chunk exists without triggering loading of the backend data""" """Determines if a chunk exists."""
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._locations is None: return self._locations[x + y * 32] >> 8 != 0
self.get_chunk_info()
location = self._locations[x + y * 32]
return location is not None
def load_chunk(self, x, y,closeFile=True): def load_chunk(self, x, y):
"""Return a NBTFileReader instance for the given chunk, or """Return a (name, data) tuple for the given chunk, or
None if the given chunk doesn't exist in this region file. If None if the given chunk doesn't exist in this region file. If
you provide an x or y not between 0 and 31, it will be you provide an x or y not between 0 and 31, it will be
modulo'd into this range (x % 32, etc.) This is so you can modulo'd into this range (x % 32, etc.) This is so you can
@@ -388,21 +253,21 @@ class MCRFileReader(object):
have the chunks load out of regions properly.""" have the chunks load out of regions properly."""
x = x % 32 x = x % 32
y = y % 32 y = y % 32
if self._locations is None:
self.get_chunk_info()
location = self._locations[x + y * 32] location = self._locations[x + y * 32]
if location is None: offset = (location >> 8) * 4096;
sectors = location & 0xff;
if offset == 0:
return None return None
self.openfile()
# seek to the data # seek to the data
self._file.seek(location[0]) self._file.seek(offset)
# read in the chunk data header # read in the chunk data header
bytes = self._file.read(5) header = self._file.read(5)
data_length,compression = _chunk_header.unpack(bytes) if len(header) != 5:
raise CorruptChunkError("chunk header is invalid")
data_length, compression = self._chunk_header_format.unpack(header)
# figure out the compression # figure out the compression
is_gzip = True is_gzip = True
@@ -414,12 +279,13 @@ class MCRFileReader(object):
is_gzip = False is_gzip = False
else: else:
# unsupported! # unsupported!
raise Exception("Unsupported chunk compression type: %i" % (compression)) raise CorruptRegionError("unsupported chunk compression type: %i" % (compression))
# turn the rest of the data into a StringIO object # turn the rest of the data into a StringIO object
# (using data_length - 1, as we already read 1 byte for compression) # (using data_length - 1, as we already read 1 byte for compression)
data = self._file.read(data_length - 1) data = self._file.read(data_length - 1)
if len(data) != data_length - 1:
raise CorruptRegionError("chunk length is invalid")
data = StringIO.StringIO(data) data = StringIO.StringIO(data)
if closeFile: return NBTFileReader(data, is_gzip=is_gzip).read_all()
self.closefile()
return NBTFileReader(data, is_gzip=is_gzip)