0

Switched from struct.unpack (the module-level function) to Struct.unpack (the class method); the Struct class pre-compiles the format string, which reduces per-call parsing costs. Also coalesced a few separate unpack calls into a single compound unpack call.
Moved the functionality for getting a list of valid chunks out of get_chunk_info and into a new get_chunks method.
This commit is contained in:
Xon
2011-03-23 16:44:27 +08:00
parent c1b7b12592
commit dbdd5d0fc8

741
nbt.py
View File

@@ -1,356 +1,385 @@
# This file is part of the Minecraft Overviewer. # This file is part of the Minecraft Overviewer.
# #
# Minecraft Overviewer is free software: you can redistribute it and/or # Minecraft Overviewer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published # modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License, or (at # by the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version. # your option) any later version.
# #
# Minecraft Overviewer is distributed in the hope that it will be useful, # Minecraft Overviewer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. # Public License for more details.
# #
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with the Overviewer. If not, see <http://www.gnu.org/licenses/>. # with the Overviewer. If not, see <http://www.gnu.org/licenses/>.
import gzip, zlib import gzip, zlib
import struct import struct
import StringIO import StringIO
import os import os
# decorator to handle filename or object as first parameter # decorator to handle filename or object as first parameter
def _file_loader(func): def _file_loader(func):
def wrapper(fileobj, *args): def wrapper(fileobj, *args):
if isinstance(fileobj, basestring): if isinstance(fileobj, basestring):
if not os.path.isfile(fileobj): if not os.path.isfile(fileobj):
return None return None
# Is actually a filename # Is actually a filename
fileobj = open(fileobj, 'rb') fileobj = open(fileobj, 'rb',4096)
return func(fileobj, *args) return func(fileobj, *args)
return wrapper return wrapper
@_file_loader @_file_loader
def load(fileobj): def load(fileobj):
return NBTFileReader(fileobj).read_all() return NBTFileReader(fileobj).read_all()
def load_from_region(filename, x, y): def load_from_region(filename, x, y):
nbt = load_region(filename).load_chunk(x, y) nbt = load_region(filename).load_chunk(x, y)
if nbt is None: if nbt is None:
return None ## return none. I think this is how we should indicate missing chunks return None ## return none. I think this is how we should indicate missing chunks
#raise IOError("No such chunk in region: (%i, %i)" % (x, y)) #raise IOError("No such chunk in region: (%i, %i)" % (x, y))
return nbt.read_all() return nbt.read_all()
def load_region(filename): def load_region(filename):
return MCRFileReader(filename) return MCRFileReader(filename)
class NBTFileReader(object):
def __init__(self, fileobj, is_gzip=True): # compile the unpacker's into a classes
if is_gzip: _byte = struct.Struct("b")
self._file = gzip.GzipFile(fileobj=fileobj, mode='rb') _short = struct.Struct(">h")
else: _int = struct.Struct(">i")
# pure zlib stream -- maybe later replace this with _long = struct.Struct(">q")
# a custom zlib file object? _float = struct.Struct(">f")
data = zlib.decompress(fileobj.read()) _double = struct.Struct(">d")
self._file = StringIO.StringIO(data)
_24bit_int = struct.Struct("B B B")
# These private methods read the payload only of the following types _unsigned_byte = struct.Struct("B")
def _read_tag_end(self): _unsigned_int = struct.Struct(">I")
# Nothing to read _chunk_header = struct.Struct(">I B")
return 0
class NBTFileReader(object):
def _read_tag_byte(self): def __init__(self, fileobj, is_gzip=True):
byte = self._file.read(1) if is_gzip:
return struct.unpack("b", byte)[0] self._file = gzip.GzipFile(fileobj=fileobj, mode='rb')
else:
def _read_tag_short(self): # pure zlib stream -- maybe later replace this with
bytes = self._file.read(2) # a custom zlib file object?
return struct.unpack(">h", bytes)[0] data = zlib.decompress(fileobj.read())
self._file = StringIO.StringIO(data)
def _read_tag_int(self):
bytes = self._file.read(4) # These private methods read the payload only of the following types
return struct.unpack(">i", bytes)[0] def _read_tag_end(self):
# Nothing to read
def _read_tag_long(self): return 0
bytes = self._file.read(8)
return struct.unpack(">q", bytes)[0] def _read_tag_byte(self):
byte = self._file.read(1)
def _read_tag_float(self): return _byte.unpack(byte)[0]
bytes = self._file.read(4)
return struct.unpack(">f", bytes)[0] def _read_tag_short(self):
bytes = self._file.read(2)
def _read_tag_double(self): global _short
bytes = self._file.read(8) return _short.unpack(bytes)[0]
return struct.unpack(">d", bytes)[0]
def _read_tag_int(self):
def _read_tag_byte_array(self): bytes = self._file.read(4)
length = self._read_tag_int() global _int
bytes = self._file.read(length) return _int.unpack(bytes)[0]
return bytes
def _read_tag_long(self):
def _read_tag_string(self): bytes = self._file.read(8)
length = self._read_tag_short() global _long
return _long.unpack(bytes)[0]
# Read the string
string = self._file.read(length) def _read_tag_float(self):
bytes = self._file.read(4)
# decode it and return global _float
return string.decode("UTF-8") return _float.unpack(bytes)[0]
def _read_tag_list(self): def _read_tag_double(self):
tagid = self._read_tag_byte() bytes = self._file.read(8)
length = self._read_tag_int() global _double
return _double.unpack(bytes)[0]
read_tagmap = {
0: self._read_tag_end, def _read_tag_byte_array(self):
1: self._read_tag_byte, length = self._read_tag_int()
2: self._read_tag_short, bytes = self._file.read(length)
3: self._read_tag_int, return bytes
4: self._read_tag_long,
5: self._read_tag_float, def _read_tag_string(self):
6: self._read_tag_double, length = self._read_tag_short()
7: self._read_tag_byte_array,
8: self._read_tag_string, # Read the string
9: self._read_tag_list, string = self._file.read(length)
10:self._read_tag_compound,
} # decode it and return
return string.decode("UTF-8")
read_method = read_tagmap[tagid]
l = [] def _read_tag_list(self):
for _ in xrange(length): tagid = self._read_tag_byte()
l.append(read_method()) length = self._read_tag_int()
return l
read_tagmap = {
def _read_tag_compound(self): 0: self._read_tag_end,
# Build a dictionary of all the tag names mapping to their payloads 1: self._read_tag_byte,
tags = {} 2: self._read_tag_short,
while True: 3: self._read_tag_int,
# Read a tag 4: self._read_tag_long,
tagtype = ord(self._file.read(1)) 5: self._read_tag_float,
6: self._read_tag_double,
if tagtype == 0: 7: self._read_tag_byte_array,
break 8: self._read_tag_string,
9: self._read_tag_list,
name = self._read_tag_string() 10:self._read_tag_compound,
read_tagmap = { }
0: self._read_tag_end,
1: self._read_tag_byte, read_method = read_tagmap[tagid]
2: self._read_tag_short, l = []
3: self._read_tag_int, for _ in xrange(length):
4: self._read_tag_long, l.append(read_method())
5: self._read_tag_float, return l
6: self._read_tag_double,
7: self._read_tag_byte_array, def _read_tag_compound(self):
8: self._read_tag_string, # Build a dictionary of all the tag names mapping to their payloads
9: self._read_tag_list, tags = {}
10:self._read_tag_compound, while True:
} # Read a tag
payload = read_tagmap[tagtype]() tagtype = ord(self._file.read(1))
tags[name] = payload if tagtype == 0:
break
return tags
name = self._read_tag_string()
read_tagmap = {
0: self._read_tag_end,
def read_all(self): 1: self._read_tag_byte,
"""Reads the entire file and returns (name, payload) 2: self._read_tag_short,
name is the name of the root tag, and payload is a dictionary mapping 3: self._read_tag_int,
names to their payloads 4: self._read_tag_long,
5: self._read_tag_float,
""" 6: self._read_tag_double,
# Read tag type 7: self._read_tag_byte_array,
tagtype = ord(self._file.read(1)) 8: self._read_tag_string,
if tagtype != 10: 9: self._read_tag_list,
raise Exception("Expected a tag compound") 10:self._read_tag_compound,
}
# Read the tag name payload = read_tagmap[tagtype]()
name = self._read_tag_string()
tags[name] = payload
payload = self._read_tag_compound()
return tags
return name, payload
# For reference, the MCR format is outlined at def read_all(self):
# <http://www.minecraftwiki.net/wiki/Beta_Level_Format> """Reads the entire file and returns (name, payload)
class MCRFileReader(object): name is the name of the root tag, and payload is a dictionary mapping
"""A class for reading chunk region files, as introduced in the names to their payloads
Beta 1.3 update. It provides functions for opening individual
chunks (as instances of NBTFileReader), getting chunk timestamps, """
and for listing chunks contained in the file.""" # Read tag type
tagtype = ord(self._file.read(1))
def __init__(self, filename): if tagtype != 10:
self._file = None raise Exception("Expected a tag compound")
self._filename = filename
# cache used when the entire header tables are read in get_chunks() # Read the tag name
self._locations = None name = self._read_tag_string()
self._timestamps = None
self._chunks = None payload = self._read_tag_compound()
def _read_24bit_int(self): return name, payload
"""Read in a 24-bit, big-endian int, used in the chunk
location table."""
# For reference, the MCR format is outlined at
ret = 0 # <http://www.minecraftwiki.net/wiki/Beta_Level_Format>
bytes = self._file.read(3) class MCRFileReader(object):
for i in xrange(3): """A class for reading chunk region files, as introduced in the
ret = ret << 8 Beta 1.3 update. It provides functions for opening individual
ret += struct.unpack("B", bytes[i])[0] chunks (as instances of NBTFileReader), getting chunk timestamps,
and for listing chunks contained in the file."""
return ret
def __init__(self, filename):
def _read_chunk_location(self, x=None, y=None): self._file = None
"""Read and return the (offset, length) of the given chunk self._filename = filename
coordinate, or None if the requested chunk doesn't exist. x # cache used when the entire header tables are read in get_chunks()
and y must be between 0 and 31, or None. If they are None, self._locations = None
then there will be no file seek before doing the read.""" self._timestamps = None
self._chunks = None
if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32): def _read_24bit_int(self):
raise ValueError("Chunk location out of range.") """Read in a 24-bit, big-endian int, used in the chunk
location table."""
# check for a cached value
if self._locations: ret = 0
return self._locations[x + y * 32] bytes = self._file.read(3)
global _24bit_int
# go to the correct entry in the chunk location table bytes = _24bit_int.unpack(bytes)
self._file.seek(4 * (x + y * 32)) for i in xrange(3):
ret = ret << 8
# 3-byte offset in 4KiB sectors ret += bytes[i]
offset_sectors = self._read_24bit_int()
return ret
# 1-byte length in 4KiB sectors, rounded up
byte = self._file.read(1) def _read_chunk_location(self, x=None, y=None):
length_sectors = struct.unpack("B", byte)[0] """Read and return the (offset, length) of the given chunk
coordinate, or None if the requested chunk doesn't exist. x
# check for empty chunks and y must be between 0 and 31, or None. If they are None,
if offset_sectors == 0 or length_sectors == 0: then there will be no file seek before doing the read."""
return None
if x is not None and y is not None:
return (offset_sectors * 4096, length_sectors * 4096) if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
def _read_chunk_timestamp(self, x=None, y=None):
"""Read and return the last modification time of the given # check for a cached value
chunk coordinate. x and y must be between 0 and 31, or if self._locations:
None. If they are, None, then there will be no file seek return self._locations[x + y * 32]
before doing the read."""
# go to the correct entry in the chunk location table
if x is not None and y is not None: self._file.seek(4 * (x + y * 32))
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
raise ValueError("Chunk location out of range.")
# 3-byte offset in 4KiB sectors
# check for a cached value offset_sectors = self._read_24bit_int()
if self._timestamps: global _unsigned_byte
return self._timestamps[x + y * 32] # 1-byte length in 4KiB sectors, rounded up
byte = self._file.read(1)
# go to the correct entry in the chunk timestamp table length_sectors = _unsigned_byte.unpack(byte)[0]
self._file.seek(4 * (x + y * 32) + 4096)
# check for empty chunks
bytes = self._file.read(4) if offset_sectors == 0 or length_sectors == 0:
timestamp = struct.unpack(">I", bytes)[0] return None
return timestamp return (offset_sectors * 4096, length_sectors * 4096)
def get_chunk_info(self,closeFile = True): def _read_chunk_timestamp(self, x=None, y=None):
"""Return a list of all chunks contained in this region file, """Read and return the last modification time of the given
as a list of (x, y) coordinate tuples. To load these chunks, chunk coordinate. x and y must be between 0 and 31, or
provide these coordinates to load_chunk().""" None. If they are, None, then there will be no file seek
before doing the read."""
if self._chunks:
return self._chunks if x is not None and y is not None:
if (not x >= 0) or (not x < 32) or (not y >= 0) or (not y < 32):
if self._file is None: raise ValueError("Chunk location out of range.")
self._file = open(self._filename,'rb');
# check for a cached value
self._chunks = [] if self._timestamps:
self._locations = [] return self._timestamps[x + y * 32]
self._timestamps = []
# go to the correct entry in the chunk timestamp table
# go to the beginning of the file self._file.seek(4 * (x + y * 32) + 4096)
self._file.seek(0)
bytes = self._file.read(4)
# read chunk location table
for y in xrange(32): global _unsigned_int
for x in xrange(32): timestamp = _unsigned_int.unpack(bytes)[0]
location = self._read_chunk_location()
self._locations.append(location) return timestamp
if location:
self._chunks.append((x, y)) def get_chunks(self):
"""Return a list of all chunks contained in this region file,
# read chunk timestamp table as a list of (x, y) coordinate tuples. To load these chunks,
for y in xrange(32): provide these coordinates to load_chunk()."""
for x in xrange(32):
timestamp = self._read_chunk_timestamp() if self._chunks:
self._timestamps.append(timestamp) return self._chunks
if self._locations is None:
if closeFile: self.get_chunk_info()
#free the file object since it isn't safe to be reused in child processes (seek point goes wonky!) self._chunks = filter(None,self._locations)
self._file.close()
self._file = None return self._chunks
return self._chunks
def get_chunk_info(self,closeFile = True):
def get_chunk_timestamp(self, x, y): """Preloads region header information."""
"""Return the given chunk's modification time. If the given
chunk doesn't exist, this number may be nonsense. Like if self._locations:
load_chunk(), this will wrap x and y into the range [0, 31]. return
"""
x = x % 32 if self._file is None:
y = y % 32 self._file = open(self._filename,'rb');
if self._timestamps is None:
self.get_chunk_info() self._chunks = None
return self._timestamps[x + y * 32] self._locations = []
self._timestamps = []
def chunkExists(self, x, y):
"""Determines if a chunk exists without triggering loading of the backend data""" # go to the beginning of the file
x = x % 32 self._file.seek(0)
y = y % 32
if self._locations is None: # read chunk location table
self.get_chunk_info() locations_append = self._locations.append
location = self._locations[x + y * 32] for x, y in [(x,y) for x in xrange(32) for y in xrange(32)]:
return location is not None locations_append(self._read_chunk_location())
def load_chunk(self, x, y): # read chunk timestamp table
"""Return a NBTFileReader instance for the given chunk, or timestamp_append = self._timestamps.append
None if the given chunk doesn't exist in this region file. If for x, y in [(x,y) for x in xrange(32) for y in xrange(32)]:
you provide an x or y not between 0 and 31, it will be timestamp_append(self._read_chunk_timestamp())
modulo'd into this range (x % 32, etc.) This is so you can
provide chunk coordinates in global coordinates, and still if closeFile:
have the chunks load out of regions properly.""" #free the file object since it isn't safe to be reused in child processes (seek point goes wonky!)
x = x % 32 self._file.close()
y = y % 32 self._file = None
if self._locations is None: return
self.get_chunk_info()
def get_chunk_timestamp(self, x, y):
location = self._locations[x + y * 32] """Return the given chunk's modification time. If the given
if location is None: chunk doesn't exist, this number may be nonsense. Like
return None load_chunk(), this will wrap x and y into the range [0, 31].
"""
if self._file is None: x = x % 32
self._file = open(self._filename,'rb'); y = y % 32
# seek to the data if self._timestamps is None:
self._file.seek(location[0]) self.get_chunk_info()
return self._timestamps[x + y * 32]
# read in the chunk data header
bytes = self._file.read(4) def chunkExists(self, x, y):
data_length = struct.unpack(">I", bytes)[0] """Determines if a chunk exists without triggering loading of the backend data"""
bytes = self._file.read(1) x = x % 32
compression = struct.unpack("B", bytes)[0] y = y % 32
if self._locations is None:
# figure out the compression self.get_chunk_info()
is_gzip = True location = self._locations[x + y * 32]
if compression == 1: return location is not None
# gzip -- not used by the official client, but trivial to support here so...
is_gzip = True def load_chunk(self, x, y):
elif compression == 2: """Return a NBTFileReader instance for the given chunk, or
# deflate -- pure zlib stream None if the given chunk doesn't exist in this region file. If
is_gzip = False you provide an x or y not between 0 and 31, it will be
else: modulo'd into this range (x % 32, etc.) This is so you can
# unsupported! provide chunk coordinates in global coordinates, and still
raise Exception("Unsupported chunk compression type: %i" % (compression)) have the chunks load out of regions properly."""
# turn the rest of the data into a StringIO object x = x % 32
# (using data_length - 1, as we already read 1 byte for compression) y = y % 32
data = self._file.read(data_length - 1) if self._locations is None:
data = StringIO.StringIO(data) self.get_chunk_info()
return NBTFileReader(data, is_gzip=is_gzip) location = self._locations[x + y * 32]
if location is None:
return None
if self._file is None:
self._file = open(self._filename,'rb');
# seek to the data
self._file.seek(location[0])
# read in the chunk data header
bytes = self._file.read(5)
data_length,compression = _chunk_header.unpack(bytes)
# figure out the compression
is_gzip = True
if compression == 1:
# gzip -- not used by the official client, but trivial to support here so...
is_gzip = True
elif compression == 2:
# deflate -- pure zlib stream
is_gzip = False
else:
# unsupported!
raise Exception("Unsupported chunk compression type: %i" % (compression))
# turn the rest of the data into a StringIO object
# (using data_length - 1, as we already read 1 byte for compression)
data = self._file.read(data_length - 1)
data = StringIO.StringIO(data)
return NBTFileReader(data, is_gzip=is_gzip)