0
This repository has been archived on 2025-04-25. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Minecraft-Overviewer/world.py
Andrew Brown a3a4877e6a no longer validates images, runs MUCH faster to scan existing chunks.
If a chunk image can't be loaded, it re-generates it on the fly.
2010-09-12 01:04:31 -04:00

686 lines
27 KiB
Python

import functools
import string
import os
import os.path
import time
import multiprocessing
import hashlib
from PIL import Image
import chunk
# Parse a base-36 string (digits 0-9 then a-z, optional leading "-") into an
# integer. Inverse of base36encode() below.
base36decode = functools.partial(int, base=36)


def base36encode(number):
    """Return the base-36 string representation of an integer.

    The digits are 0-9 followed by lowercase a-z. Negative numbers are
    rendered with a leading "-". The result can be turned back into the
    integer with base36decode().
    """
    if number == 0:
        return '0'
    # ascii_lowercase instead of string.lowercase: the latter depends on the
    # current locale, while the digit alphabet must always be exactly a-z.
    alphabet = string.digits + string.ascii_lowercase
    neg = number < 0
    if neg:
        number = -number
    base36 = ''
    while number != 0:
        # Peel off the least-significant digit and prepend it.
        number, i = divmod(number, 36)
        base36 = alphabet[i] + base36
    if neg:
        return "-" + base36
    return base36
def load_sort_and_process(worlddir):
    """Find every chunk under worlddir and start rendering them.

    The chunk coordinates are translated to (col, row) with convert_coords()
    and handed to render_chunks_async() with caves disabled and 5 processes.
    Returns whatever render_chunks_async() returns: a mapping from (col, row)
    to a result object.
    """
    chunkfiles = find_chunkfiles(worlddir)
    # convert_coords() also reports the column/row bounds; only the
    # translated list (the last element) is needed here.
    _mincol, _maxcol, _minrow, _maxrow, translated = convert_coords(chunkfiles)
    return render_chunks_async(translated, caves=False, processes=5)
def find_chunkfiles(worlddir):
    """Walk worlddir and collect every chunk data file found.

    Returns a list of (chunkx, chunky, filename) tuples, where chunkx and
    chunky are the chunk coordinates decoded from the base-36 fields of the
    "c.<x>.<y>.dat" file name. Use convert_coords() to turn the resulting
    list into an oblique coordinate system.
    """
    found = []
    for dirpath, dirnames, filenames in os.walk(worlddir):
        # Only leaf directories (no subdirectories) that hold files are
        # inspected.
        if dirnames or not filenames:
            continue
        for fname in filenames:
            if not (fname.startswith("c.") and fname.endswith(".dat")):
                continue
            fields = fname.split(".")
            chunkx = base36decode(fields[1])
            chunky = base36decode(fields[2])
            found.append((chunkx, chunky, os.path.join(dirpath, fname)))
    return found
def render_chunks_async(chunks, caves, processes):
    """Render all the given chunks, in a process pool unless processes == 1.

    chunks is a list of (chunkx, chunky, chunkfile)

    caves is a boolean passed to chunk.render_and_save() as its "cave"
    keyword argument

    processes is the number of worker processes. With exactly 1, no pool is
    created and the chunks are rendered one by one in the calling process.

    Returns a dictionary mapping (chunkx, chunky) to an object whose get()
    method returns what chunk.render_and_save() returned for that chunk.
    When a pool is used these objects are multiprocessing.pool.AsyncResult
    instances, and the dictionary additionally holds the Pool object itself
    under the string key "pool".
    """
    if processes == 1:
        # Skip the multiprocessing stuff
        print("Rendering chunks synchronously since you requested 1 process")

        class MyResult(object):
            """Minimal stand-in for AsyncResult holding a finished value."""

            def __init__(self, value):
                # Store the value per instance. A closure over the loop
                # variable would make every result return the LAST chunk's
                # value.
                self._value = value

            def get(self):
                return self._value

        resultsmap = {}
        total = len(chunks)
        for i, (chunkx, chunky, chunkfile) in enumerate(chunks):
            result = chunk.render_and_save(chunkfile, cave=caves)
            # i is zero-based; report the number of chunks actually done.
            print("{0}/{1} chunks rendered".format(i + 1, total))
            resultsmap[(chunkx, chunky)] = MyResult(result)
        return resultsmap

    pool = multiprocessing.Pool(processes=processes)
    resultsmap = {}
    for chunkx, chunky, chunkfile in chunks:
        resultsmap[(chunkx, chunky)] = pool.apply_async(
            chunk.render_and_save, args=(chunkfile,), kwds=dict(cave=caves))
    # No more work will be submitted; the workers finish what is queued.
    pool.close()

    # Stick the pool object in the dict under the key "pool" so it isn't
    # garbage collected (which kills the subprocesses)
    resultsmap['pool'] = pool
    return resultsmap
def render_world(worlddir, cavemode=False, procs=2):
    """Render every chunk of a world and composite them into one image.

    worlddir is the world directory scanned with find_chunkfiles().
    cavemode is passed to render_chunks_async() as its caves flag.
    procs is the number of chunk-rendering processes to use.

    Returns the composed PIL image, or None if no chunk files were found.
    """
    print("Scanning chunks...")
    all_chunks = find_chunkfiles(worlddir)

    total = len(all_chunks)
    print("Done! {0} chunks found".format(total))
    if not total:
        return None

    # Create an image big enough for all chunks
    # Each chunk is 384 pixels across. Each chunk is vertically 1728 pixels,
    # but are spaced only 16*12=192 pixels apart. (Staggered, it's half that)

    # Imagine a diagonal coordinate system to address the chunks where
    # increasing x goes up-right and increasing z goes down-right. This needs
    # to be embedded in a square. How big is this square?

    # Each column of chunks has a constant x+z sum of their coordinates, since
    # going from a chunk to the one below it involves adding 1 to z and
    # subtracting 1 from x. Therefore, the leftmost column is the one that
    # minimizes x+z. The rightmost column maximizes x+z

    # Neighboring columns are 384/2 = 192 pixels apart, so the rightmost
    # column is pasted at x = (max sum - min sum) * 192. That chunk image is
    # itself 384 pixels wide, so a full chunk width is added on top;
    # otherwise the last column would land entirely outside the image (and a
    # world with a single column would get a zero-width image).

    # Similarly, each row of chunks has a constant difference between their x
    # and z coordinate, since going from a chunk to the one to its right
    # involves an addition of 1 to both x and z.

    # Neighboring rows are 16*12/2 = 96 pixels apart, so the bottom row is
    # pasted at y = (max diff - min diff) * 96, and a full chunk height
    # (1728 pixels, per the figure above) is added so that the bottom row is
    # not clipped.

    # Furthermore, the chunks with the minimum z-x are placed on the image at
    # y=0 (in image coordinates, not chunk coordinates). The chunks with the
    # minimum x+z are placed on the image at x=0.
    minsum, maxsum, mindiff, maxdiff, _ = convert_coords(all_chunks)

    width = (maxsum - minsum) * 192 + 384
    height = (maxdiff - mindiff) * 96 + 1728

    print("Final image will be {0}x{1}. (That's {2} bytes!)".format(
        width, height, width*height*4))
    print("Don't worry though, that's just the memory requirements")
    print("The final png will be much smaller")

    # Sort the chunks by their row, so when we loop through them it goes top to
    # bottom
    print("Sorting chunks...")
    all_chunks.sort(key=lambda x: x[1]-x[0])

    print("Starting up {0} chunk processors...".format(procs))
    resultsmap = render_chunks_async(all_chunks, cavemode, procs)

    # Oh god create a giant ass image
    print("Allocating memory for the giant image")
    worldimg = Image.new("RGBA", (width, height))

    print("Processing chunks!")
    processed = 0
    starttime = time.time()
    for chunkx, chunky, chunkfile in all_chunks:
        # Where should this chunk go on the image?
        column = chunkx + chunky - minsum
        row = chunky - chunkx - mindiff
        # col0 is at x=0. row0 is at y=0.
        # Each col adds 384/2. Each row adds 16*12/2
        imgx = 192 * column
        imgy = 96 * row

        print("Drawing chunk {0},{1} at pos {2},{3}".format(
            chunkx, chunky,
            imgx, imgy))
        print("It's in column {0} row {1}".format(column, row))

        # get() yields the path of the rendered chunk image (waiting for the
        # renderer to finish it if necessary).
        result = resultsmap[(chunkx, chunky)]
        chunkimagefile = result.get()
        chunkimg = Image.open(chunkimagefile)

        # Draw the image sans alpha layer, using the alpha layer as a mask. (We
        # don't want the alpha layer actually drawn on the image, this pastes
        # it as if it was a layer)
        worldimg.paste(chunkimg.convert("RGB"), (imgx, imgy), chunkimg)

        processed += 1

        print("{0}/{1} chunks rendered. Avg {2}s per chunk".format(
            processed, total, (time.time()-starttime)/processed))

    print("All done!")
    print("Took {0} minutes".format((time.time()-starttime)/60))
    return worldimg
def convert_coords(chunks):
    """Translate chunk coordinates into image columns and rows.

    chunks is a list of (chunkx, chunky, chunkfile) in the chunk coordinate
    system. A chunk's column is the sum of its two coordinates and its row
    is chunky - chunkx.

    Returns (mincol, maxcol, minrow, maxrow, chunks_translated), where
    chunks_translated is a list of (col, row, filename) in input order.
    Raises IndexError if chunks is empty.
    """
    # Seed the bounds from the first chunk (this is what raises IndexError
    # for an empty list).
    seed = chunks[0]
    col_lo = col_hi = seed[0] + seed[1]
    row_lo = row_hi = seed[1] - seed[0]

    translated = []
    for entry in chunks:
        col = entry[0] + entry[1]
        row = entry[1] - entry[0]
        if col < col_lo:
            col_lo = col
        if col > col_hi:
            col_hi = col
        if row < row_lo:
            row_lo = row
        if row > row_hi:
            row_hi = row
        translated.append((col, row, entry[2]))

    return col_lo, col_hi, row_lo, row_hi, translated
def render_worldtile(chunkmap, colstart, colend, rowstart, rowend, oldhash):
    """Renders just the specified chunks into a tile. Unlike usual python
    conventions, rowend and colend are inclusive. Additionally, the chunks
    around the edges are half-way cut off (so that neighboring tiles will
    render the other half)

    chunkmap is a dictionary mapping (col, row) to an object whose .get()
    method returns a chunk filename path (a multiprocessing.pool.AsyncResult
    object) as returned from render_chunks_async()

    Return value is (image object, hash) where hash is a byte string that
    depends on the chunk images that make up the tile.

    If no chunks were found for this tile, (None, hash) is returned.

    oldhash is a hash value of an existing tile. The hash of this tile is
    computed before it is rendered, and if they match, rendering is skipped and
    (True, oldhash) is returned.
    """
    # width of one chunk is 384. Each column is half a chunk wide. The total
    # width is (384 + 192*(numcols-1)) since the first column contributes full
    # width, and each additional one contributes half since they're staggered.
    # However, since we want to cut off half a chunk at each end (384 less
    # pixels) and since (colend - colstart + 1) is the number of columns
    # inclusive, the equation simplifies to:
    width = 192 * (colend - colstart)
    # Same deal with height
    height = 96 * (rowend - rowstart)

    # The standard tile size is 3 columns by 5 rows, which works out to 384x384
    # pixels for 8 total chunks. (Since the chunks are staggered but the grid
    # is not, some grid coordinates do not address chunks) The two chunks on
    # the middle column are shown in full, the two chunks in the middle row are
    # half cut off, and the four remaining chunks are one quarter shown.
    # The above example with cols 0-2 and rows 0-4 has the chunks arranged like this:
    #   0,0         2,0
    #         1,1
    #   0,2         2,2
    #         1,3
    #   0,4         2,4

    # Due to how the tiles fit together, we may need to render chunks way above
    # this (since very few chunks actually touch the top of the sky, some tiles
    # way above this one are possibly visible in this tile). Render them
    # anyways just in case). That's the reason for the "rowstart-16" below.

    # Before we render any tiles, check the hash of each image in this tile to
    # see if it's changed.
    tilelist = []
    imghash = hashlib.md5()
    for row in range(rowstart-16, rowend+1):
        for col in range(colstart, colend+1):
            chunkresult = chunkmap.get((col, row), None)
            if not chunkresult:
                continue
            chunkfile = chunkresult.get()
            tilelist.append((col, row, chunkfile))
            # The fifth dot-separated field of the image's file name is used
            # as that image's hash; fold it into the hash for this tile.
            imghash.update(
                os.path.basename(chunkfile).split(".")[4]
            )
    digest = imghash.digest()

    if not tilelist:
        # No chunks were found in this tile
        return None, digest
    if digest == oldhash:
        # All the chunks for this tile have not changed according to the hash
        return True, digest

    tileimg = Image.new("RGBA", (width, height))

    # col colstart will get drawn on the image starting at x coordinates -(384/2)
    # row rowstart will get drawn on the image starting at y coordinates -(192/2)
    for col, row, chunkfile in tilelist:
        try:
            chunkimg = Image.open(chunkfile)
        except IOError:
            print("Error opening file " + chunkfile)
            print("Attempting to re-generate it")
            # The image may be unreadable or missing altogether; only remove
            # it if it is actually there, so a missing file does not abort
            # the regeneration with an OSError.
            if os.path.exists(chunkfile):
                os.unlink(chunkfile)
            # Do some string manipulation to determine what the chunk file is
            # that goes with this image. Then call chunk.render_and_save
            dirname, imagename = os.path.split(chunkfile)
            parts = imagename.split(".")
            datafile = "c.{0}.{1}.dat".format(parts[1], parts[2])
            print("Chunk came from data file " + datafile)
            # XXX Don't forget to set cave mode here when it gets implemented!
            chunk.render_and_save(os.path.join(dirname, datafile), False)
            chunkimg = Image.open(chunkfile)
            print("Success")

        xpos = -192 + (col-colstart)*192
        ypos = -96 + (row-rowstart)*96

        # Paste using the chunk's own alpha channel as the mask.
        tileimg.paste(chunkimg.convert("RGB"), (xpos, ypos), chunkimg)

    return tileimg, digest
def get_quadtree_depth(colstart, colend, rowstart, rowend):
    """Work out how many times the quadtree must split to cover the map.

    The result is an integer >= 0; larger values mean a higher resolution
    map. It is one less than the maximum zoom (depth 0 is a single tile,
    depth 1 is 2 tiles wide by 2 tiles high, etc.)

    Raises Exception if no depth below 15 is large enough.
    """
    # A single tile spans 3 columns and 5 rows of chunks, so measured from
    # the midpoint the quadtree must reach 2*2^p columns and 4*2^p rows, with
    # the SAME power p for both so that every recursive split halves both
    # dimensions evenly. generate_quadtree() uses this power to pad the
    # bounds out into the void accordingly.
    #
    # So: find the smallest p such that
    #   colmid + 2*2^p >= colend  and  rowmid + 4*2^p >= rowend
    colmid = (colstart + colend) // 2
    rowmid = (rowstart + rowend) // 2

    for depth in range(15):  # That should be a high enough upper limit
        scale = 2 ** depth
        cols_fit = colmid + 2 * scale >= colend
        rows_fit = rowmid + 4 * scale >= rowend
        if cols_fit and rows_fit:
            return depth

    raise Exception("Your map is waaaay to big")
def generate_quadtree(chunkmap, colstart, colend, rowstart, rowend, prefix, procs):
    """Entry point for quadtree_recurse(): pads the requested range so that
    it splits evenly, then renders the whole quadtree for the given chunkmap
    under the directory prefix.
    """
    depth = get_quadtree_depth(colstart, colend, rowstart, rowend)
    colmid = (colstart + colend) // 2
    rowmid = (rowstart + rowend) // 2

    # Grow the bounds symmetrically around the midpoint so they are sized
    # correctly. See the comments in get_quadtree_depth()
    half_cols = 2 * 2 ** depth
    half_rows = 4 * 2 ** depth

    # The semaphore is sized procs-1 since the main process always does work
    # as well; only procs-1 /new/ processes may be spawned.
    sem = multiprocessing.BoundedSemaphore(procs - 1)

    quadtree_recurse(
        chunkmap,
        colmid - half_cols, colmid + half_cols,
        rowmid - half_rows, rowmid + half_rows,
        prefix, "base", sem)
def quadtree_recurse(chunkmap, colstart, colend, rowstart, rowend, prefix, quadrant, sem):
    """Recursive method that generates a quadtree.
    A single call generates and saves an image for the range specified by
    colstart, colend, rowstart, and rowend, and returns its location.

    The image is saved as os.path.join(prefix, quadrant+".png")

    If the requested range is larger than a single tile (2 columns by 4
    rows), this method will instead make 4 calls to itself to render the 4
    quadrants of the image. The four pieces are then resized and pasted into
    one image that is saved.

    If the requested range is exactly one tile, it is generated with
    render_worldtile()

    The path "prefix" should be a directory where this call should save its
    image.

    quadrant is used in recursion. If it is "base", the image is saved in the
    directory named by prefix, and recursive calls will have quadrant set to
    "0" "1" "2" or "3" and prefix will remain unchanged.

    If quadrant is anything else, the tile will be saved just the same, but for
    recursive calls a directory named quadrant will be created (if it doesn't
    exist) and prefix will be set to os.path.join(prefix, quadrant)

    So the first call will have prefix "tiles" (e.g.) and quadrant "base" and
    will save its image as "tiles/base.png"
    The second call will have prefix "tiles" and quadrant "0" and will save its
    image as "tiles/0.png". It will create the directory "tiles/0/"
    The third call will have prefix "tiles/0", quadrant "0" and will save its
    image as "tiles/0/0.png"

    Each tile outputted is always 384 by 384 pixels.

    The last parameter, sem, should be a multiprocessing.Semaphore or
    BoundedSemaphore object. Before each recursive call, the semaphore is
    acquired without blocking. If the acquire is successful, the recursive call
    will spawn a new process. If it is not successful, the recursive call is
    run in the same thread. The semaphore is passed to each recursive call, so
    any call could spawn new processes if another one exits at some point.

    The return from this function is (path, hash) where path is the path to the
    file saved, and hash is a byte string that depends on the tile's contents.
    If the tile is blank, path will be None, but hash will still be valid.

    Raises Exception if the range is smaller than one tile, or if the stored
    hash says a base tile is up to date but its image file is missing.
    """
    cols = colend - colstart
    rows = rowend - rowstart

    # Get the tile's existing hash. Maybe it hasn't changed. Whether this
    # function invocation is destined to recurse, or whether we end up calling
    # render_worldtile(), the hash will help us short circuit a lot of pixel
    # copying.
    hashpath = os.path.join(prefix, quadrant+".hash")
    if os.path.exists(hashpath):
        with open(hashpath, "rb") as hashin:
            oldhash = hashin.read()
    else:
        # This method (should) never actually return None for a hash, this is
        # used so it will always compare unequal.
        oldhash = None

    if cols == 2 and rows == 4:
        # base case: just render the image
        img, newhash = render_worldtile(chunkmap, colstart, colend, rowstart, rowend, oldhash)
        # There are a few cases to handle here:
        # 1) img is None: the image doesn't exist (would have been blank, no
        #    chunks exist for that range.
        # 2) img is True: the image hasn't changed according to the hashes. The
        #    image object is not returned by render_worldtile, but we do need to
        #    return the path to it.
        # 3) img is a PIL.Image.Image object, a new tile was computed, we need
        #    to save it and its hash (newhash) to disk.

        if img is None:
            # The tile is blank, there should not be an image here. If one
            # does exist, from a previous world or something, it is removed
            # along with its hash, and None is returned to indicate to our
            # caller this tile is blank.
            remove_tile(prefix, quadrant)
            return None, newhash
        if img is True:
            # No image was returned because the hashes matched. Return the path
            # to the image that already exists and is up to date according to
            # the hash
            path = os.path.join(prefix, quadrant+".png")
            if not os.path.exists(path):
                # Oops, the image doesn't actually exist. User must have
                # deleted it, or must be some bug?
                raise Exception("Error, this image should have existed according to the hashes, but didn't")
            return path, newhash

        # If img was not None or True, it is an image object. The image exists
        # and the hashes did not match, so it must have changed. Fall through
        # to the last part of this function which saves the image and its hash.
        assert isinstance(img, Image.Image)
    elif cols < 2 or rows < 4:
        raise Exception("Something went wrong, this tile is too small. (Please send "
                "me the traceback so I can fix this)")
    else:
        # Recursively generate each quadrant for this tile

        # Find the midpoint
        colmid = (colstart + colend) // 2
        rowmid = (rowstart + rowend) // 2

        # Assert that the split in the center still leaves everything sized
        # exactly right by checking divisibility by the final row and
        # column sizes. This isn't sufficient, but is necessary for
        # success. (A better check would make sure the dimensions fit the
        # above equations for the same power of 2)
        assert (colmid - colstart) % 2 == 0
        assert (colend - colmid) % 2 == 0
        assert (rowmid - rowstart) % 4 == 0
        assert (rowend - rowmid) % 4 == 0

        if quadrant == "base":
            newprefix = prefix
        else:
            # Make the directory for the recursive subcalls
            newprefix = os.path.join(prefix, quadrant)
            if not os.path.exists(newprefix):
                os.mkdir(newprefix)

        # Quadrants 0-2 may each run in their own process. For each one, try
        # to grab the semaphore without blocking: on success a real process
        # is used (it releases the semaphore when it finishes), otherwise a
        # FakeProcess that does the work in this process when started.
        subranges = (
            (colstart, colmid, rowstart, rowmid, "0"),
            (colmid, colend, rowstart, rowmid, "1"),
            (colstart, colmid, rowmid, rowend, "2"),
        )
        workers = []
        for subcolstart, subcolend, subrowstart, subrowend, name in subranges:
            if sem.acquire(False):
                Procobj = ReturnableProcess
            else:
                Procobj = FakeProcess
            workers.append(Procobj(sem, target=quadtree_recurse,
                    args=(chunkmap, subcolstart, subcolend, subrowstart,
                        subrowend, newprefix, name, sem)
                    ))

        # Start the processes. If one is a fakeprocess, it will do the
        # processing right here instead.
        for worker in workers:
            worker.start()

        # 3rd quadrant always runs in this process, no need to spawn a new one
        # since we're just going to turn around and wait for it.
        quad3file, hash3 = quadtree_recurse(chunkmap,
                colmid, colend, rowmid, rowend,
                newprefix, "3", sem)

        quad0file, hash0 = workers[0].get()
        quad1file, hash1 = workers[1].get()
        quad2file, hash2 = workers[2].get()

        # Keep a hash of the concatenation of each returned hash. If it
        # matches oldhash from above, skip rendering this tile. This is
        # checked even if the tile files returned None, so the hashes
        # returned tell us whether we need to update this tile or not.
        hasher = hashlib.md5()
        hasher.update(hash0)
        hasher.update(hash1)
        hasher.update(hash2)
        hasher.update(hash3)
        newhash = hasher.digest()
        if newhash == oldhash:
            # Nothing left to do, this tile already exists and hasn't changed.
            return os.path.join(prefix, quadrant+".png"), oldhash

        # Check here if this tile is actually blank. If all 4 returned quadrant
        # filenames are None, this tile should not be rendered. However, we
        # still need to return a valid hash for it, so that's why this check is
        # below the hash check.
        if not (quad0file or quad1file or quad2file or quad3file):
            remove_tile(prefix, quadrant)
            return None, newhash

        # Shrink each existing quadrant to a quarter of the tile and paste
        # it into its corner.
        img = Image.new("RGBA", (384, 384))
        placements = (
            (quad0file, (0, 0)),
            (quad1file, (192, 0)),
            (quad2file, (0, 192)),
            (quad3file, (192, 192)),
        )
        for quadfile, corner in placements:
            if quadfile:
                quadimg = Image.open(quadfile).resize((192, 192), Image.ANTIALIAS)
                img.paste(quadimg, corner)

    # At this point, if the tile hasn't changed or is blank, the function
    # has already returned; img is a freshly rendered tile.

    # Save the image
    path = os.path.join(prefix, quadrant+".png")
    img.save(path)

    print("Saving image " + path)

    # Save the hash
    with open(os.path.join(prefix, quadrant+".hash"), 'wb') as hashout:
        hashout.write(newhash)

    # Return the location and hash of this tile
    return path, newhash
def remove_tile(prefix, quadrent):
    """Delete a tile's image and hash files, if they exist.

    Called when a tile turns out to be blank. The files removed are
    os.path.join(prefix, quadrent) + ".png" and + ".hash"; a file that is
    not there is silently skipped. A message is printed only when the image
    is removed.
    """
    path = os.path.join(prefix, quadrent)
    img = path + ".png"
    hashfile = path + ".hash"  # named so as not to shadow the builtin hash()

    if os.path.exists(img):
        print("removing " + img)
        os.unlink(img)
    if os.path.exists(hashfile):
        os.unlink(hashfile)
class ReturnableProcess(multiprocessing.Process):
    """Like the standard multiprocessing.Process class, but the return value of
    the target method is available by calling get().

    If the target raises, the exception object is sent back instead and
    re-raised by get().

    The given semaphore is released when the target finishes running,
    whether it returned or raised.
    """

    def __init__(self, semaphore, *args, **kwargs):
        self.__sem = semaphore
        multiprocessing.Process.__init__(self, *args, **kwargs)

    def run(self):
        """Run the target and send its result (or exception) down the pipe."""
        try:
            results = self._target(*self._args, **self._kwargs)
        except BaseException as e:
            self._respipe_in.send(e)
        else:
            self._respipe_in.send(results)
        finally:
            self.__sem.release()

    def get(self):
        """Wait for the process and return the target's return value, or
        raise the exception the target raised.
        """
        # Receive before joining: a child blocked in send() on a full pipe
        # cannot exit, so joining first could wait forever on a large result.
        ret = self._respipe_out.recv()
        self.join()
        if isinstance(ret, BaseException):
            raise ret
        return ret

    def start(self):
        """Create the result pipe, then start the process."""
        self._respipe_out, self._respipe_in = multiprocessing.Pipe()
        multiprocessing.Process.start(self)
class FakeProcess(object):
    """Drop-in stand-in for ReturnableProcess that does its work in the
    calling thread: start() runs the target right away and get() hands back
    what it returned. Lets quadtree_recurse treat both cases alike.
    """

    def __init__(self, semaphore, target, args=None, kwargs=None):
        # semaphore is accepted only so the constructor matches
        # ReturnableProcess; it is never used here.
        self._target = target
        self._args = args or ()
        self._kwargs = kwargs or {}

    def start(self):
        # Runs synchronously; any exception from the target propagates here.
        func = self._target
        self.ret = func(*self._args, **self._kwargs)

    def get(self):
        return self.ret