From 866b4991423c510215cb4bcd5f2b592989373aa8 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Sun, 13 Nov 2011 09:22:19 -0500 Subject: [PATCH] Lots of rearranging and comments touchups fixes progress updates for first level tiles fixed long-standing typo inntertile -> innertile --- overviewer_core/quadtree.py | 59 ++++++---- overviewer_core/rendernode.py | 216 +++++++++++++++++++++++++++------- 2 files changed, 208 insertions(+), 67 deletions(-) diff --git a/overviewer_core/quadtree.py b/overviewer_core/quadtree.py index ea3be60..83c0629 100644 --- a/overviewer_core/quadtree.py +++ b/overviewer_core/quadtree.py @@ -118,6 +118,9 @@ class QuadtreeGen(object): if not os.path.exists(self.full_tiledir): logging.debug("%s doesn't exist, doing a full render", self.full_tiledir) self.forcerender = True + + def __repr__(self): + return "" % self.rendermode def _get_cur_depth(self): """How deep is the quadtree currently in the destdir? This glances in @@ -268,21 +271,9 @@ class QuadtreeGen(object): return chunklist - def get_worldtiles(self): - """Returns an iterator over the tiles of the most detailed layer that - need to be rendered - - """ - # This quadtree object gets replaced by the caller in rendernode.py, - # but we still have to let them know which quadtree this tile belongs - # to. Hence returning both self and the tile. - - dirty_tree = self.scan_chunks() - dirty_tiles = (Tile.from_path(tpath) for tpath in dirty_tree.iterate_dirty()) - return ([self, tile] for tile in dirty_tiles) - def get_innertiles(self,zoom): - """Same as get_worldtiles but for the inntertile routine. 
+ """Returns the inner tiles at the given zoom level that need to be rendered + """ for path in iterate_base4(zoom): # This image is rendered at(relative to the worker's destdir): @@ -376,6 +367,9 @@ class QuadtreeGen(object): There is no return value """ + # The poi_q (point of interest queue) is a multiprocessing Queue + # object, and it gets stashed in the world object by the constructor to + # RenderNode so we can find it right here. poi_queue = self.world.poi_q imgpath = tile.get_filepath(self.full_tiledir, self.imgformat) @@ -398,13 +392,15 @@ class QuadtreeGen(object): if not chunks: # No chunks were found in this tile if not check_tile: - logging.warning("Tile %s was requested for render, but no chunks found! This may be a bug", tile) + logging.warning("%s was requested for render, but no chunks found! This may be a bug", tile) try: os.unlink(imgpath) except OSError, e: # ignore only if the error was "file not found" if e.errno != errno.ENOENT: raise + else: + logging.debug("%s deleted", tile) return # Create the directory if not exists @@ -502,7 +498,7 @@ class QuadtreeGen(object): dirty = DirtyTiles(depth) - logging.debug("Scanning chunks for tiles that need rendering...") + logging.debug(" Scanning chunks for tiles that need rendering...") chunkcount = 0 stime = time.time() @@ -516,7 +512,7 @@ class QuadtreeGen(object): for chunkx, chunky, chunkmtime in self.world.iterate_chunk_metadata(): chunkcount += 1 if chunkcount % 10000 == 0: - logging.debug("%s chunks scanned", chunkcount) + logging.info(" %s chunks scanned", chunkcount) chunkcol, chunkrow = self.world.convert_coords(chunkx, chunky) #logging.debug("Looking at chunk %s,%s", chunkcol, chunkrow) @@ -561,15 +557,18 @@ class QuadtreeGen(object): dirty.set_dirty(tile.path) #logging.debug(" Setting tile as dirty. Will render.") - logging.debug("Done. %s chunks scanned in %s seconds", chunkcount, int(time.time()-stime)) + t = int(time.time()-stime) + logging.debug(" Done. 
%s chunks scanned in %s second%s", chunkcount, t, + "s" if t != 1 else "") - logging.debug("Counting tiles that need rendering...") - tilecount = 0 - stime = time.time() - for _ in dirty.iterate_dirty(): - tilecount += 1 - logging.debug("Done. %s tiles need to be rendered. (count took %s seconds)", - tilecount, int(time.time()-stime)) + if logging.getLogger().isEnabledFor(logging.DEBUG): + logging.debug(" Counting tiles that need rendering...") + tilecount = 0 + stime = time.time() + for _ in dirty.iterate_dirty(): + tilecount += 1 + logging.debug(" Done. %s tiles need to be rendered. (count took %s seconds)", + tilecount, int(time.time()-stime)) return dirty @@ -734,6 +733,16 @@ class DirtyTiles(object): # long as an unset_dirty() method or similar does not exist. return any(self.children) + def count(self): + """Returns the total number of dirty leaf nodes. + + """ + # TODO: Make this more efficient (although for even the largest trees, + # this takes only seconds) + c = 0 + for _ in self.iterate_dirty(): + c += 1 + return c class Tile(object): """A simple container class that represents a single render-tile. diff --git a/overviewer_core/rendernode.py b/overviewer_core/rendernode.py index c0c24b0..b4af818 100644 --- a/overviewer_core/rendernode.py +++ b/overviewer_core/rendernode.py @@ -24,6 +24,7 @@ import time from . import textures from . import util +from . import quadtree import c_overviewer """ @@ -79,7 +80,16 @@ def pool_initializer(rendernode): class RenderNode(object): def __init__(self, quadtrees, options): - """Distributes the rendering of a list of quadtrees.""" + """Distributes the rendering of a list of quadtrees. + + This class tries not to make any assumptions on whether the given + quadtrees share the same world or whether the given quadtrees share the + same depth/structure. However, those assumptions have not been checked; + quadtrees right now always share the same depth, structure, and + associated world objects. 
Beware of mixing and matching quadtrees from + different worlds! + + """ if not len(quadtrees) > 0: raise ValueError("there must be at least one quadtree to work on") @@ -100,10 +110,16 @@ class RenderNode(object): if q.world not in self.worlds: self.worlds.append(q.world) - manager = multiprocessing.Manager() # queue for receiving interesting events from the renderer - # (like the discovery of signs! - #stash into the world object like we stash an index into the quadtree + # (like the discovery of signs!) + # stash into the world object like we stash an index into the quadtree + # + # TODO: Managers spawn a sub-process to manage their objects. If p=1, + # fall back to a non-managed queue (like Queue.Queue). (While the + # management process won't do much processing, part of the point of p=1 + # is to ease debugging and profiling by keeping everything in one + # process/thread) + manager = multiprocessing.Manager() for world in self.worlds: world.poi_q = manager.Queue() @@ -141,44 +157,83 @@ class RenderNode(object): else: pool.map_async(bool,xrange(multiprocessing.cpu_count()),1) - # 1 quadtree object per rendermode requested + # The list of quadtrees. There is 1 quadtree object per rendermode + # requested quadtrees = self.quadtrees - # Determine the total number of tiles by adding up the number of tiles - # from each quadtree. Also find the max zoom level (max_p). Even though - # each quadtree will always have the same zoom level, this bit of code - # does not make that assumption. + # Find the max zoom level (max_p). Even though each quadtree will + # always have the same zoom level with the current implementation, this + # bit of code does not make that assumption. + # max_p is stored in the instance so self.print_statusline can see it max_p = 0 - total = 0 for q in quadtrees: - total += 4**q.p if q.p > max_p: max_p = q.p self.max_p = max_p - # The next sections of code render the highest zoom level of tiles. The - # section after render the other levels. 
- results = collections.deque() - complete = 0 - logging.info("Rendering highest zoom level of tiles now.") - logging.info("Rendering {0} layer{1}".format(len(quadtrees),'s' if len(quadtrees) > 1 else '' )) - logging.info("There are {0} tiles to render".format(total)) - logging.info("There are {0} total levels to render".format(self.max_p)) - logging.info("Don't worry, each level has only 25% as many tiles as the last.") - logging.info("The others will go faster") - count = 0 + # Signal to the quadtrees to scan the chunks and their respective tile + # directories to find what needs to be rendered. We get from this the + # total tiles that need to be rendered (at the highest level across all + # quadtrees) as well as a list of [qtree, DirtyTiles object] + total_rendertiles, dirty_list = self._get_dirty_tiles() + + if total_rendertiles == 0: + logging.info(r"There is no work to do, your map is up to date! \o/") + return + + # Set a reasonable batch size. Groups of tiles are sent to workers in + # batches this large. It should be a multiple of the number of + # quadtrees so that each worker gets corresponding tiles from each + # quadtree in the typical case. batch_size = 4*len(quadtrees) while batch_size < 10: batch_size *= 2 + logging.debug("Will push tiles to worker processes in batches of %s", batch_size) + + # The next sections of code render the highest zoom level of tiles. The + # section after render the other levels. + logging.info("") + logging.info("Rendering highest zoom level of tiles now.") + logging.info("Rendering {0} rendermode{1}".format(len(quadtrees),'s' if len(quadtrees) > 1 else '' )) + logging.info("There are {0} tiles to render at this level".format(total_rendertiles)) + logging.info("There are {0} total levels".format(self.max_p)) + + # results is a queue of multiprocessing.AsyncResult objects. They are + # appended to the end and held in the queue until they are pop'd and + # the results collected. 
+ # complete holds the tally of the number of tiles rendered. Each + # results object returns the number of tiles rendered and is + # accumulated in complete + results = collections.deque() + complete = 0 + + # Iterate over _apply_render_worldtiles(). That generator method + # dispatches batches of tiles to the workers and yields results + # objects. multiprocessing.AsyncResult objects are lazy objects that + # are used to access the values returned by the worker's function, + # which in this case, is render_worldtile_batch() timestamp = time.time() - for result in self._apply_render_worldtiles(pool,batch_size): + for result in self._apply_render_worldtiles(dirty_list, pool, batch_size): results.append(result) + + # The results objects are lazy. The workers will process an item in + # the pool when they get to it, and when we call result.get() it + # blocks until the result is ready. We don't want to add *all* the + # tiles to the pool because we'd have to hold every result object in + # memory. So we add a few batches to the pool / result objects to + # the results queue, then drain the results queue, and repeat. + # every second drain some of the queue timestamp2 = time.time() if timestamp2 >= timestamp + 1: timestamp = timestamp2 count_to_remove = (1000//batch_size) + + # If there are more than count_to_remove items in the results + # queue, drain the point of interest queue and count_to_remove + # items from the results queue if count_to_remove < len(results): + # Drain the point of interest queue for each world for world in self.worlds: try: while (1): @@ -189,28 +244,41 @@ class RenderNode(object): #print "got an item from the queue!" 
world.POI.append(item[1]) elif item[0] == "removePOI": - world.persistentData['POI'] = filter(lambda x: x['chunk'] != item[1], world.persistentData['POI']) + world.persistentData['POI'] = filter( + lambda x: x['chunk'] != item[1], + world.persistentData['POI'] + ) elif item[0] == "rendered": self.rendered_tiles.append(item[1]) except Queue.Empty: pass + # Now drain the results queue. results has more than + # count_to_remove items in it (as checked above) while count_to_remove > 0: count_to_remove -= 1 complete += results.popleft().get() - self.print_statusline(complete, total, 1) + self.print_statusline(complete, total_rendertiles, 1) + + # If the results queue is getting too big, drain all but + # 500//batch_size items from it if len(results) > (10000//batch_size): # Empty the queue before adding any more, so that memory # required has an upper bound while len(results) > (500//batch_size): complete += results.popleft().get() - self.print_statusline(complete, total, 1) + self.print_statusline(complete, total_rendertiles, 1) - # Wait for the rest of the results + # Loop back to the top, add more items to the queue, and repeat + + # Added all there is to add to the workers. Wait for the rest of the + # results to come in before continuing while len(results) > 0: complete += results.popleft().get() - self.print_statusline(complete, total, 1) + self.print_statusline(complete, total_rendertiles, 1) + + # Now drain the point of interest queues for each world for world in self.worlds: try: while (1): @@ -228,21 +296,37 @@ class RenderNode(object): except Queue.Empty: pass - self.print_statusline(complete, total, 1, True) + # Print the final status line unconditionally + self.print_statusline(complete, total_rendertiles, 1, True) + ########################################## # The highest zoom level has been rendered. 
- # Now do the lower zoom levels + # Now do the lower zoom levels, working our way down to level 1 for zoom in xrange(self.max_p-1, 0, -1): + # "level" counts up for the status output level = self.max_p - zoom + 1 + assert len(results) == 0 + + # Reset these for this zoom level complete = 0 total = 0 + + # Count up the total tiles to render at this zoom level for q in quadtrees: if zoom <= q.p: total += 4**zoom + logging.info("Starting level {0}".format(level)) timestamp = time.time() - for result in self._apply_render_inntertile(pool, zoom,batch_size): + + # Same deal as above. _apply_render_innertile adds tiles in batch + # to the worker pool and yields result objects that return the + # number of tiles rendered. + # + # XXX Some quadtrees may not have tiles at this zoom level if we're + # not assuming they all have the same depth!! + for result in self._apply_render_innertile(pool, zoom,batch_size): results.append(result) # every second drain some of the queue timestamp2 = time.time() @@ -274,34 +358,75 @@ class RenderNode(object): for q in quadtrees: q.render_innertile(os.path.join(q.destdir, q.tiledir), "base") - def _apply_render_worldtiles(self, pool,batch_size): - """Adds tiles to the render queue and dispatch them to the worker pool. + def _get_dirty_tiles(self): + """Returns two items: + 1) The total number of tiles needing rendering + 2) a list of (qtree, DirtyTiles) objects holding which tiles in the + respective quadtrees need to be rendered + + """ + all_dirty = [] + total = 0 + + logging.info("Scanning for tiles to update. This shouldn't take too long...") + for i, q in enumerate(self.quadtrees): + logging.info("Scanning for tiles in rendermode %s", q.rendermode) + dirty = q.scan_chunks() + + total += dirty.count() + + all_dirty.append((q, dirty)) + + logging.info("Scan finished. 
%s total tiles need to be rendered at the highest level", total) + return total, all_dirty + + def _apply_render_worldtiles(self, tileset, pool,batch_size): + """This generator method dispatches batches of tiles to the given + worker pool with the function render_worldtile_batch(). It yields + multiprocessing.AsyncResult objects. Each result object returns the + number of tiles rendered. + + tileset is a list of (QuadtreeGen object, DirtyTiles object) Returns an iterator over result objects. Each time a new result is requested, a new batch of tasks are added to the pool and a result object is returned. """ + # Make sure batch_size is a sane value if batch_size < len(self.quadtrees): batch_size = len(self.quadtrees) + + # tileset is a list of (quadtreegen object, dirtytiles tree object) + # We want: a sequence of iterators that each iterate over + # [qtree obj, tile obj] items + def mktileiterable(qtree, dtiletree): + return ([qtree, quadtree.Tile.from_path(tilepath)] for tilepath in dtiletree.iterate_dirty()) + iterables = [] + for qtree, dtiletree in tileset: + tileiterable = mktileiterable(qtree, dtiletree) + iterables.append(tileiterable) + + # batch is a list of (qtree index, Tile object). This list is slowly + # added to and when it reaches size batch_size, it is sent off to the + # pool. batch = [] - jobcount = 0 + # roundrobin add tiles to a batch job (thus they should all roughly work on similar chunks) - iterables = [q.get_worldtiles() for q in self.quadtrees] for job in util.roundrobin(iterables): - # fixup so the worker knows which quadtree this is + # fixup so the worker knows which quadtree this is. It's a bit of a + # hack but it helps not to keep re-sending the qtree objects to the + # workers. 
+ job[0] = job[0]._render_index # Put this in the batch to be submited to the pool batch.append(job) - jobcount += 1 - if jobcount >= batch_size: - jobcount = 0 + if len(batch) >= batch_size: yield pool.apply_async(func=render_worldtile_batch, args= [batch]) batch = [] - if jobcount > 0: + if len(batch): yield pool.apply_async(func=render_worldtile_batch, args= [batch]) - def _apply_render_inntertile(self, pool, zoom,batch_size): - """Same as _apply_render_worltiles but for the inntertile routine. + def _apply_render_innertile(self, pool, zoom,batch_size): + """Same as _apply_render_worldtiles but for the innertile routine. Returns an iterator that yields result objects from tasks that have been applied to the pool. """ @@ -328,6 +453,13 @@ class RenderNode(object): @catch_keyboardinterrupt def render_worldtile_batch(batch): + """Main entry point for workers processing a render-tile (also called a + world tile). Returns the number of tiles rendered, which is the length of + the batch list passed in + + batch should be a list of (qtree index, tile object) + + """ # batch is a list of items to process. Each item is [quadtree_id, Tile object] global child_rendernode rendernode = child_rendernode