From 866b4991423c510215cb4bcd5f2b592989373aa8 Mon Sep 17 00:00:00 2001
From: Andrew Brown <brownan@gmail.com>
Date: Sun, 13 Nov 2011 09:22:19 -0500
Subject: [PATCH] Lots of rearranging and comments touchups

fixes progress updates for first level tiles
fixed long-standing typo inntertile -> innertile
---
 overviewer_core/quadtree.py   |  59 ++++++----
 overviewer_core/rendernode.py | 216 +++++++++++++++++++++++++++-------
 2 files changed, 208 insertions(+), 67 deletions(-)
diff --git a/overviewer_core/quadtree.py b/overviewer_core/quadtree.py
index ea3be60..83c0629 100644
--- a/overviewer_core/quadtree.py
+++ b/overviewer_core/quadtree.py
@@ -118,6 +118,9 @@ class QuadtreeGen(object):
         if not os.path.exists(self.full_tiledir):
             logging.debug("%s doesn't exist, doing a full render", self.full_tiledir)
             self.forcerender = True
+
+    def __repr__(self):
+        return "<QuadTreeGen for rendermode %r>" % self.rendermode
         
     def _get_cur_depth(self):
         """How deep is the quadtree currently in the destdir? This glances in
@@ -268,21 +271,9 @@ class QuadtreeGen(object):
                     
         return chunklist   
         
-    def get_worldtiles(self):
-        """Returns an iterator over the tiles of the most detailed layer that
-        need to be rendered
-
-        """
-        # This quadtree object gets replaced by the caller in rendernode.py,
-        # but we still have to let them know which quadtree this tile belongs
-        # to. Hence returning both self and the tile.
-
-        dirty_tree = self.scan_chunks()
-        dirty_tiles = (Tile.from_path(tpath) for tpath in dirty_tree.iterate_dirty())
-        return ([self, tile] for tile in dirty_tiles)
-        
     def get_innertiles(self,zoom):
-        """Same as get_worldtiles but for the inntertile routine.
+        """Returns the inner tiles at the given zoom level that need to be rendered
+
         """    
         for path in iterate_base4(zoom):
             # This image is rendered at(relative to the worker's destdir):
@@ -376,6 +367,9 @@ class QuadtreeGen(object):
         There is no return value
         """    
 
+        # The poi_q (point of interest queue) is a multiprocessing Queue
+        # object, and it gets stashed in the world object by the constructor to
+        # RenderNode so we can find it right here.
         poi_queue = self.world.poi_q
 
         imgpath = tile.get_filepath(self.full_tiledir, self.imgformat)
@@ -398,13 +392,15 @@ class QuadtreeGen(object):
         if not chunks:
             # No chunks were found in this tile
             if not check_tile:
-                logging.warning("Tile %s was requested for render, but no chunks found! This may be a bug", tile)
+                logging.warning("%s was requested for render, but no chunks found! This may be a bug", tile)
             try:
                 os.unlink(imgpath)
             except OSError, e:
                 # ignore only if the error was "file not found"
                 if e.errno != errno.ENOENT:
                     raise
+            else:
+                logging.debug("%s deleted", tile)
             return
 
         # Create the directory if not exists
@@ -502,7 +498,7 @@ class QuadtreeGen(object):
 
         dirty = DirtyTiles(depth)
 
-        logging.debug("Scanning chunks for tiles that need rendering...")
+        logging.debug("	Scanning chunks for tiles that need rendering...")
         chunkcount = 0
         stime = time.time()
 
@@ -516,7 +512,7 @@ class QuadtreeGen(object):
         for chunkx, chunky, chunkmtime in self.world.iterate_chunk_metadata():
             chunkcount += 1
             if chunkcount % 10000 == 0:
-                logging.debug("%s chunks scanned", chunkcount)
+                logging.info("	%s chunks scanned", chunkcount)
 
             chunkcol, chunkrow = self.world.convert_coords(chunkx, chunky)
             #logging.debug("Looking at chunk %s,%s", chunkcol, chunkrow)
@@ -561,15 +557,18 @@ class QuadtreeGen(object):
                         dirty.set_dirty(tile.path)
                         #logging.debug("	Setting tile as dirty. Will render.")
 
-        logging.debug("Done. %s chunks scanned in %s seconds", chunkcount, int(time.time()-stime))
+        t = int(time.time()-stime)
+        logging.debug("	Done. %s chunks scanned in %s second%s", chunkcount, t,
+                "s" if t != 1 else "")
 
-        logging.debug("Counting tiles that need rendering...")
-        tilecount = 0
-        stime = time.time()
-        for _ in dirty.iterate_dirty():
-            tilecount += 1
-        logging.debug("Done. %s tiles need to be rendered. (count took %s seconds)",
-                tilecount, int(time.time()-stime))
+        if logging.getLogger().isEnabledFor(logging.DEBUG):
+            logging.debug("	Counting tiles that need rendering...")
+            tilecount = 0
+            stime = time.time()
+            for _ in dirty.iterate_dirty():
+                tilecount += 1
+            logging.debug("	Done. %s tiles need to be rendered. (count took %s seconds)",
+                    tilecount, int(time.time()-stime))
         
         return dirty
 
@@ -734,6 +733,16 @@ class DirtyTiles(object):
         # long as an unset_dirty() method or similar does not exist.
         return any(self.children)
 
+    def count(self):
+        """Returns the total number of dirty leaf nodes.
+
+        """
+        # TODO: Make this more efficient (although for even the largest trees,
+        # this takes only seconds)
+        c = 0
+        for _ in self.iterate_dirty():
+            c += 1
+        return c
 
 class Tile(object):
     """A simple container class that represents a single render-tile.
diff --git a/overviewer_core/rendernode.py b/overviewer_core/rendernode.py
index c0c24b0..b4af818 100644
--- a/overviewer_core/rendernode.py
+++ b/overviewer_core/rendernode.py
@@ -24,6 +24,7 @@ import time
 
 from . import textures
 from . import util
+from . import quadtree
 import c_overviewer
 
 """
@@ -79,7 +80,16 @@ def pool_initializer(rendernode):
             
 class RenderNode(object):
     def __init__(self, quadtrees, options):
-        """Distributes the rendering of a list of quadtrees."""
+        """Distributes the rendering of a list of quadtrees.
+
+        This class tries not to make any assumptions on whether the given
+        quadtrees share the same world or whether the given quadtrees share the
+        same depth/structure. However, those assumptions have not been checked;
+        quadtrees right now always share the same depth, structure, and
+        associated world objects. Beware of mixing and matching quadtrees from
+        different worlds!
+        
+        """
 
         if not len(quadtrees) > 0:
             raise ValueError("there must be at least one quadtree to work on")    
@@ -100,10 +110,16 @@ class RenderNode(object):
             if q.world not in self.worlds:
                 self.worlds.append(q.world)            
 
-        manager = multiprocessing.Manager() 
         # queue for receiving interesting events from the renderer
-        # (like the discovery of signs!
-        #stash into the world object like we stash an index into the quadtree
+        # (like the discovery of signs!)
+        # stash into the world object like we stash an index into the quadtree
+        #
+        # TODO: Managers spawn a sub-process to manage their objects. If p=1,
+        # fall back to a non-managed queue (like Queue.Queue). (While the
+        # management process won't do much processing, part of the point of p=1
+        # is to ease debugging and profiling by keeping everything in one
+        # process/thread)
+        manager = multiprocessing.Manager() 
         for world in self.worlds:
             world.poi_q = manager.Queue() 
 
@@ -141,44 +157,83 @@ class RenderNode(object):
             else:
                 pool.map_async(bool,xrange(multiprocessing.cpu_count()),1)
                 
-        # 1 quadtree object per rendermode requested
+        # The list of quadtrees. There is 1 quadtree object per rendermode
+        # requested
         quadtrees = self.quadtrees
         
-        # Determine the total number of tiles by adding up the number of tiles
-        # from each quadtree. Also find the max zoom level (max_p). Even though
-        # each quadtree will always have the same zoom level, this bit of code
-        # does not make that assumption.
+        # Find the max zoom level (max_p). Even though each quadtree will
+        # always have the same zoom level with the current implementation, this
+        # bit of code does not make that assumption.
+        # max_p is stored in the instance so self.print_statusline can see it
         max_p = 0
-        total = 0
         for q in quadtrees:
-            total += 4**q.p
             if q.p > max_p:
                 max_p = q.p
         self.max_p = max_p
 
-        # The next sections of code render the highest zoom level of tiles. The
-        # section after render the other levels.
-        results = collections.deque()
-        complete = 0
-        logging.info("Rendering highest zoom level of tiles now.")
-        logging.info("Rendering {0} layer{1}".format(len(quadtrees),'s' if len(quadtrees) > 1 else '' ))
-        logging.info("There are {0} tiles to render".format(total))        
-        logging.info("There are {0} total levels to render".format(self.max_p))
-        logging.info("Don't worry, each level has only 25% as many tiles as the last.")
-        logging.info("The others will go faster")
-        count = 0
+        # Signal to the quadtrees to scan the chunks and their respective tile
+        # directories to find what needs to be rendered. We get from this the
+        # total tiles that need to be rendered (at the highest level across all
+        # quadtrees) as well as a list of [qtree, DirtyTiles object]
+        total_rendertiles, dirty_list = self._get_dirty_tiles()
+
+        if total_rendertiles == 0:
+            logging.info(r"There is no work to do, your map is up to date! \o/")
+            return
+
+        # Set a reasonable batch size. Groups of tiles are sent to workers in
+        # batches this large. It should be a multiple of the number of
+        # quadtrees so that each worker gets corresponding tiles from each
+        # quadtree in the typical case.
         batch_size = 4*len(quadtrees)
         while batch_size < 10:
             batch_size *= 2
+        logging.debug("Will push tiles to worker processes in batches of %s", batch_size)
+
+        # The next sections of code render the highest zoom level of tiles. The
+        # section after that renders the other levels.
+        logging.info("")
+        logging.info("Rendering highest zoom level of tiles now.")
+        logging.info("Rendering {0} rendermode{1}".format(len(quadtrees),'s' if len(quadtrees) > 1 else '' ))
+        logging.info("There are {0} tiles to render at this level".format(total_rendertiles))        
+        logging.info("There are {0} total levels".format(self.max_p))
+
+        # results is a queue of multiprocessing.AsyncResult objects. They are
+        # appended to the end and held in the queue until they are pop'd and
+        # the results collected.
+        # complete holds the tally of the number of tiles rendered. Each
+        # results object returns the number of tiles rendered and is
+        # accumulated in complete
+        results = collections.deque()
+        complete = 0
+
+        # Iterate over _apply_render_worldtiles(). That generator method
+        # dispatches batches of tiles to the workers and yields results
+        # objects. multiprocessing.AsyncResult objects are lazy objects that
+        # are used to access the values returned by the worker's function,
+        # which in this case, is render_worldtile_batch()
         timestamp = time.time()
-        for result in self._apply_render_worldtiles(pool,batch_size):
+        for result in self._apply_render_worldtiles(dirty_list, pool, batch_size):
             results.append(result)               
+
+            # The results objects are lazy. The workers will process an item in
+            # the pool when they get to it, and when we call result.get() it
+            # blocks until the result is ready. We don't want to add *all* the
+            # tiles to the pool because we'd have to hold every result object in
+            # memory. So we add a few batches to the pool / result objects to
+            # the results queue, then drain the results queue, and repeat.
+
             # every second drain some of the queue
             timestamp2 = time.time()
             if timestamp2 >= timestamp + 1:
                 timestamp = timestamp2                
                 count_to_remove = (1000//batch_size)
+
+                # If there are more than count_to_remove items in the results
+                # queue, drain the point of interest queue and count_to_remove
+                # items from the results queue
                 if count_to_remove < len(results):
+                    # Drain the point of interest queue for each world
                     for world in self.worlds:
                         try:
                             while (1):
@@ -189,28 +244,41 @@ class RenderNode(object):
                                         #print "got an item from the queue!"
                                         world.POI.append(item[1])
                                 elif item[0] == "removePOI":
-                                    world.persistentData['POI'] = filter(lambda x: x['chunk'] != item[1], world.persistentData['POI'])
+                                    world.persistentData['POI'] = filter(
+                                            lambda x: x['chunk'] != item[1],
+                                            world.persistentData['POI']
+                                            )
 
                                 elif item[0] == "rendered":
                                     self.rendered_tiles.append(item[1])
 
                         except Queue.Empty:
                             pass
+                    # Now drain the results queue. results has more than
+                    # count_to_remove items in it (as checked above)
                     while count_to_remove > 0:
                         count_to_remove -= 1
                         complete += results.popleft().get()
-                        self.print_statusline(complete, total, 1)  
+                        self.print_statusline(complete, total_rendertiles, 1)  
+
+            # If the results queue is getting too big, drain all but
+            # 500//batch_size items from it
             if len(results) > (10000//batch_size):
                 # Empty the queue before adding any more, so that memory
                 # required has an upper bound
                 while len(results) > (500//batch_size):
                     complete += results.popleft().get()
-                    self.print_statusline(complete, total, 1)
+                    self.print_statusline(complete, total_rendertiles, 1)
 
-        # Wait for the rest of the results
+            # Loop back to the top, add more items to the queue, and repeat
+
+        # Added all there is to add to the workers. Wait for the rest of the
+        # results to come in before continuing
         while len(results) > 0:
             complete += results.popleft().get()
-            self.print_statusline(complete, total, 1)
+            self.print_statusline(complete, total_rendertiles, 1)
+
+        # Now drain the point of interest queues for each world
         for world in self.worlds:    
             try:
                 while (1):
@@ -228,21 +296,37 @@ class RenderNode(object):
             except Queue.Empty:
                 pass
 
-        self.print_statusline(complete, total, 1, True)
+        # Print the final status line unconditionally
+        self.print_statusline(complete, total_rendertiles, 1, True)
 
+        ##########################################
         # The highest zoom level has been rendered.
-        # Now do the lower zoom levels
+        # Now do the lower zoom levels, working our way down to level 1
         for zoom in xrange(self.max_p-1, 0, -1):
+            # "level" counts up for the status output
             level = self.max_p - zoom + 1
+
             assert len(results) == 0
+
+            # Reset these for this zoom level
             complete = 0
             total = 0
+
+            # Count up the total tiles to render at this zoom level
             for q in quadtrees:
                 if zoom <= q.p:
                     total += 4**zoom
+
             logging.info("Starting level {0}".format(level))
             timestamp = time.time()
-            for result in self._apply_render_inntertile(pool, zoom,batch_size):
+
+            # Same deal as above. _apply_render_innertile adds tiles in batch
+            # to the worker pool and yields result objects that return the
+            # number of tiles rendered.
+            #
+            # XXX Some quadtrees may not have tiles at this zoom level if we're
+            # not assuming they all have the same depth!!
+            for result in self._apply_render_innertile(pool, zoom,batch_size):
                 results.append(result)
                 # every second drain some of the queue
                 timestamp2 = time.time()
@@ -274,34 +358,75 @@ class RenderNode(object):
         for q in quadtrees:
             q.render_innertile(os.path.join(q.destdir, q.tiledir), "base")
 
-    def _apply_render_worldtiles(self, pool,batch_size):
-        """Adds tiles to the render queue and dispatch them to the worker pool.
+    def _get_dirty_tiles(self):
+        """Returns two items:
+        1) The total number of tiles needing rendering
+        2) a list of (qtree, DirtyTiles) objects holding which tiles in the
+           respective quadtrees need to be rendered
+
+        """
+        all_dirty = []
+        total = 0
+
+        logging.info("Scanning for tiles to update. This shouldn't take too long...")
+        for i, q in enumerate(self.quadtrees):
+            logging.info("Scanning for tiles in rendermode %s", q.rendermode)
+            dirty = q.scan_chunks()
+
+            total += dirty.count()
+
+            all_dirty.append((q, dirty))
+
+        logging.info("Scan finished. %s total tiles need to be rendered at the highest level", total)
+        return total, all_dirty
+
+    def _apply_render_worldtiles(self, tileset, pool,batch_size):
+        """This generator method dispatches batches of tiles to the given
+        worker pool with the function render_worldtile_batch(). It yields
+        multiprocessing.AsyncResult objects. Each result object returns the
+        number of tiles rendered.
+
+        tileset is a list of (QuadtreeGen object, DirtyTiles object)
         
         Returns an iterator over result objects. Each time a new result is
         requested, a new batch of tasks are added to the pool and a result
         object is returned.
         """
+        # Make sure batch_size is a sane value
         if batch_size < len(self.quadtrees):
             batch_size = len(self.quadtrees)
+
+        # tileset is a list of (quadtreegen object, dirtytiles tree object)
+        # We want: a sequence of iterators that each iterate over
+        # [qtree obj, tile obj] items
+        def mktileiterable(qtree, dtiletree):
+            return ([qtree, quadtree.Tile.from_path(tilepath)] for tilepath in dtiletree.iterate_dirty())
+        iterables = []
+        for qtree, dtiletree in tileset:
+            tileiterable = mktileiterable(qtree, dtiletree)
+            iterables.append(tileiterable)
+        
+        # batch is a list of (qtree index, Tile object). This list is slowly
+        # added to and when it reaches size batch_size, it is sent off to the
+        # pool.
         batch = []
-        jobcount = 0
+
         # roundrobin add tiles to a batch job (thus they should all roughly work on similar chunks)
-        iterables = [q.get_worldtiles() for q in self.quadtrees]
         for job in util.roundrobin(iterables):
-            # fixup so the worker knows which quadtree this is                 
+            # fixup so the worker knows which quadtree this is. It's a bit of a
+            # hack but it helps not to keep re-sending the qtree objects to the
+            # workers.
             job[0] = job[0]._render_index
             # Put this in the batch to be submited to the pool  
             batch.append(job)
-            jobcount += 1
-            if jobcount >= batch_size:
-                jobcount = 0
+            if len(batch) >= batch_size:
                 yield pool.apply_async(func=render_worldtile_batch, args= [batch])
                 batch = []
-        if jobcount > 0:
+        if len(batch):
             yield pool.apply_async(func=render_worldtile_batch, args= [batch])
 
-    def _apply_render_inntertile(self, pool, zoom,batch_size):
-        """Same as _apply_render_worltiles but for the inntertile routine.
+    def _apply_render_innertile(self, pool, zoom,batch_size):
+        """Same as _apply_render_worldtiles but for the innertile routine.
         Returns an iterator that yields result objects from tasks that have
         been applied to the pool.
         """
@@ -328,6 +453,13 @@ class RenderNode(object):
             
 @catch_keyboardinterrupt
 def render_worldtile_batch(batch):
+    """Main entry point for workers processing a render-tile (also called a
+    world tile).  Returns the number of tiles rendered, which is the length of
+    the batch list passed in
+
+    batch should be a list of (qtree index, tile object)
+
+    """
     # batch is a list of items to process. Each item is [quadtree_id, Tile object]
     global child_rendernode
     rendernode = child_rendernode