Browse Source

Keep only the largest connected component of images for panorama generation

Baptiste Jonglez 10 years ago
parent
commit
075a35dedf
2 changed files with 145 additions and 0 deletions
  1. 124 0
      panorama/hugin.py
  2. 21 0
      panorama/tasks.py

+ 124 - 0
panorama/hugin.py

@@ -0,0 +1,124 @@
+"""
+Various utilities to work with Hugin's file format (.pto)
+
+http://hugin.sourceforge.net/docs/manual/PTOptimizer.html
+
+Ideally, we would use the SWIG-generated Python interface, but it might
+not be available everywhere, and it is missing some useful bits of the API
+(most notably graph handling).
+"""
+
+import sys
+import re
+from collections import defaultdict
+from pprint import pprint
+
+# Regex matching a single item in a line.  An item is one or more letters
+# followed either by a run of characters that are neither spaces nor double
+# quotes (e.g. w3264, v=0), or by a double-quoted string that may itself
+# contain spaces (e.g. n"my picture.jpg").  A single trailing space, if
+# present, is part of the match, so callers strip the matched text.
+ITEM_REGEX = re.compile(r"([a-zA-Z]+[^\" ]+ ?)|([a-zA-Z]+\".*?\" ?)")
+
+def parse_pto(f):
+    """Very basic parsing of a .pto file, without attempting to interpret any
+    fields.  Might be missing some functionalities.
+    Returns a dict of the form:
+
+    {'i': [
+            ['w3264', 'h2448', 'f0', 'v=0', 'n"my picture.jpg"'],
+            ['w3264', 'h2448', 'f0', 'v=0', 'n"my other picture.jpg"'],
+          ],
+     'p': [
+            ['f1', 'w16117', 'h2284', 'v360', 'E14.3378', 'R0', 'S0,16117,525,2116', 'n"TIFF_m c:LZW r:CROP"']
+          ],
+    }
+
+    In other words, each line is represent as a list of its items,
+    classified by the "category" (first char on the line)
+    """
+    res = defaultdict(list)
+    for line in f:
+        line = line.strip()
+        if len(line) == 0 or line.startswith("#"):
+            continue
+        if line == "*":
+            break
+        category = line[0]
+        items = [m.group(0).strip() for m in ITEM_REGEX.finditer(line[2:])]
+        res[category].append(items)
+    return res
+
+
+def union_find(nodes, edges):
+    """Very inefficient union-find algorithm to find connected components"""
+    def union(n1, n2):
+        if root[n1] == root[n2]:
+            return
+        components[root[n1]].update(components[root[n2]])
+        components[root[n2]].clear()
+        del components[root[n2]]
+        # Update root for all nodes that pointed to root[n2]
+        root_n2 = root[n2]
+        for node, parent in root.items():
+            if parent == root_n2:
+                root[node] = root[n1]
+
+    components = {node: {node} for node in nodes}
+    root = {node: node for node in nodes}
+    for (n1, n2) in edges:
+        union(n1, n2)
+    return components.values()
+
+
+def compute_connected_components(pto):
+    """Compute connected components of a project, where nodes are images and
+    edges are control points linking images.  Images are index from 0."""
+    nb_images = len(pto['i'])
+    # Set of frozenset({image1, image2})
+    edges = set()
+    for controlpoint in pto['c']:
+        image1 = image2 = None
+        for item in controlpoint:
+            if item[0] == 'n':
+                image1 = int(item[1:])
+            if item[0] == 'N':
+                image2 = int(item[1:])
+            if image1 is not None and image2 is not None and image1 != image2:
+                edges.add(frozenset({image1, image2}))
+    return union_find(list(range(nb_images)), edges)
+
+
+def filter_images(input_pto, images, output_pto):
+    """Filter the input pto file to only keep the specified images, and write
+    the result to [output_pto].
+
+    We assume that only cpfind has been run on the input pto.  In
+    particular, we don't touch optimisation variables or project
+    parameters (FOV, size, etc), since they are not normally set at this
+    early stage in the pipeline.
+    """
+    with open(input_pto) as f:
+        with open(output_pto, 'w') as out:
+            image_id = 0
+            for line in f:
+                # Check image declarations
+                if line[0] == 'i':
+                    if image_id in images:
+                        out.write(line)
+                    image_id += 1
+                # Check control points
+                elif line[0] == 'c':
+                    discard = False
+                    for item in ITEM_REGEX.finditer(line[2:]):
+                        item = item.group(0).strip()
+                        # The control point references a removed image
+                        if (item[0] == 'n' or item[0] == 'N') and int(item[1:]) not in images:
+                            discard = True
+                    if not discard:
+                        out.write(line)
+                # Just copy the line to the output
+                else:
+                    out.write(line)
+
+
+if __name__ == '__main__':
+    with open(sys.argv[1]) as f:
+        #pprint(parse_pto(f))
+        pprint(compute_connected_components(parse_pto(f)))

+ 21 - 0
panorama/tasks.py

@@ -9,6 +9,7 @@ import tempfile
 from celery import shared_task, chain
 
 from .gen_tiles import gen_tiles
+from .hugin import parse_pto, compute_connected_components, filter_images
 
 
 @shared_task
@@ -30,6 +31,25 @@ def cpfind(input_pto, output_pto):
     return output_pto
 
 @shared_task
+def prune_images(input_pto, output_pto):
+    """
+    Given a pto and control points, keep the largest connected components
+    of linked images.  This allows to discard images that are not
+    connected to the rest of the images.
+
+    Implementation details: the python interface to Hugin (hsi) could be
+    used, but it does not export precisely the bits we need to compute the
+    graph of linked images (the CPGraph class).  Thus, the simplest
+    solution is to parse the .pto file by hand.
+    """
+    with open(input_pto) as f:
+        components = compute_connected_components(parse_pto(f))
+    component = max(components, key=len)
+    filter_images(input_pto, component, output_pto)
+    print("Keeping largest component ({} images)".format(len(component)))
+    return output_pto
+
+@shared_task
 def cpclean(input_pto, output_pto):
     subprocess.call(["cpclean", "-o", output_pto, input_pto])
     return output_pto
@@ -69,6 +89,7 @@ def panorama_pipeline(images, output_image):
     pto = lambda filename: os.path.join(d, filename + ".pto")
     pipeline = pto_gen.s(pto("pto_gen"), images) | \
                cpfind.s(pto("cpfind")) | \
+               prune_images.s(pto("prune_images")) | \
                cpclean.s(pto("cpclean")) | \
                linefind.s(pto("linefind")) | \
                autooptimiser.s(pto("autooptimiser")) | \