"""
Various utilities to work with Hugin's file format (.pto)
http://hugin.sourceforge.net/docs/manual/PTOptimizer.html

Ideally, we would use the SWIG-generated Python interface, but it might
not be available everywhere, and it is missing some useful bits of the API
(most notably graph handling).
"""
import re
import sys
from collections import defaultdict
from pprint import pprint
# Regex matching a single item (field) of a .pto line.
# Two alternatives, tried in this order:
#   1. one or more letters followed by an unquoted value containing neither
#      spaces nor double quotes, e.g. ``w3264`` or ``v=0``
#   2. one or more letters followed by a double-quoted value, which may
#      contain spaces, e.g. ``n"my picture.jpg"``
# Either form may also consume one trailing space, so matches should be
# stripped before use.
ITEM_REGEX = re.compile(r"([a-zA-Z]+[^\" ]+ ?)|([a-zA-Z]+\".*?\" ?)")
def parse_pto(f):
    """Parse a .pto file into raw items grouped by line category.

    This is a very basic parser: no field is interpreted, and some
    functionality might be missing.

    Blank lines and lines starting with "#" are skipped. Parsing stops at
    the first line consisting solely of "*".

    Args:
        f: Iterable of text lines, typically an open .pto file.

    Returns:
        A ``defaultdict(list)`` keyed by category (the first character of
        the line). Each line is represented as a list of its items, e.g.:

        {'i': [
            ['w3264', 'h2448', 'f0', 'v=0', 'n"my picture.jpg"'],
            ['w3264', 'h2448', 'f0', 'v=0', 'n"my other picture.jpg"'],
         ],
         'p': [
            ['f1', 'w16117', 'h2284', 'v360', 'E14.3378', 'R0',
             'S0,16117,525,2116', 'n"TIFF_m c:LZW r:CROP"']
         ],
        }
    """
    res = defaultdict(list)
    for raw_line in f:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line == "*":
            break
        category = line[0]
        # Items start at index 2: the category character and the single
        # separator character following it are skipped.
        items = [m.group(0).strip() for m in ITEM_REGEX.finditer(line[2:])]
        res[category].append(items)
    return res
def union_find(nodes, edges):
    """Group nodes into connected components.

    Simple (not asymptotically optimal) union-find: every node maps directly
    to the root of its component, and each root owns the set of its members.

    Args:
        nodes: Iterable of hashable node identifiers.
        edges: Iterable of 2-element iterables ``(n1, n2)``. Both endpoints
            must appear in ``nodes``.

    Returns:
        A ``dict.values()`` view containing one ``set`` of nodes per
        connected component.

    Raises:
        KeyError: If an edge references a node that is not in ``nodes``.
    """
    components = {node: {node} for node in nodes}
    root = {node: node for node in nodes}

    def union(n1, n2):
        kept, absorbed = root[n1], root[n2]
        if kept == absorbed:
            return
        # The nodes whose root is `absorbed` are exactly the members of its
        # component, so only those need re-rooting (no scan of every node).
        moved = components.pop(absorbed)
        for node in moved:
            root[node] = kept
        components[kept].update(moved)

    for n1, n2 in edges:
        union(n1, n2)
    return components.values()
def compute_connected_components(pto):
    """Compute the connected components of a project.

    Nodes are images (indexed from 0, one per entry of ``pto['i']``) and
    edges are control points (``pto['c']``) linking two different images.

    Args:
        pto: Mapping from line category to lists of items, in the form
            returned by ``parse_pto``.

    Returns:
        The result of ``union_find`` over the image indices: one set of
        image indices per connected component.

    Raises:
        ValueError: If an ``n``/``N`` item does not carry an integer.
        KeyError: Propagated from ``union_find`` when a control point
            references an image index outside ``range(len(pto['i']))``.
    """
    nb_images = len(pto['i'])
    # Set of frozenset({image1, image2}): undirected, de-duplicated edges.
    edges = set()
    for controlpoint in pto['c']:
        image1 = image2 = None
        for item in controlpoint:
            tag = item[0]
            if tag == 'n':
                image1 = int(item[1:])
            elif tag == 'N':
                image2 = int(item[1:])
        if image1 is None or image2 is None or image1 == image2:
            # Incomplete control point, or one linking an image to itself.
            continue
        edges.add(frozenset({image1, image2}))
    return union_find(list(range(nb_images)), edges)
def filter_images(input_pto, images, output_pto):
    """Filter a pto file to only keep the specified images.

    Image lines ("i") are numbered from 0 in file order and kept only when
    their index is in ``images``. Control point lines ("c") are dropped as
    soon as one of their ``n``/``N`` items references an image that is not
    kept. Every other line is copied unchanged.

    We assume that only cpfind has been run on the input pto. In
    particular, we don't touch optimisation variables or project
    parameters (FOV, size, etc), since they are not normally set at this
    early stage in the pipeline.

    NOTE(review): kept lines are written verbatim, so control points keep
    their original n/N image indices even when earlier images have been
    dropped. Confirm whether consumers of the output expect the indices to
    be renumbered.

    Args:
        input_pto: Path of the pto file to read.
        images: Iterable of integer image indices to keep.
        output_pto: Path of the pto file to write (opened in 'w' mode).
    """
    # Build the lookup set once; membership is tested for every image line
    # and every n/N item of every control point.
    kept = frozenset(images)
    with open(input_pto) as f, open(output_pto, 'w') as out:
        image_id = 0
        for line in f:
            kind = line[0]
            if kind == 'i':
                # Image declaration
                if image_id in kept:
                    out.write(line)
                image_id += 1
            elif kind == 'c':
                # Control point: discard it if it references a removed image
                items = (m.group(0).strip()
                         for m in ITEM_REGEX.finditer(line[2:]))
                references_removed = any(
                    item[0] in ('n', 'N') and int(item[1:]) not in kept
                    for item in items
                )
                if not references_removed:
                    out.write(line)
            else:
                # Just copy the line to the output
                out.write(line)
if __name__ == '__main__':
    # Command-line use: print the connected components of the given project.
    if len(sys.argv) < 2:
        sys.exit("usage: {} PROJECT.pto".format(sys.argv[0]))
    with open(sys.argv[1]) as f:
        pprint(compute_connected_components(parse_pto(f)))