#!/usr/bin/env python3 """Parses the inetnum data from the registry to turn it into a tree-like structure in JSON, that can be used for instance with d3js. """ import json import sys import os import time from netaddr import IPNetwork, IPSet import dn42 # A tree is represented by a dictionary 'node' → [children], where [children] # is the list of *direct* children of 'node' def print_tree(tree, indent=0): for p, children in tree.items(): print("{}{}{}".format(' ' * indent, p, ':' if children else '')) for child in children: print_tree(child, indent + 2) def print_json_tree(tree, indent=0): print("{}{}{}".format(' ' * indent, '[' + str(tree['prefix']) + ']' if "display" in tree else tree['prefix'], ':' if tree['children'] else '')) if 'children' in tree: for p in tree["children"]: print_json_tree(p, indent + 2) def empty_prefix_tree(): return { 'prefix': IPNetwork('0.0.0.0/0'), 'children': [] } # Since we work with subnets, we know that given subnets 'a' and 'b', either # 'a' includes 'b', either 'b' includes 'a', either 'a' and 'b' don't # otherlap. def insert_prefix(tree, prefix): """Each node of the tree has a variable number of children. A tree always has a (dummy) root, otherwise we could have multiple roots. The root compare greater than any prefix. Algo: start with an empty tree (only a root). For each prefix $p$, try to insert it in the current tree: - if the current node $n$ has no child, insert $p$ as a child of $n$ - else, for each child $s$ of the current node $n$: - if $p$ is a subset of $s$, recursively insert $p$ in $s$ and return - if $p$ is a superset of $s$, collect all children $s'$ of $n$ that are also subsets of $p$, and move them as children of $p$. Then insert $p$ as a child of $n$ and return. - if neither case happened, just insert $p$ as a child of $n$. """ for child in tree['children']: if prefix in child['prefix']: insert_prefix(child, prefix) return subsets = [child for child in tree['children'] if child['prefix'] in prefix] for c in subsets: tree['children'].remove(c) tree['children'].append({ 'prefix': prefix, 'children': subsets}) def filter_tree(tree, prefix): """Keep only the parts of the tree that are included in the given prefix.""" tree['children'] = [c for c in tree['children'] if c['prefix'] in prefix] for c in tree['children']: filter_tree(c, prefix) def sort_tree(tree): """Recursively sorts all children of the tree. This does not work if prefixes are represented as IPSet().""" for c in tree["children"]: sort_tree(c) tree["children"].sort(key=lambda t : t['prefix']) def fill_blanks(tree): for c in tree["children"]: fill_blanks(c) used_set = IPSet([c['prefix'] for c in tree['children']]) if used_set.size is not 0: unused_set = IPSet([tree['prefix']]) ^ used_set tree["children"].extend([{ "prefix": p, "children": [], "display": "none"} for p in unused_set.iter_cidrs()]) def prefixes_tree(prefixes): tree = empty_prefix_tree() for p in (IPNetwork(p) for p in prefixes): #print_json_tree(tree) #print() insert_prefix(tree, p) # Only keep dn42 range filter_tree(tree, dn42.ADDRSPACE) # Then sort subnets sort_tree(tree) # Add dummy subnets for the visualization for child in tree["children"]: fill_blanks(child) # And resort! sort_tree(tree) return tree def compute_size(tree): for c in tree["children"]: compute_size(c) tree['size'] = tree["prefix"].size def serialize(tree): """Transforms all IPSet() and IPNetwork() to strings""" for c in tree["children"]: serialize(c) tree["prefix"] = str(tree["prefix"]) if __name__ == '__main__': inetnums_dir = sys.argv[1] prefixtree_out = sys.argv[2] inetnums_out = sys.argv[3] inetnums = dn42.parse_records(inetnums_dir) tree = prefixes_tree(inetnums.keys()) #print_json_tree(tree) compute_size(tree) serialize(tree) # Don't display the root tree["display"] = "none" tree["date"] = time.time() tree["prefixes"] = len(inetnums) # TODO: add mtn revision tree["origin"] = "registry" with open(prefixtree_out, "w") as f: json.dump(tree, f) with open(inetnums_out, "w") as f: json.dump(inetnums, f)