123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- #!/usr/bin/env python3
- """Parses the inetnum data from the registry to turn it into a tree-like
- structure in JSON, that can be used for instance with d3js.
- """
- import json
- import sys
- import os
- import time
- from netaddr import IPNetwork, IPSet
- import dn42
- # A tree is represented by a dictionary 'node' → [children], where [children]
- # is the list of *direct* children of 'node'
- def print_tree(tree, indent=0):
- for p, children in tree.items():
- print("{}{}{}".format(' ' * indent, p, ':' if children else ''))
- for child in children:
- print_tree(child, indent + 2)
- def print_json_tree(tree, indent=0):
- print("{}{}{}".format(' ' * indent,
- '[' + str(tree['prefix']) + ']'
- if "display" in tree else tree['prefix'],
- ':' if tree['children'] else ''))
- if 'children' in tree:
- for p in tree["children"]:
- print_json_tree(p, indent + 2)
- def empty_prefix_tree():
- return { 'prefix': IPNetwork('0.0.0.0/0'),
- 'children': [] }
- # Since we work with subnets, we know that given subnets 'a' and 'b', either
- # 'a' includes 'b', either 'b' includes 'a', either 'a' and 'b' don't
- # otherlap.
- def insert_prefix(tree, prefix):
- """Each node of the tree has a variable number of children. A tree always has
- a (dummy) root, otherwise we could have multiple roots. The root compare
- greater than any prefix.
- Algo: start with an empty tree (only a root). For each prefix $p$, try to
- insert it in the current tree:
- - if the current node $n$ has no child, insert $p$ as a child of $n$
- - else, for each child $s$ of the current node $n$:
- - if $p$ is a subset of $s$, recursively insert $p$ in $s$ and return
- - if $p$ is a superset of $s$, collect all children $s'$ of $n$ that are
- also subsets of $p$, and move them as children of $p$. Then insert $p$
- as a child of $n$ and return.
- - if neither case happened, just insert $p$ as a child of $n$.
- """
- for child in tree['children']:
- if prefix in child['prefix']:
- insert_prefix(child, prefix)
- return
- subsets = [child for child in tree['children']
- if child['prefix'] in prefix]
- for c in subsets:
- tree['children'].remove(c)
- tree['children'].append({ 'prefix': prefix, 'children': subsets})
- def filter_tree(tree, prefix):
- """Keep only the parts of the tree that are included in the given prefix."""
- tree['children'] = [c for c in tree['children'] if c['prefix'] in prefix]
- for c in tree['children']:
- filter_tree(c, prefix)
- def sort_tree(tree):
- """Recursively sorts all children of the tree. This does not work if
- prefixes are represented as IPSet()."""
- for c in tree["children"]:
- sort_tree(c)
- tree["children"].sort(key=lambda t : t['prefix'])
- def fill_blanks(tree):
- for c in tree["children"]:
- fill_blanks(c)
- used_set = IPSet([c['prefix'] for c in tree['children']])
- if used_set.size is not 0:
- unused_set = IPSet([tree['prefix']]) ^ used_set
- tree["children"].extend([{ "prefix": p, "children": [], "display": "none"}
- for p in unused_set.iter_cidrs()])
- def prefixes_tree(prefixes):
- tree = empty_prefix_tree()
- for p in (IPNetwork(p) for p in prefixes):
- #print_json_tree(tree)
- #print()
- insert_prefix(tree, p)
- # Only keep dn42 range
- filter_tree(tree, dn42.ADDRSPACE)
- # Then sort subnets
- sort_tree(tree)
- # Add dummy subnets for the visualization
- for child in tree["children"]:
- fill_blanks(child)
- # And resort!
- sort_tree(tree)
- return tree
- def compute_size(tree):
- for c in tree["children"]:
- compute_size(c)
- tree['size'] = tree["prefix"].size
- def serialize(tree):
- """Transforms all IPSet() and IPNetwork() to strings"""
- for c in tree["children"]:
- serialize(c)
- tree["prefix"] = str(tree["prefix"])
- if __name__ == '__main__':
- inetnums_dir = sys.argv[1]
- prefixtree_out = sys.argv[2]
- inetnums_out = sys.argv[3]
- inetnums = dn42.parse_records(inetnums_dir)
- tree = prefixes_tree(inetnums.keys())
- #print_json_tree(tree)
- compute_size(tree)
- serialize(tree)
- # Don't display the root
- tree["display"] = "none"
- tree["date"] = time.time()
- tree["prefixes"] = len(inetnums)
- # TODO: add mtn revision
- tree["origin"] = "registry"
- with open(prefixtree_out, "w") as f:
- json.dump(tree, f)
- with open(inetnums_out, "w") as f:
- json.dump(inetnums, f)
|