dn42-registry-routes.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. #!/usr/bin/env python3
  2. """Parses the inetnum data from the registry to turn it into a tree-like
  3. structure in JSON, that can be used for instance with d3js.
  4. """
  5. import json
  6. import sys
  7. import os
  8. import time
  9. from netaddr import IPNetwork, IPSet
  10. import dn42
  11. # A tree is represented by a dictionary 'node' → [children], where [children]
  12. # is the list of *direct* children of 'node'
  13. def print_tree(tree, indent=0):
  14. for p, children in tree.items():
  15. print("{}{}{}".format(' ' * indent, p, ':' if children else ''))
  16. for child in children:
  17. print_tree(child, indent + 2)
  18. def print_json_tree(tree, indent=0):
  19. print("{}{}{}".format(' ' * indent,
  20. '[' + str(tree['prefix']) + ']'
  21. if "display" in tree else tree['prefix'],
  22. ':' if tree['children'] else ''))
  23. if 'children' in tree:
  24. for p in tree["children"]:
  25. print_json_tree(p, indent + 2)
  26. def empty_prefix_tree():
  27. return { 'prefix': IPNetwork('0.0.0.0/0'),
  28. 'children': [] }
  29. # Since we work with subnets, we know that given subnets 'a' and 'b', either
  30. # 'a' includes 'b', either 'b' includes 'a', either 'a' and 'b' don't
  31. # otherlap.
  32. def insert_prefix(tree, prefix):
  33. """Each node of the tree has a variable number of children. A tree always has
  34. a (dummy) root, otherwise we could have multiple roots. The root compare
  35. greater than any prefix.
  36. Algo: start with an empty tree (only a root). For each prefix $p$, try to
  37. insert it in the current tree:
  38. - if the current node $n$ has no child, insert $p$ as a child of $n$
  39. - else, for each child $s$ of the current node $n$:
  40. - if $p$ is a subset of $s$, recursively insert $p$ in $s$ and return
  41. - if $p$ is a superset of $s$, collect all children $s'$ of $n$ that are
  42. also subsets of $p$, and move them as children of $p$. Then insert $p$
  43. as a child of $n$ and return.
  44. - if neither case happened, just insert $p$ as a child of $n$.
  45. """
  46. for child in tree['children']:
  47. if prefix in child['prefix']:
  48. insert_prefix(child, prefix)
  49. return
  50. subsets = [child for child in tree['children']
  51. if child['prefix'] in prefix]
  52. for c in subsets:
  53. tree['children'].remove(c)
  54. tree['children'].append({ 'prefix': prefix, 'children': subsets})
  55. def filter_tree(tree, prefix):
  56. """Keep only the parts of the tree that are included in the given prefix."""
  57. tree['children'] = [c for c in tree['children'] if c['prefix'] in prefix]
  58. for c in tree['children']:
  59. filter_tree(c, prefix)
  60. def sort_tree(tree):
  61. """Recursively sorts all children of the tree. This does not work if
  62. prefixes are represented as IPSet()."""
  63. for c in tree["children"]:
  64. sort_tree(c)
  65. tree["children"].sort(key=lambda t : t['prefix'])
  66. def fill_blanks(tree):
  67. for c in tree["children"]:
  68. fill_blanks(c)
  69. used_set = IPSet([c['prefix'] for c in tree['children']])
  70. if used_set.size is not 0:
  71. unused_set = IPSet([tree['prefix']]) ^ used_set
  72. tree["children"].extend([{ "prefix": p, "children": [], "display": "none"}
  73. for p in unused_set.iter_cidrs()])
  74. def prefixes_tree(prefixes):
  75. tree = empty_prefix_tree()
  76. for p in (IPNetwork(p) for p in prefixes):
  77. #print_json_tree(tree)
  78. #print()
  79. insert_prefix(tree, p)
  80. # Only keep dn42 range
  81. filter_tree(tree, dn42.ADDRSPACE)
  82. # Then sort subnets
  83. sort_tree(tree)
  84. # Add dummy subnets for the visualization
  85. for child in tree["children"]:
  86. fill_blanks(child)
  87. # And resort!
  88. sort_tree(tree)
  89. return tree
  90. def compute_size(tree):
  91. for c in tree["children"]:
  92. compute_size(c)
  93. tree['size'] = tree["prefix"].size
  94. def serialize(tree):
  95. """Transforms all IPSet() and IPNetwork() to strings"""
  96. for c in tree["children"]:
  97. serialize(c)
  98. tree["prefix"] = str(tree["prefix"])
  99. if __name__ == '__main__':
  100. inetnums_dir = sys.argv[1]
  101. prefixtree_out = sys.argv[2]
  102. inetnums_out = sys.argv[3]
  103. inetnums = dn42.parse_records(inetnums_dir)
  104. tree = prefixes_tree(inetnums.keys())
  105. #print_json_tree(tree)
  106. compute_size(tree)
  107. serialize(tree)
  108. # Don't display the root
  109. tree["display"] = "none"
  110. tree["date"] = time.time()
  111. tree["prefixes"] = len(inetnums)
  112. # TODO: add mtn revision
  113. tree["origin"] = "registry"
  114. with open(prefixtree_out, "w") as f:
  115. json.dump(tree, f)
  116. with open(inetnums_out, "w") as f:
  117. json.dump(inetnums, f)