toc.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. '''
  2. toc
  3. ===================================
  4. This plugin generates a table of contents (TOC) for pages and articles.
  5. '''
  6. from __future__ import unicode_literals
  7. import logging
  8. import re
  9. from bs4 import BeautifulSoup, Comment
  10. from pelican import contents, signals
  11. from pelican.utils import python_2_unicode_compatible, slugify
  12. logger = logging.getLogger(__name__)
  13. '''
  14. Adapted from Python-Markdown's headerid extension: https://github.com/waylan/Python-Markdown/blob/master/markdown/extensions/headerid.py
  15. '''
  16. IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
  17. def unique(id, ids):
  18. """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
  19. while id in ids or not id:
  20. m = IDCOUNT_RE.match(id)
  21. if m:
  22. id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
  23. else:
  24. id = '%s_%d' % (id, 1)
  25. ids.add(id)
  26. return id
  27. '''
  28. End of the code adapted from Python-Markdown.
  29. '''
  30. @python_2_unicode_compatible
  31. class HtmlTreeNode(object):
  32. def __init__(self, parent, header, level, id):
  33. self.children = []
  34. self.parent = parent
  35. self.header = header
  36. self.level = level
  37. self.id = id
  38. def add(self, new_header, ids):
  39. new_level = new_header.name
  40. new_string = new_header.string
  41. new_id = new_header.attrs.get('id')
  42. if not new_string:
  43. new_string = new_header.find_all(
  44. text=lambda t: not isinstance(t, Comment),
  45. recursive=True)
  46. new_string = "".join(new_string)
  47. if not new_id:
  48. new_id = slugify(new_string, ())
  49. new_id = unique(new_id, ids) # make sure id is unique
  50. new_header.attrs['id'] = new_id
  51. if(self.level < new_level):
  52. new_node = HtmlTreeNode(self, new_string, new_level, new_id)
  53. self.children += [new_node]
  54. return new_node, new_header
  55. elif(self.level == new_level):
  56. new_node = HtmlTreeNode(self.parent, new_string, new_level, new_id)
  57. self.parent.children += [new_node]
  58. return new_node, new_header
  59. elif(self.level > new_level):
  60. return self.parent.add(new_header, ids)
  61. def __str__(self):
  62. if self.children:
  63. ret = ("<ul>{}</ul>"
  64. .format('{}'*len(self.children))
  65. .format(*self.children))
  66. else:
  67. ret = ""
  68. if not self.parent:
  69. ret = "<div id='toc'>{}</div>".format(ret)
  70. else:
  71. ret = ("<a class='toc-href' href='#{0}' title='{1}'>{1}</a>{2}"
  72. .format(self.id, self.header, ret))
  73. ret = "<li>{}</li>".format(ret)
  74. return ret
  75. def init_default_config(pelican):
  76. from pelican.settings import DEFAULT_CONFIG
  77. TOC_DEFAULT = {
  78. 'TOC_HEADERS': '^h[1-6]',
  79. 'TOC_RUN': 'true'
  80. }
  81. DEFAULT_CONFIG.setdefault('TOC', TOC_DEFAULT)
  82. if(pelican):
  83. pelican.settings.setdefault('TOC', TOC_DEFAULT)
  84. def generate_toc(content):
  85. if isinstance(content, contents.Static):
  86. return
  87. _toc_run = content.metadata.get(
  88. 'toc_run',
  89. content.settings['TOC']['TOC_RUN'])
  90. if not _toc_run == 'true':
  91. return
  92. all_ids = set()
  93. title = content.metadata.get('title', 'Title')
  94. tree = node = HtmlTreeNode(None, '', 'h0', '')
  95. soup = BeautifulSoup(content._content, 'html.parser')
  96. settoc = False
  97. try:
  98. header_re = re.compile(content.metadata.get(
  99. 'toc_headers', content.settings['TOC']['TOC_HEADERS']))
  100. except re.error as e:
  101. logger.error("TOC_HEADERS '%s' is not a valid re\n%s",
  102. content.settings['TOC']['TOC_HEADERS'])
  103. raise e
  104. for header in soup.findAll(header_re):
  105. settoc = True
  106. node, new_header = node.add(header, all_ids)
  107. header.replaceWith(new_header) # to get our ids back into soup
  108. if (settoc):
  109. tree_string = '{}'.format(tree)
  110. tree_soup = BeautifulSoup(tree_string, 'html.parser')
  111. content.toc = tree_soup.decode(formatter='html')
  112. content._content = soup.decode(formatter='html')
  113. def register():
  114. signals.initialized.connect(init_default_config)
  115. signals.content_object_init.connect(generate_toc)