Browse Source

eur-lex: non-brittle way to exclude informative sections.

Documents contain various informative sections (statements, notes and so
on). They are interesting and should end up in the TOC too, but they are
neither articles nor annexes and therefore have to be handled differently.

This commit removes an exclusion that was specific to 2014/53 and instead
keeps only the title elements whose text starts with 'Article' or 'ANNEX';
this is English-only but works well.
Adrien Nader 8 years ago
parent
commit
27cdbf8d1e
1 changed file with 4 additions and 6 deletions
  1. 4 6
      eur-lex/eur-lex_toc.user.js

+ 4 - 6
eur-lex/eur-lex_toc.user.js

@@ -47,12 +47,14 @@ var ol_annexes = document.createElement('ol');
 h1_articles.textContent = 'Articles';
 h1_annexes.textContent = 'Annexes';
 
-var article_titles = document.evaluate("//div[@lang='EN']/p[@class='ti-art']",
+var article_titles = document.evaluate("//div[@lang='EN']/p[@class='ti-art'][starts-with(text(), 'Article')]",
   document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
 
-var annex_titles = document.evaluate("//div[@lang='EN']/div[@id]/p[1][@class='doc-ti']",
+var annex_titles = document.evaluate("//div[@lang='EN']/div[@id]/p[1][@class='doc-ti'][starts-with(text(), 'ANNEX')]",
   document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
 
+/* TODO: there are non-article and non-annex sections that should go in the TOC too. */
+
 for (var i = 0; i < article_titles.snapshotLength; i++) {
   var id = article_titles.snapshotItem(i);
   var title = id.nextElementSibling;
@@ -73,10 +75,6 @@ for (var i = 0; i < annex_titles.snapshotLength; i++) {
   var id = annex_titles.snapshotItem(i);
   var title = id.nextElementSibling;
 
-  if (id.textContent === 'STATEMENT OF THE EUROPEAN PARLIAMENT') {
-    continue;
-  }
-
   var n = id.textContent.replace(/ANNEX /, '');
 
   id.id = 'annex-' + n;