rfc.py 691 B

1234567891011121314151617181920212223242526272829
  1. from __future__ import absolute_import
  2. import re
  3. def get_pages(filename):
  4. with open(filename) as f:
  5. data = f.read()
  6. return data.split('\x0c')
  7. header_pattern = re.compile(r'^RFC \d+\s+.*\s+(\w+ \d{4})$', re.M)
  8. footer_pattern = re.compile(r'^\w+\s+\w+\s+\[Page \d+\]$', re.M)
  9. def remove_header(page):
  10. page = header_pattern.sub('', page)
  11. return page.lstrip('\n')
  12. def remove_footer(page):
  13. page = footer_pattern.sub('', page)
  14. return page.rstrip() + '\n\n'
  15. def clean_pages():
  16. return map(remove_header, map(remove_footer, get_pages('rfc2812.txt')))
  17. def save_clean():
  18. with open('rfc2812-clean.txt', 'w') as f:
  19. map(f.write, clean_pages())
  20. if __name__ == '__main__':
  21. save_clean()