Browse Source

Better handling&logging of exception in the validator

Since the validator is implemented as a generator, exceptions in the
online version of the validator were not catched anywhere, and with
gunicorn (in prod), they were not logged nor reported.
A custom version of the stream_with_context generator was implemented
that catch exceptions and send them to flask's logger.

Exceptions in the cron task version of the validator would simply crash
the script. It's fixed now to catch them and print them on stdout.

The validator is not really supposed to raise exceptions; it's supposed to
handle (most of) them and print the corresponding message; but since it's
doing HTTP requests and other complex stuff, a lot could happen and there
are no guarantees all exceptions will be handled.
Gu1 11 years ago
parent
commit
c4591fa65d
4 changed files with 71 additions and 7 deletions
  1. 7 2
      ffdnispdb/crawler.py
  2. 13 1
      ffdnispdb/cron_task.py
  3. 47 0
      ffdnispdb/utils.py
  4. 4 4
      ffdnispdb/views.py

+ 7 - 2
ffdnispdb/crawler.py

@@ -2,6 +2,7 @@
 
 import io
 import cgi
+import sys
 import pytz
 from datetime import datetime, timedelta
 from werkzeug.http import parse_date
@@ -122,8 +123,12 @@ class Crawler(object):
             yield self.err('Too many redirects')
         except requests.exceptions.RequestException as e:
             yield self.err('Internal request exception')
-#        except Exception as e:
-#            yield self.err('Unexpected request exception')
+        except Exception as e:
+            # Unexpected exception: abort the validation, then re-raise it
+            # so that it's logged.
+            tb = sys.exc_info()[2]
+            yield self.abort('Unexpected request exception')
+            raise e, None, tb
 
         if r is None:
             yield self.abort('Connection could not be established, aborting')

+ 13 - 1
ffdnispdb/cron_task.py

@@ -110,7 +110,15 @@ try:
             db.session.commit()
 
             validator=TextValidator()
-            log=''.join(validator(isp.json_url, isp.cache_info or {}))
+            log = ''
+            exc, exc_trace = None, None
+            try:
+                for l in validator(isp.json_url, isp.cache_info or {}):
+                    log += l
+            except Exception as e:
+                exc = e
+                exc_trace = traceback.format_exc()
+
             if not validator.success: # handle error
                 isp.update_error_strike += 1
                 # reset cache info (to force refetch next time)
@@ -124,6 +132,10 @@ try:
                     send_warning_email(isp, log)
 
                 print log.rstrip()+'\n'
+                if exc:
+                    print u'Unexpected exception in the validator: %r' % exc
+                    print exc_log
+
                 continue
 
             if validator.modified:

+ 47 - 0
ffdnispdb/utils.py

@@ -1,10 +1,12 @@
 # -*- coding: utf-8 -*-
 
 from flask import current_app
+from flask.globals import _request_ctx_stack
 from collections import OrderedDict
 from datetime import datetime
 import pytz
 import json
+import sys
 from . import db
 
 
@@ -67,3 +69,48 @@ def filesize_fmt(num):
             return fmt(num, x)
         num /= 1024.0
     return fmt(num, 'TiB')
+
+
+def stream_with_ctx_and_exc(generator_or_function):
+    """
+    taken from flask's code, added exception logging
+    """
+    try:
+        gen = iter(generator_or_function)
+    except TypeError:
+        def decorator(*args, **kwargs):
+            gen = generator_or_function()
+            return stream_with_context(gen)
+        return update_wrapper(decorator, generator_or_function)
+
+    def generator():
+        ctx = _request_ctx_stack.top
+        if ctx is None:
+            raise RuntimeError('Attempted to stream with context but '
+                'there was no context in the first place to keep around.')
+        with ctx:
+            # Dummy sentinel.  Has to be inside the context block or we're
+            # not actually keeping the context around.
+            yield None
+
+            # The try/finally is here so that if someone passes a WSGI level
+            # iterator in we're still running the cleanup logic.  Generators
+            # don't need that because they are closed on their destruction
+            # automatically.
+            try:
+                for item in gen:
+                    yield item
+            except Exception as e:
+                exc_type, exc_value, tb = sys.exc_info()
+                current_app.log_exception((exc_type, exc_value, tb))
+            finally:
+                if hasattr(gen, 'close'):
+                    gen.close()
+
+    # The trick is to start the generator.  Then the code execution runs until
+    # the first dummy None is yielded at which point the context was already
+    # pushed.  This item is discarded.  Then when the iteration continues the
+    # real generator is executed.
+    wrapped_g = generator()
+    next(wrapped_g)
+    return wrapped_g

+ 4 - 4
ffdnispdb/views.py

@@ -2,7 +2,7 @@
 
 from flask import request, redirect, url_for, abort, \
     render_template, flash, json, session, Response, Markup, \
-    stream_with_context, current_app, Blueprint
+    current_app, Blueprint
 from flask.ext.babel import gettext as _, get_locale
 from flask.ext.mail import Message
 import itsdangerous
@@ -16,7 +16,7 @@ locale.setlocale(locale.LC_ALL, '')
 from time import time
 import os.path
 
-from . import forms
+from . import forms, utils
 from .constants import STEPS, STEPS_LABELS, LOCALES_FLAGS
 from . import db, cache, mail
 from .models import ISP, ISPWhoosh, CoveredArea, RegisteredOffice
@@ -254,7 +254,7 @@ def json_url_validator():
         session['form_json']['validator'] = time()
 
     validator = WebValidator(session._get_current_object(), 'form_json')
-    return Response(stream_with_context(
+    return Response(utils.stream_with_ctx_and_exc(
         validator(session['form_json']['url'])
     ), mimetype="text/event-stream")
 
@@ -317,7 +317,7 @@ def reactivate_validator():
         session['form_reactivate']['validator'] = time()
 
     validator = PrettyValidator(session._get_current_object(), 'form_reactivate')
-    return Response(stream_with_context(
+    return Response(utils.stream_with_ctx_and_exc(
         validator(p.json_url, p.cache_info or {})
     ), mimetype="text/event-stream")