Parcourir la source

Add a script intended to be launched as a cron task every X minutes

Gu1 il y a 11 ans
Parent
commit
ced962956a
3 fichiers modifiés avec 116 ajouts et 7 suppressions
  1. 9 4
      ffdnispdb/crawler.py
  2. 104 0
      ffdnispdb/cron_task.py
  3. 3 3
      ffdnispdb/models.py

+ 9 - 4
ffdnispdb/crawler.py

@@ -20,7 +20,11 @@ class Crawler(object):
 
     MAX_JSON_SIZE=1*1024*1024
 
-    escape=staticmethod(lambda x:x)
+    escape=staticmethod(lambda x: unicode(str(x), 'utf8'))
+
+    def __init__(self):
+        self.success=False
+        self.jdict={}
 
     def m(self, msg, evt=None):
         if not evt:
@@ -84,8 +88,8 @@ class Crawler(object):
             yield self.err('Too many redirects')
         except requests.exceptions.RequestException as e:
             yield self.err('Internal request exception')
-        except Exception as e:
-            yield self.err('Unexpected request exception')
+#        except Exception as e:
+#            yield self.err('Unexpected request exception')
 
         if r is None:
             yield self.abort('Connection could not be established, aborting')
@@ -189,6 +193,7 @@ class Crawler(object):
                self.m(json.dumps({'passed': 1}), 'control'))
 
         self.jdict=jdict
+        self.success=True
         self.done_cb()
 
 
@@ -198,7 +203,7 @@ class PrettyValidator(Crawler):
     def __init__(self, session=None, *args, **kwargs):
         super(PrettyValidator, self).__init__(*args, **kwargs)
         self.session=session
-        self.escape=escape
+        self.escape=lambda x: escape(unicode(str(x), 'utf8'))
 
     def m(self, msg, evt=None):
         return u'%sdata: %s\n\n'%(u'event: %s\n'%evt if evt else '', msg)

+ 104 - 0
ffdnispdb/cron_task.py

@@ -0,0 +1,104 @@
+#!/usr/bin/env python2
+
+
+import signal
+import traceback
+from sys import stderr
+from datetime import datetime, timedelta
+from ffdnispdb.crawler import TextValidator
+from ffdnispdb.models import ISP
+from ffdnispdb import db
+
+
+MAX_RUNTIME=15*60  # maximum total run time of the script, in seconds (15 minutes)
+
+class Timeout(Exception):  # raised by timeout_handler when one ISP is still current on a third consecutive alarm
+    pass
+
+class ScriptTimeout(Exception):  # raised by timeout_handler once MAX_RUNTIME is exceeded; caught at the end of the script
+    """
+    Script exceeded its allowed run time
+    """
+
+
+strike=1  # number of consecutive alarms seen for the ISP whose id is in last_isp
+last_isp=-1  # id of the ISP that was current at the previous alarm; -1 initially
+script_begin=datetime.now()  # script start time, compared against MAX_RUNTIME in timeout_handler
+def timeout_handler(signum, frame):  # SIGALRM handler: raises ScriptTimeout / Timeout, otherwise re-arms the alarm
+    global last_isp, strike
+    if script_begin < datetime.now()-timedelta(seconds=MAX_RUNTIME):  # script has been running longer than MAX_RUNTIME
+        raise ScriptTimeout
+
+    if last_isp == isp.id:  # `isp` is the module-level loop variable; NOTE(review): unbound if an alarm fires before the loop's first iteration - confirm
+        strike += 1
+        if strike > 2:
+            # three strikes, you're out.
+            print "you're out", isp
+            signal.alarm(6)  # re-arm the 6-second alarm before raising
+            raise Timeout
+    else:
+        last_isp = isp.id  # a different ISP than at the previous alarm: restart the count
+        strike = 1
+
+    signal.alarm(6)  # schedule the next check in 6 seconds
+
+signal.signal(signal.SIGALRM, timeout_handler)  # install timeout_handler as the SIGALRM handler
+signal.alarm(6)  # first alarm in 6 seconds; timeout_handler re-arms it on each call
+
+
+try:  # outer try: ScriptTimeout (or a Timeout raised outside the inner try) ends the run silently
+    for isp in ISP.query.filter(ISP.is_disabled == False,
+                                ISP.json_url != None,
+                                ISP.next_update < datetime.now(),
+                                ISP.update_error_strike < 3)\
+                        .order_by(ISP.last_update_success):  # ISPs that are not disabled, have a JSON URL, are due and have fewer than 3 strikes
+        try:
+            print u'%s: Attempting to update %s'%(datetime.now(), isp)
+            print u'    last successful update=%s'%(isp.last_update_success)
+            print u'    last update attempt=%s'%(isp.last_update_attempt)
+            print u'    next update was scheduled %s ago'%(datetime.now()-isp.next_update)
+            print u'    strike=%d'%(isp.update_error_strike)
+
+            isp.last_update_attempt=datetime.now()  # record the attempt before running the validator
+            db.session.add(isp)
+            db.session.commit()
+
+            validator=TextValidator()
+            log=''.join(validator(isp.json_url+'ab'))  # NOTE(review): the appended 'ab' changes the URL - looks like leftover debugging; confirm
+            if not validator.success: # handle error
+                isp.update_error_strike += 1
+                #isp.next_update = bla
+                db.session.add(isp)
+                db.session.commit()
+                print u'%s: Error while updating:'%(datetime.now())
+                if isp.update_error_strike >= 3:
+                    # send email
+                    print u'    three strikes, you\'re out'
+
+                print log.rstrip()+'\n'  # print the validator output collected above
+                continue
+
+            isp.json = validator.jdict  # store the validator's resulting dict on the ISP
+            isp.last_update_success = isp.last_update_attempt
+            isp.update_error_strike = 0  # a successful update resets the strike counter
+            #isp.next_update = bla
+            db.session.add(isp)
+            db.session.commit()
+
+            print u'%s: Update successful !'%(datetime.now())
+            print u'    next update is scheduled for %s\n'%(isp.next_update)
+        except Timeout:  # raised by timeout_handler while this ISP was being processed
+            print u'%s: Timeout while updating:'%(datetime.now())
+            isp=ISP.query.get(isp.id)  # re-fetch the ISP by id before recording the strike
+            isp.update_error_strike += 1
+            db.session.add(isp)
+            db.session.commit()
+            if isp.update_error_strike >= 3:
+                # send email
+                print u'    three strikes, you\'re out'
+            print traceback.format_exc()
+
+except ScriptTimeout:  # overall run-time limit reached: stop without further output
+    pass
+except Timeout:  # a Timeout that was not handled by the inner try is ignored
+    pass

+ 3 - 3
ffdnispdb/models.py

@@ -52,9 +52,9 @@ class ISP(db.Model):
     json_url = db.Column(db.String)
     last_update_success = db.Column(db.DateTime)
     last_update_attempt = db.Column(db.DateTime)
-    is_updatable = db.Column(db.Boolean, default=True) # set to False to disable JSON-URL updates
+    update_error_strike = db.Column(db.Integer, default=0) # if >= 3; then updates are disabled
+    next_update = db.Column(db.DateTime, default=datetime.now())
     tech_email = db.Column(db.String)
-    cache_info = db.Column(db.Text)
     json = db.Column(MutableDict.as_mutable(JSONEncodedDict))
 
     def __init__(self, *args, **kwargs):
@@ -87,7 +87,7 @@ class ISP(db.Model):
         return d
 
     def __repr__(self):
-        return '<ISP %r>' % (self.shortname if self.shortname else self.name,)
+        return u'<ISP %r>' % (self.shortname if self.shortname else self.name,)
 
 
 def pre_save_hook(sess):