from flask import escape, json
import requests
import io
from ispformat.validator import validate_isp
from .models import ISP
class Crawler(object):
    """Fetch an ISP description from a URL and validate it step by step.

    Calling an instance with a URL returns a generator of text messages, each
    one framed like a Server-Sent Events message (optional ``event:`` line,
    ``data:`` line(s), blank line).  The steps are: HTTP retrieval, header
    checks, size-limited download, charset decoding, JSON parsing, schema
    validation through ``validate_isp`` and a name-uniqueness query on the
    ``ISP`` model.  On success the parsed document is stored in ``self.jdict``
    and ``done_cb`` is invoked.
    """

    # Upper bound, in bytes, on the size of the downloaded document.
    MAX_JSON_SIZE = 1 * 1024 * 1024

    # Callable turning the list of validation errors into text.  The text type
    # is ``unicode`` on Python 2; fall back to ``str`` where that builtin does
    # not exist so the class body can still be evaluated.
    try:
        format_validation_errors = unicode  # noqa: F821
    except NameError:
        format_validation_errors = str

    # Hook applied to untrusted values before they are put into a message.
    # It must be a staticmethod: a bare lambda stored on the class would be
    # bound to the instance and called as ``lambda(self, value)``.
    escape = staticmethod(lambda x: x)

    def m(self, msg, evt=None):
        """Frame *msg* as one event, optionally tagged with event type *evt*.

        Every line of *msg* gets its own ``data:`` prefix, so multi-line
        messages stay inside a single event instead of being cut at the first
        line break.  Single-line messages are framed exactly as before.
        """
        text = (u'%s' % (msg,)).replace(u'\r\n', u'\n').replace(u'\r', u'\n')
        header = u'event: %s\n' % evt if evt else u''
        body = u''.join(u'data: %s\n' % line for line in text.split(u'\n'))
        return header + body + u'\n'

    def err(self, msg, *args):
        """Return *msg* framed as an error message (``!`` prefix)."""
        return self.m(u'! %s' % msg, *args)

    def warn(self, msg):
        """Return *msg* framed as a warning message (``@`` prefix)."""
        return self.m(u'@ %s' % msg)

    def info(self, msg):
        """Return *msg* framed as an informational message (en-dash prefix)."""
        return self.m(u'\u2013 %s' % msg)

    def abort(self, msg):
        """Return the final failure message followed by a ``closed`` control event."""
        return (self.m('\n== %s' % msg) +
                self.m(json.dumps({'closed': 1}), 'control'))

    def done_cb(self):
        """Hook called once validation has succeeded; does nothing by default."""
        pass

    def __call__(self, url):
        """Validate the document at *url*, yielding progress messages.

        The generator ends after the first ``abort`` message.  When every
        check passes, ``self.jdict`` holds the parsed document and
        ``done_cb`` has been called before the final success message is
        yielded.
        """
        esc = self.escape
        yield self.m('Starting the validation process...')
        yield self.m('* Attempting to retreive %s' % esc(url))

        r = None
        try:
            r = requests.get(url, verify='/etc/ssl/certs/ca-certificates.crt',
                             headers={'User-Agent': 'FFDN DB validator'},
                             stream=True, timeout=10)
        except requests.exceptions.SSLError as e:
            yield self.err('Unable to connect, SSL Error: %s\n' % esc(e))
        except requests.exceptions.ConnectionError as e:
            yield self.err('Unable to connect: %s\n' % esc(e))
        except requests.exceptions.Timeout:
            yield self.err('Connection timeout')
        except requests.exceptions.TooManyRedirects:
            yield self.err('Too many redirects')
        except requests.exceptions.RequestException:
            yield self.err('Internal request exception')
        except Exception:
            # Boundary of the generator: report instead of propagating.
            yield self.err('Unexpected request exception')

        if r is None:
            yield self.abort('Connection could not be established, aborting')
            return

        try:
            for message in self._validate_response(r):
                yield message
        finally:
            # The request is streamed, so release the connection on every
            # exit path (abort, success, or the consumer closing us early).
            r.close()

    def _validate_response(self, r):
        """Yield the messages for every step that follows a successful connect."""
        esc = self.escape
        yield self.info('Connection established')
        yield self.info('Response code: %s %s' % (esc(r.status_code), esc(r.reason)))
        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            yield self.err('Response code indicates an error')
            yield self.abort('Invalid response code')
            return

        content_type = r.headers.get('content-type')
        yield self.info('Content type: %s' % (esc(r.headers.get('content-type', 'not defined'))))
        if not content_type:
            yield self.err('Content-type MUST be defined')
            yield self.abort('The file must have a proper content-type to continue')
            return
        # Compare the media type only, so parameters such as "; charset=utf-8"
        # do not trigger the warning.
        if content_type.split(';', 1)[0].strip().lower() != 'application/json':
            yield self.warn('Content-type SHOULD be application/json')

        if not r.encoding:
            yield self.warn('Encoding not set. Assuming it\'s unicode, as per RFC4627 section 3')

        declared_length = r.headers.get('content-length')
        yield self.info('Content length: %s' % (esc(r.headers.get('content-length', 'not set'))))
        if not declared_length:
            yield self.warn('No content-length. Note that we will not process a file whose size exceed 1MiB')
        else:
            try:
                too_big = int(declared_length) > self.MAX_JSON_SIZE
            except ValueError:
                # A malformed header is not fatal: the download loop below
                # enforces the size limit on the actual data anyway.
                too_big = False
                yield self.warn('Content-length is not a valid number, ignoring it')
            if too_big:
                yield self.abort('File too big ! File size must be less then 1MiB')
                return

        yield self.info('Reading response into memory...')
        buf = io.BytesIO()
        for chunk in r.iter_content(requests.models.CONTENT_CHUNK_SIZE):
            buf.write(chunk)
            if buf.tell() > self.MAX_JSON_SIZE:
                yield self.abort('File too big ! File size must be less then 1MiB')
                return
        content = buf.getvalue()
        del buf
        yield self.info('Successfully read %d bytes' % len(content))

        yield self.m('\n* Parsing the JSON file')
        charset = r.encoding
        if not charset:
            charset = requests.utils.guess_json_utf(content)
            if not charset:
                yield self.err('Unable to guess unicode charset')
                yield self.abort('The file MUST be unicode-encoded when no explicit charset is in the content-type')
                return
            yield self.info('Guessed charset: %s' % charset)

        try:
            txt = content.decode(charset)
        except LookupError as e:
            yield self.err('Invalid/unknown charset: %s' % esc(e))
            yield self.abort('Charset error, Cannot continue')
            return
        except UnicodeDecodeError as e:
            yield self.err('Unicode decode error: %s' % esc(e))
            yield self.abort('Charset error, cannot continue')
            return
        except Exception:
            yield self.abort('Unexpected charset error')
            return
        yield self.info('Successfully decoded file as %s' % esc(charset))

        jdict = None
        try:
            jdict = json.loads(txt)
        except ValueError as e:
            yield self.err('Error while parsing JSON: %s' % esc(e))
        except Exception as e:
            yield self.err('Unexpected error while parsing JSON: %s' % esc(e))

        # Any falsy result (parse failure, but also an empty document) is
        # rejected here, as in the original code.
        if not jdict:
            yield self.abort('Could not parse JSON')
            return
        yield self.info('JSON parsed successfully')

        yield self.m('\n* Validating the JSON against the schema')
        errors = list(validate_isp(jdict))
        if errors:
            yield self.err('Validation errors:\n%s' % esc(self.format_validation_errors(errors)))
            yield self.abort('Your JSON file does not follow the schema, please fix it')
            return
        yield self.info('Done. No errors encountered \\o')

        # check name uniqueness
        shortname = jdict.get('shortname')
        where = (ISP.name == jdict['name'])
        if shortname:
            where |= (ISP.shortname == shortname)
        if ISP.query.filter(where).count() > 0:
            yield self.err('An ISP named "%s" already exist' % esc(
                jdict['name'] + (' (' + shortname + ')' if shortname else '')
            ))
            yield self.abort('The name of your ISP must be unique')
            return

        # Record the result before announcing success: code placed after the
        # last yield only runs if the consumer asks for one more item.
        self.jdict = jdict
        self.done_cb()
        yield (self.m('\n== All good ! You can click on Confirm now') +
               self.m(json.dumps({'passed': 1}), 'control'))
class PrettyValidator(Crawler):
    """Crawler that escapes interpolated values and records success in a session.

    The ``escape`` hook is replaced by the ``escape`` function imported from
    flask at the top of this file, and ``done_cb`` stores the validated
    document under the ``'form_json'`` entry of the session object given to
    the constructor.
    """

    def __init__(self, session=None, *args, **kwargs):
        """Keep *session* for ``done_cb`` and switch the escape hook on."""
        super(PrettyValidator, self).__init__(*args, **kwargs)
        self.session = session
        self.escape = escape

    def err(self, msg, *args):
        """Return *msg* framed as an error message (``!`` prefix)."""
        return self.m(u'! %s' % msg, *args)

    def warn(self, msg):
        """Return *msg* framed as a warning message (``@`` prefix)."""
        return self.m(u'@ %s' % msg)

    def info(self, msg):
        """Return *msg* framed as an informational message (en-dash prefix)."""
        # Written as an escape so the source file stays pure ASCII.
        return self.m(u'\u2013 %s' % msg)

    def abort(self, msg):
        """Return the final failure message followed by a ``closed`` control event."""
        return (self.m(u'\n== %s' % msg) +
                self.m(json.dumps({'closed': 1}), 'control'))

    def format_validation_errors(self, errs):
        """Return one ``<schema path>: <error>`` line per entry of *errs*.

        Each error is expected to expose a ``schema_path`` sequence; its first
        element is dropped and the rest is joined with dots.
        """
        lines = []
        for e in errs:
            # Path elements are converted to text one by one because
            # str.join rejects non-string items (presumably list indices can
            # show up here as integers -- confirm against the validator).
            path = u'.'.join(u'%s' % part for part in list(e.schema_path)[1:])
            # Formatting the error through the unicode template avoids a
            # byte-string round trip on Python 2.
            lines.append(u' %s: %s' % (path, e))
        return u'\n'.join(lines)

    def done_cb(self):
        """Flag the session's ``form_json`` entry as validated and save it."""
        if self.session is None:
            # ``session`` is optional in the constructor; nothing to record.
            return
        self.session['form_json']['validated'] = True
        self.session['form_json']['jdict'] = self.jdict
        self.session.save()