123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- import datetime
- from email._parseaddr import _daynames, _monthnames, _timezones
- def _parsedate_tz(data):
- """Convert date to extended time tuple.
- The last (additional) element is the time zone offset in seconds, except if
- the timezone was specified as -0000. In that case the last element is
- None. This indicates a UTC timestamp that explicitly declaims knowledge of
- the source timezone, as opposed to a +0000 timestamp that indicates the
- source timezone really was UTC.
- """
- if not data:
- return
- data = data.split()
- # The FWS after the comma after the day-of-week is optional, so search and
- # adjust for this.
- if data[0].endswith(',') or data[0].lower() in _daynames:
- # There's a dayname here. Skip it
- del data[0]
- else:
- i = data[0].rfind(',')
- if i >= 0:
- data[0] = data[0][i+1:]
- if len(data) == 3: # RFC 850 date, deprecated
- stuff = data[0].split('-')
- if len(stuff) == 3:
- data = stuff + data[1:]
- if len(data) == 4:
- s = data[3]
- i = s.find('+')
- if i == -1:
- i = s.find('-')
- if i > 0:
- data[3:] = [s[:i], s[i:]]
- else:
- data.append('') # Dummy tz
- if len(data) < 5:
- return None
- data = data[:5]
- [dd, mm, yy, tm, tz] = data
- mm = mm.lower()
- if mm not in _monthnames:
- dd, mm = mm, dd.lower()
- if mm not in _monthnames:
- return None
- mm = _monthnames.index(mm) + 1
- if mm > 12:
- mm -= 12
- if dd[-1] == ',':
- dd = dd[:-1]
- i = yy.find(':')
- if i > 0:
- yy, tm = tm, yy
- if yy[-1] == ',':
- yy = yy[:-1]
- if not yy[0].isdigit():
- yy, tz = tz, yy
- if tm[-1] == ',':
- tm = tm[:-1]
- tm = tm.split(':')
- if len(tm) == 2:
- [thh, tmm] = tm
- tss = '0'
- elif len(tm) == 3:
- [thh, tmm, tss] = tm
- elif len(tm) == 1 and '.' in tm[0]:
- # Some non-compliant MUAs use '.' to separate time elements.
- tm = tm[0].split('.')
- if len(tm) == 2:
- [thh, tmm] = tm
- tss = 0
- elif len(tm) == 3:
- [thh, tmm, tss] = tm
- else:
- return None
- try:
- yy = int(yy)
- dd = int(dd)
- thh = int(thh)
- tmm = int(tmm)
- tss = int(tss)
- except ValueError:
- return None
- # Check for a yy specified in two-digit format, then convert it to the
- # appropriate four-digit format, according to the POSIX standard. RFC 822
- # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
- # mandates a 4-digit yy. For more information, see the documentation for
- # the time module.
- if yy < 100:
- # The year is between 1969 and 1999 (inclusive).
- if yy > 68:
- yy += 1900
- # The year is between 2000 and 2068 (inclusive).
- else:
- yy += 2000
- tzoffset = None
- tz = tz.upper()
- if tz in _timezones:
- tzoffset = _timezones[tz]
- else:
- try:
- tzoffset = int(tz)
- except ValueError:
- pass
- if tzoffset==0 and tz.startswith('-'):
- tzoffset = None
- # Convert a timezone offset into seconds ; -0500 -> -18000
- if tzoffset:
- if tzoffset < 0:
- tzsign = -1
- tzoffset = -tzoffset
- else:
- tzsign = 1
- tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
- # Daylight Saving Time flag is set to -1, since DST is unknown.
- return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
- def parsedate_to_datetime(data):
- dtuple, tz = _parsedate_tz(data)
- if tz is None:
- return datetime.datetime(*dtuple[:6])
- return datetime.datetime(
- *dtuple[:6],
- tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|