|
@@ -23,6 +23,7 @@ class CsvStatementParser(object):
|
|
self.first_ops = {}
|
|
self.first_ops = {}
|
|
self.last_ops = {}
|
|
self.last_ops = {}
|
|
self.daterange = [datetime.now(), datetime.fromordinal(1)]
|
|
self.daterange = [datetime.now(), datetime.fromordinal(1)]
|
|
|
|
+ self.dups = dict() # holds counters for duplicate lines
|
|
|
|
|
|
|
|
|
|
def parse(self, filename):
|
|
def parse(self, filename):
|
|
@@ -51,10 +52,20 @@ class CsvStatementParser(object):
|
|
|
|
|
|
|
|
|
|
def _parse_file(self, filename, reader):
|
|
def _parse_file(self, filename, reader):
|
|
|
|
+ self.dups = dict() # Duplicate counters must be reset for each file
|
|
print("Lecture du fichier %s" % os.path.basename(filename))
|
|
print("Lecture du fichier %s" % os.path.basename(filename))
|
|
for row in reader:
|
|
for row in reader:
|
|
opdate = datetime.strptime(row[self.date_fieldname], '%d/%m/%Y')
|
|
opdate = datetime.strptime(row[self.date_fieldname], '%d/%m/%Y')
|
|
ophash = datetime.strftime(opdate, '%Y-%m-%d') + hashlib.md5(json.dumps(row).encode()).hexdigest()
|
|
ophash = datetime.strftime(opdate, '%Y-%m-%d') + hashlib.md5(json.dumps(row).encode()).hexdigest()
|
|
|
|
+ # Special use case: one file contains multiple identical lines.
|
|
|
|
+ # Then we append a counter to the duplicate ophash.
|
|
|
|
+ if ophash in self.lines:
|
|
|
|
+ print("*** Duplicate line found in {}: {}".format(filename, ';'.join(row.values())))
|
|
|
|
+ if ophash not in self.dups:
|
|
|
|
+ self.dups[ophash] = 1
|
|
|
|
+ self.dups[ophash] = self.dups[ophash] + 1
|
|
|
|
+ ophash = ophash + "-" + str(self.dups[ophash])
|
|
|
|
+ # print(" We have now :\n {}\n {}".format("\n ".join([h + " // " + "".join(v.values()) for h,v in self.lines.items() if h.startswith(ophash[0:10])]), ophash + " // " + "".join(row.values()))) # XXX DEBUG
|
|
self.lines[ophash] = {k:v for k,v in row.items() if k != ''}
|
|
self.lines[ophash] = {k:v for k,v in row.items() if k != ''}
|
|
# Adjust dateranges
|
|
# Adjust dateranges
|
|
if opdate < self.daterange[0]:
|
|
if opdate < self.daterange[0]:
|