Browse Source

Gros bug corrigé sur la détection des doublons légitimes.

On se retrouvait avec des lignes doublées dans les CSV mensuels générés.
pitchum 7 years ago
parent
commit
46d68acf72
1 changed files with 4 additions and 4 deletions
  1. 4 4
      ccoop-resplit

+ 4 - 4
ccoop-resplit

@@ -59,13 +59,13 @@ class CsvStatementParser(object):
             ophash = datetime.strftime(opdate, '%Y-%m-%d') + hashlib.md5(json.dumps(row).encode()).hexdigest()
             # Special use case: one file contains multiple identical lines.
             # Then we append a counter to the duplicate ophash.
-            if ophash in self.lines:
-                print("*** Duplicate line found in {}: {}".format(filename, ';'.join(row.values())))
-                if ophash not in self.dups:
-                    self.dups[ophash] = 1
+            if ophash in self.dups:
+#                print("    *** doublon trouvé dans '{}': {}".format(filename, ';'.join(row.values())))
                 self.dups[ophash] = self.dups[ophash] + 1
                 ophash = ophash + "-" + str(self.dups[ophash])
                 # print("   We have now :\n  {}\n  {}".format("\n  ".join([h + "   // " + "".join(v.values()) for h,v in self.lines.items() if h.startswith(ophash[0:10])]), ophash + " // " + "".join(row.values()))) # XXX DEBUG
+            else:
+                self.dups[ophash] = 0
             self.lines[ophash] = {k:v for k,v in row.items() if k != ''}
             # Adjust dateranges
             if opdate < self.daterange[0]: