Browse Source

Draft of import function with payment-member matching

Alexandre Aubin 8 years ago
parent
commit
0fe3467826
1 changed files with 242 additions and 0 deletions
  1. 242 0
      coin/billing/management/commands/import_payments_from_csv.py

+ 242 - 0
coin/billing/management/commands/import_payments_from_csv.py

@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+# Standard python libs
+import csv
+import datetime
+import re
+import os
+import json
+
+import logging
+
+# Django specific imports
+from argparse import RawTextHelpFormatter
+from django.core.management.base import BaseCommand, CommandError
+
+# Coin specific imports
+from coin.members.models import Member
+from coin.billing.models import Invoice, Payment
+
+# Parser / import / matcher configuration
+
+# The CSV delimiter
+DELIMITER=str(';')
+# The date format in the CSV
+DATE_FORMAT="%d/%m/%Y"
+# The default regex used to match the label of a payment with a member ID
+ID_REGEX=r"(?i)\bID[\s\-\_\/]*(\d+)\b"
+
+
+
+class Command(BaseCommand):
+
+    help = """
+Import payments from a CSV file from a bank.  The payments will automatically be
+parsed, and there'll be an attempt to automatically match payments with members.
+
+The matching is performed using the label of the payment.
+- First, try to find a string such as 'ID-42' where 42 is the member's ID
+- Second (if no ID found), try to find a member username (with no ambiguity with
+  respect to other usernames)
+- Third (if no username found), try to find a member family name (with no
+  ambiguity with respect to other family name)
+
+This script will check if a payment has already been registered with same
+properies (date, label, price) to avoid creating duplicate payments inside coin.
+
+By default, only a dry-run is perfomed to let you see what will happen ! You
+should run this command with --commit if you agree with the dry-run."""
+              
+    def create_parser(self, *args, **kwargs):
+        parser = super(Command, self).create_parser(*args, **kwargs)
+        parser.formatter_class = RawTextHelpFormatter
+        return parser
+
+    def add_arguments(self, parser):
+
+        parser.add_argument(
+            'filename', 
+            type=str,
+            help="The CSV filename to be parsed"
+        )
+
+        parser.add_argument(
+            '--commit',
+            action='store_true',
+            dest='commit',
+            default=False,
+            help='Agree with the proposed change and commit them'
+        )
+
+
+    def handle(self, *args, **options):
+
+        assert options["filename"] != ""
+
+        if not os.path.isfile(options["filename"]):
+            raise CommandError("This file does not exists.")
+
+        payments = self.convertCSVToDicts(self.cleanCSV(self.loadCSV(options["filename"])))
+
+        members = Member.objects.filter(status="member")
+        
+        #members = [ { "id": 5, "username": "toto",    "familyname": "Michu"  },
+        #            { "id": 3, "username": "johndoe", "familyname": "Doe"    }  ]
+
+        payments.append({ "date:":"someDate", "label":"foo ID 43 zob", "amount":30.0})
+        payments.append({ "date:":"someDate", "label":"foo JohnDoe zob", "amount":30.0})
+        payments.append({ "date:":"someDate", "label":"foo John Doe zob", "amount":30.0})
+        
+        print json.dumps(payments, indent=4, separators=(',', ': '))
+        
+        maybeMatchedPayments = self.tryToMatchPaymentWithMembers(payments, members)
+      
+        print json.dumps(maybeMatchedPayments, indent=4, separators=(',', ': '))
+        print "Number of payments found       : " + str(len(maybeMatchedPayments))
+        print "Number of payments matched     : " + str(len([p for p in maybeMatchedPayments if     p["memberMatched"]]))
+        print "Number of payments not matched : " + str(len([p for p in maybeMatchedPayments if not p["memberMatched"]]))
+        return
+
+
+
+    def isDate(self, text):
+        try:
+            datetime.datetime.strptime(text, DATE_FORMAT)
+            return True
+        except ValueError:
+            return False
+
+
+    def isMoneyAmount(self, text):
+        try:
+            float(text.replace(",","."))
+            return True
+        except ValueError:
+            return False
+
+
+    def loadCSV(self, filename):
+        with open(filename, "r") as f:
+            return list(csv.reader(f, delimiter=DELIMITER))
+
+
+    def cleanCSV(self, data):
+
+        output = []
+
+        for i, row in enumerate(data):
+
+            if len(row) < 4:
+                #logging.warning("Ignoring the following row (bad number of elements) :")
+                #logging.warning(str(row))
+                continue
+            
+            if not self.isDate(row[0]):
+                logging.warning("Ignoring the following row (bad format for date in the first column) :")
+                logging.warning(str(row))
+                continue
+
+            if self.isMoneyAmount(row[2]):
+                #logging.warning("Ignoring the following row (not a payment from a member) :")
+                #logging.warning(str(row))
+                logging.warning("Ignoring row %s (not a payment from a member)" % str(i))
+                continue
+
+            if not self.isMoneyAmount(row[3]):
+                logging.warning("Ignoring the following row (bad format for money amount in third colum) :")
+                logging.warning(str(row))
+                continue
+
+            output.append(row)
+
+        return output
+
+
+    def convertCSVToDicts(self, data):
+
+        output = []
+
+        for row in data:
+            payment = {}
+
+            payment["date"] = row[0]
+            payment["label"] = row[1]
+            payment["amount"] = float(row[3].replace(",","."))
+
+            output.append(payment)
+
+        return output
+
+
+
+    def tryToMatchPaymentWithMembers(self, payments, members):
+
+        idregex = re.compile(ID_REGEX)
+
+        for payment in payments:
+            
+            paymentLabel = str(payment["label"])
+
+            # First, attempt to match the member ID
+            idmatches = idregex.findall(paymentLabel)
+            if len(idmatches) == 1:
+                i = int(idmatches[0])
+                memberMatches = [ member.username for member in members if member.pk==i ]
+                if len(memberMatches) == 1:
+                    payment["memberMatched"] = memberMatches[0]
+                    #print "Matched by ID to "+memberMatches[0]
+                    continue
+
+
+            # Second, attempt to find the username
+            usernamematch = None
+            for member in members:
+                matches = re.compile(r"(?i)\b"+re.escape(member.username)+r"\b") \
+                            .findall(paymentLabel)
+                # If not found, try next
+                if len(matches) == 0:
+                    continue
+                # If we already had a match, abort the whole search because we
+                # have multiple usernames matched !
+                if usernamematch != None:
+                    usernamematch = None
+                    break
+
+                usernamematch = member.username
+
+            if usernamematch != None:
+                payment["memberMatched"] = usernamematch
+                #print "Matched by username to "+usernamematch
+                continue 
+
+
+            # Third, attempt to match by family name
+            familynamematch = None
+            for member in members:
+                matches = re.compile(r"(?i)\b"+re.escape(str(member.last_name))+r"\b") \
+                            .findall(paymentLabel)
+                # If not found, try next
+                if len(matches) == 0:
+                    continue
+                # If this familyname was matched several time, abort the whole search
+                if len(matches) > 1:
+                    familynamematch = None
+                    break
+                # If we already had a match, abort the whole search because we
+                # have multiple familynames matched !
+                if familynamematch != None:
+                    familynamematch = None
+                    break
+
+                familynamematch = str(member.last_name)
+
+            if familynamematch != None:
+                payment["memberMatched"] = familynamematch
+                #print "Matched by familyname to "+familynamematch
+                continue 
+
+            #print "Could not match"
+            payment["memberMatched"] = None
+
+        return payments