import_payments_from_csv.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. # Standard python libs
  4. import csv
  5. import datetime
  6. import re
  7. import os
  8. import json
  9. import logging
  10. # Django specific imports
  11. from argparse import RawTextHelpFormatter
  12. from django.core.management.base import BaseCommand, CommandError
  13. # Coin specific imports
  14. from coin.members.models import Member
  15. from coin.billing.models import Payment
  16. # Parser / import / matcher configuration
  17. # The CSV delimiter
  18. DELIMITER=str(';')
  19. # The date format in the CSV
  20. DATE_FORMAT="%d/%m/%Y"
  21. # The default regex used to match the label of a payment with a member ID
  22. ID_REGEX=r"(?i)(\b|_)ID[\s\-\_\/]*(\d+)(\b|_)"
  23. class Command(BaseCommand):
  24. help = """
  25. Import payments from a CSV file from a bank. The payments will automatically be
  26. parsed, and there'll be an attempt to automatically match payments with members.
  27. The matching is performed using the label of the payment.
  28. - First, try to find a string such as 'ID-42' where 42 is the member's ID
  29. - Second (if no ID found), try to find a member username (with no ambiguity with
  30. respect to other usernames)
  31. - Third (if no username found), try to find a member family name (with no
  32. ambiguity with respect to other family name)
  33. This script will check if a payment has already been registered with same
  34. properies (date, label, price) to avoid creating duplicate payments inside coin.
  35. By default, only a dry-run is perfomed to let you see what will happen ! You
  36. should run this command with --commit if you agree with the dry-run."""
  37. def create_parser(self, *args, **kwargs):
  38. parser = super(Command, self).create_parser(*args, **kwargs)
  39. parser.formatter_class = RawTextHelpFormatter
  40. return parser
  41. def add_arguments(self, parser):
  42. parser.add_argument(
  43. 'filename',
  44. type=str,
  45. help="The CSV filename to be parsed"
  46. )
  47. parser.add_argument(
  48. '--commit',
  49. action='store_true',
  50. dest='commit',
  51. default=False,
  52. help='Agree with the proposed change and commit them'
  53. )
  54. def handle(self, *args, **options):
  55. assert options["filename"] != ""
  56. if not os.path.isfile(options["filename"]):
  57. raise CommandError("This file does not exists.")
  58. payments = self.convertCSVToDicts(self.cleanCSV(self.loadCSV(options["filename"])))
  59. # Dummy payments for test
  60. #payments = []
  61. #payments.append({ "date": "2017-03-02", "label":"foo ID 43 zob", "amount":30.0})
  62. #payments.append({ "date": "2017-03-14", "label":"foo JohnDoe zob", "amount":30.0})
  63. #payments.append({ "date": "2017-04-03", "label":"foo John Doe zob", "amount":30.0})
  64. payments = self.tryToMatchPaymentWithMembers(payments)
  65. newPayments = self.filterAlreadyKnownPayments(payments)
  66. numberOfAlreadyKnownPayments = len(payments)-len(newPayments)
  67. numberOfNewPayments = len(newPayments)
  68. if (numberOfNewPayments > 0) :
  69. print "======================================================"
  70. print " > New payments found"
  71. print json.dumps(newPayments, indent=4, separators=(',', ': '))
  72. print "======================================================"
  73. print "Number of already known payments found : " + str(numberOfAlreadyKnownPayments)
  74. print "Number of new payments found : " + str(numberOfNewPayments)
  75. print "Number of new payments matched : " + str(len([p for p in newPayments if p["memberMatched"]]))
  76. print "Number of payments not matched : " + str(len([p for p in newPayments if not p["memberMatched"]]))
  77. print "======================================================"
  78. if numberOfNewPayments == 0:
  79. print "Nothing to do, everything looks up to date !"
  80. return
  81. if not options["commit"]:
  82. print "Please carefully review the matches, then if everything \n" \
  83. "looks alright, use --commit to register these new payments."
  84. else:
  85. self.addNewPayments(newPayments)
  86. def isDate(self, text):
  87. try:
  88. datetime.datetime.strptime(text, DATE_FORMAT)
  89. return True
  90. except ValueError:
  91. return False
  92. def isMoneyAmount(self, text):
  93. try:
  94. float(text.replace(",","."))
  95. return True
  96. except ValueError:
  97. return False
  98. def loadCSV(self, filename):
  99. with open(filename, "r") as f:
  100. return list(csv.reader(f, delimiter=DELIMITER))
  101. def cleanCSV(self, data):
  102. output = []
  103. for i, row in enumerate(data):
  104. for j in range(len(row)):
  105. row[j] = row[j].decode('utf-8')
  106. if len(row) < 4:
  107. continue
  108. if not self.isDate(row[0]):
  109. logging.warning("Ignoring the following row (bad format for date in the first column) :")
  110. logging.warning(str(row))
  111. continue
  112. if self.isMoneyAmount(row[2]):
  113. logging.warning("Ignoring row %s (not a payment)" % str(i))
  114. logging.warning(str(row))
  115. continue
  116. if not self.isMoneyAmount(row[3]):
  117. logging.warning("Ignoring the following row (bad format for money amount in colun three) :")
  118. logging.warning(str(row))
  119. continue
  120. # Clean the date
  121. row[0] = datetime.datetime.strptime(row[0], DATE_FORMAT).strftime("%Y-%m-%d")
  122. # Clean the label ...
  123. row[4] = row[4].replace('\r', ' ')
  124. row[4] = row[4].replace('\n', ' ')
  125. output.append(row)
  126. return output
  127. def convertCSVToDicts(self, data):
  128. output = []
  129. for row in data:
  130. payment = {}
  131. payment["date"] = row[0]
  132. payment["label"] = row[4]
  133. payment["amount"] = float(row[3].replace(",","."))
  134. output.append(payment)
  135. return output
  136. def tryToMatchPaymentWithMembers(self, payments):
  137. members = Member.objects.filter(status="member")
  138. idregex = re.compile(ID_REGEX)
  139. for payment in payments:
  140. paymentLabel = payment["label"]
  141. # First, attempt to match the member ID
  142. idmatches = idregex.findall(paymentLabel)
  143. if len(idmatches) == 1:
  144. i = int(idmatches[0][1])
  145. memberMatches = [ member.username for member in members if member.pk==i ]
  146. if len(memberMatches) == 1:
  147. payment["memberMatched"] = memberMatches[0]
  148. #print "Matched by ID to "+memberMatches[0]
  149. continue
  150. # Second, attempt to find the username
  151. usernamematch = None
  152. for member in members:
  153. matches = re.compile(r"(?i)(\b|_)"+re.escape(member.username)+r"(\b|_)") \
  154. .findall(paymentLabel)
  155. # If not found, try next
  156. if len(matches) == 0:
  157. continue
  158. # If we already had a match, abort the whole search because we
  159. # have multiple usernames matched !
  160. if usernamematch != None:
  161. usernamematch = None
  162. break
  163. usernamematch = member.username
  164. if usernamematch != None:
  165. payment["memberMatched"] = usernamematch
  166. #print "Matched by username to "+usernamematch
  167. continue
  168. # Third, attempt to match by family name
  169. familynamematch = None
  170. for member in members:
  171. matches = re.compile(r"(?i)(\b|_)"+re.escape(str(member.last_name))+r"(\b|_)") \
  172. .findall(paymentLabel)
  173. # If not found, try next
  174. if len(matches) == 0:
  175. continue
  176. # If this familyname was matched several time, abort the whole search
  177. if len(matches) > 1:
  178. familynamematch = None
  179. break
  180. # If we already had a match, abort the whole search because we
  181. # have multiple familynames matched !
  182. if familynamematch != None:
  183. familynamematch = None
  184. break
  185. familynamematch = str(member.last_name)
  186. if familynamematch != None:
  187. payment["memberMatched"] = familynamematch
  188. #print "Matched by familyname to "+familynamematch
  189. continue
  190. #print "Could not match"
  191. payment["memberMatched"] = None
  192. return payments
  193. def filterAlreadyKnownPayments(self, payments):
  194. newPayments = []
  195. knownPayments = Payment.objects.all()
  196. for payment in payments:
  197. foundMatch = False
  198. for knownPayment in knownPayments:
  199. if (str(knownPayment.date) == payment["date"].encode('utf-8')) \
  200. and (knownPayment.label == payment["label"]) \
  201. and (float(knownPayment.amount) == float(payment["amount"])):
  202. foundMatch = True
  203. break
  204. if not foundMatch:
  205. newPayments.append(payment)
  206. return newPayments
  207. def addNewPayments(self, newPayments):
  208. for newPayment in newPayments:
  209. # Get the member if there's a member matched
  210. member = None
  211. if newPayment["memberMatched"]:
  212. member = Member.objects.filter(username=newPayment["memberMatched"])
  213. assert len(member) == 1
  214. member = member[0]
  215. print "Adding new payment : "
  216. print newPayment
  217. # Create the payment
  218. payment = Payment.objects.create(amount=float(newPayment["amount"]),
  219. label=newPayment["label"].encode('utf-8'),
  220. date=newPayment["date"].encode('utf-8'),
  221. member=member)