import_payments_from_csv.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. # Standard python libs
  4. import csv
  5. import datetime
  6. import re
  7. import os
  8. import json
  9. import logging
  10. # Django specific imports
  11. from argparse import RawTextHelpFormatter
  12. from django.core.management.base import BaseCommand, CommandError
  13. # Coin specific imports
  14. from coin.members.models import Member
  15. from coin.billing.models import Payment
  16. # Parser / import / matcher configuration
  17. # The CSV delimiter
  18. DELIMITER=str(';')
  19. # The date format in the CSV
  20. DATE_FORMAT="%d/%m/%Y"
  21. # The default regex used to match the label of a payment with a member ID
  22. ID_REGEX=r"(?i)(\b|_)ID[\s\-\_\/]*(\d+)(\b|_)"
  23. # If the label of the payment contains one of these, the payment won't be
  24. # matched to a member when importing it.
  25. KEYWORDS_TO_NOTMATCH=[ "DON", "MECENAT", "REM CHQ" ]
  26. class Command(BaseCommand):
  27. help = """
  28. Import payments from a CSV file from a bank. The payments will automatically be
  29. parsed, and there'll be an attempt to automatically match payments with members.
  30. The matching is performed using the label of the payment.
  31. - First, try to find a string such as 'ID-42' where 42 is the member's ID
  32. - Second (if no ID found), try to find a member username (with no ambiguity with
  33. respect to other usernames)
  34. - Third (if no username found), try to find a member family name (with no
  35. ambiguity with respect to other family name)
  36. This script will check if a payment has already been registered with same
  37. properies (date, label, price) to avoid creating duplicate payments inside coin.
  38. By default, only a dry-run is perfomed to let you see what will happen ! You
  39. should run this command with --commit if you agree with the dry-run."""
  40. def create_parser(self, *args, **kwargs):
  41. parser = super(Command, self).create_parser(*args, **kwargs)
  42. parser.formatter_class = RawTextHelpFormatter
  43. return parser
  44. def add_arguments(self, parser):
  45. parser.add_argument(
  46. 'filename',
  47. type=str,
  48. help="The CSV filename to be parsed"
  49. )
  50. parser.add_argument(
  51. '--commit',
  52. action='store_true',
  53. dest='commit',
  54. default=False,
  55. help='Agree with the proposed change and commit them'
  56. )
  57. def handle(self, *args, **options):
  58. assert options["filename"] != ""
  59. if not os.path.isfile(options["filename"]):
  60. raise CommandError("This file does not exists.")
  61. payments = self.convertCSVToDicts(self.cleanCSV(self.loadCSV(options["filename"])))
  62. payments = self.tryToMatchPaymentWithMembers(payments)
  63. newPayments = self.filterAlreadyKnownPayments(payments)
  64. newPayments = self.unmatchPaymentWithKeywords(newPayments)
  65. numberOfAlreadyKnownPayments = len(payments)-len(newPayments)
  66. numberOfNewPayments = len(newPayments)
  67. if (numberOfNewPayments > 0) :
  68. print "======================================================"
  69. print " > New payments found"
  70. print json.dumps(newPayments, indent=4, separators=(',', ': '))
  71. print "======================================================"
  72. print "Number of already known payments found : " + str(numberOfAlreadyKnownPayments)
  73. print "Number of new payments found : " + str(numberOfNewPayments)
  74. print "Number of new payments matched : " + str(len([p for p in newPayments if p["memberMatched"]]))
  75. print "Number of payments not matched : " + str(len([p for p in newPayments if not p["memberMatched"]]))
  76. print "======================================================"
  77. if numberOfNewPayments == 0:
  78. print "Nothing to do, everything looks up to date !"
  79. return
  80. if not options["commit"]:
  81. print "Please carefully review the matches, then if everything \n" \
  82. "looks alright, use --commit to register these new payments."
  83. else:
  84. self.addNewPayments(newPayments)
  85. def isDate(self, text):
  86. try:
  87. datetime.datetime.strptime(text, DATE_FORMAT)
  88. return True
  89. except ValueError:
  90. return False
  91. def isMoneyAmount(self, text):
  92. try:
  93. float(text.replace(",","."))
  94. return True
  95. except ValueError:
  96. return False
  97. def loadCSV(self, filename):
  98. with open(filename, "r") as f:
  99. return list(csv.reader(f, delimiter=DELIMITER))
  100. def cleanCSV(self, data):
  101. output = []
  102. for i, row in enumerate(data):
  103. for j in range(len(row)):
  104. row[j] = row[j].decode('utf-8')
  105. if len(row) < 4:
  106. continue
  107. if not self.isDate(row[0]):
  108. logging.warning("Ignoring the following row (bad format for date in the first column) :")
  109. logging.warning(str(row))
  110. continue
  111. if self.isMoneyAmount(row[2]):
  112. logging.warning("Ignoring row %s (not a payment)" % str(i))
  113. logging.warning(str(row))
  114. continue
  115. if not self.isMoneyAmount(row[3]):
  116. logging.warning("Ignoring the following row (bad format for money amount in colun three) :")
  117. logging.warning(str(row))
  118. continue
  119. # Clean the date
  120. row[0] = datetime.datetime.strptime(row[0], DATE_FORMAT).strftime("%Y-%m-%d")
  121. # Clean the label ...
  122. row[4] = row[4].replace('\r', ' ')
  123. row[4] = row[4].replace('\n', ' ')
  124. output.append(row)
  125. return output
  126. def convertCSVToDicts(self, data):
  127. output = []
  128. for row in data:
  129. payment = {}
  130. payment["date"] = row[0]
  131. payment["label"] = row[4]
  132. payment["amount"] = float(row[3].replace(",","."))
  133. output.append(payment)
  134. return output
  135. def tryToMatchPaymentWithMembers(self, payments):
  136. members = Member.objects.filter(status="member")
  137. idregex = re.compile(ID_REGEX)
  138. for payment in payments:
  139. paymentLabel = payment["label"]
  140. # First, attempt to match the member ID
  141. idmatches = idregex.findall(paymentLabel)
  142. if len(idmatches) == 1:
  143. i = int(idmatches[0][1])
  144. memberMatches = [ member.username for member in members if member.pk==i ]
  145. if len(memberMatches) == 1:
  146. payment["memberMatched"] = memberMatches[0]
  147. #print "Matched by ID to "+memberMatches[0]
  148. continue
  149. # Second, attempt to find the username
  150. usernamematch = None
  151. for member in members:
  152. matches = re.compile(r"(?i)(\b|_)"+re.escape(member.username)+r"(\b|_)") \
  153. .findall(paymentLabel)
  154. # If not found, try next
  155. if len(matches) == 0:
  156. continue
  157. # If we already had a match, abort the whole search because we
  158. # have multiple usernames matched !
  159. if usernamematch != None:
  160. usernamematch = None
  161. break
  162. usernamematch = member.username
  163. if usernamematch != None:
  164. payment["memberMatched"] = usernamematch
  165. #print "Matched by username to "+usernamematch
  166. continue
  167. # Third, attempt to match by family name
  168. familynamematch = None
  169. for member in members:
  170. matches = re.compile(r"(?i)(\b|_)"+re.escape(str(member.last_name))+r"(\b|_)") \
  171. .findall(paymentLabel)
  172. # If not found, try next
  173. if len(matches) == 0:
  174. continue
  175. # If this familyname was matched several time, abort the whole search
  176. if len(matches) > 1:
  177. familynamematch = None
  178. break
  179. # If we already had a match, abort the whole search because we
  180. # have multiple familynames matched !
  181. if familynamematch != None:
  182. familynamematch = None
  183. break
  184. familynamematch = str(member.last_name)
  185. if familynamematch != None:
  186. payment["memberMatched"] = familynamematch
  187. #print "Matched by familyname to "+familynamematch
  188. continue
  189. #print "Could not match"
  190. payment["memberMatched"] = None
  191. return payments
  192. def unmatchPaymentWithKeywords(self, payments):
  193. matchers = {}
  194. for keyword in KEYWORDS_TO_NOTMATCH:
  195. matchers[keyword] = re.compile(r"(?i)(\b|_|-)"+re.escape(keyword)+r"(\b|_|-)")
  196. for i, payment in enumerate(payments):
  197. # If no match found, don't filter anyway
  198. if payment["memberMatched"] == None:
  199. continue
  200. for keyword, matcher in matchers.items():
  201. matches = matcher.findall(payment["label"])
  202. # If not found, try next
  203. if len(matches) == 0:
  204. continue
  205. print "Ignoring possible match for payment '%s' because " \
  206. "it contains the keyword %s" \
  207. % (payment["label"], keyword)
  208. payments[i]["memberMatched"] = None
  209. break
  210. return payments
  211. def filterAlreadyKnownPayments(self, payments):
  212. newPayments = []
  213. knownPayments = Payment.objects.all()
  214. for payment in payments:
  215. foundMatch = False
  216. for knownPayment in knownPayments:
  217. if (str(knownPayment.date) == payment["date"].encode('utf-8')) \
  218. and (knownPayment.label == payment["label"]) \
  219. and (float(knownPayment.amount) == float(payment["amount"])):
  220. foundMatch = True
  221. break
  222. if not foundMatch:
  223. newPayments.append(payment)
  224. return newPayments
  225. def addNewPayments(self, newPayments):
  226. for newPayment in newPayments:
  227. # Get the member if there's a member matched
  228. member = None
  229. if newPayment["memberMatched"]:
  230. member = Member.objects.filter(username=newPayment["memberMatched"])
  231. assert len(member) == 1
  232. member = member[0]
  233. print "Adding new payment : "
  234. print newPayment
  235. # Create the payment
  236. payment = Payment.objects.create(amount=float(newPayment["amount"]),
  237. label=newPayment["label"].encode('utf-8'),
  238. date=newPayment["date"].encode('utf-8'),
  239. member=member)