# Source code for woob.tools.capabilities.bank.transactions

# Copyright(C) 2009-2012  Romain Bignon
#
# This file is part of woob.
#
# woob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# woob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with woob. If not, see <http://www.gnu.org/licenses/>.

from collections import OrderedDict
from decimal import Decimal, InvalidOperation
import datetime
import re

from woob.browser.elements import TableElement, ItemElement
from woob.browser.filters.html import TableCell
from woob.browser.filters.standard import Filter, CleanText, CleanDecimal
from woob.capabilities import NotAvailable, NotLoaded
from woob.capabilities.bank import Transaction, Account
from woob.exceptions import ParseError
from woob.tools.date import new_datetime
from woob.tools.log import getLogger
from woob.tools.misc import classproperty


# Names exported by a star-import of this module.
__all__ = [
    'FrenchTransaction', 'AmericanTransaction',
    'sorted_transactions', 'merge_iterators', 'keep_only_card_transactions',
    'omit_deferred_transactions',
]


def parse_with_patterns(raw, obj, patterns):
    """
    Set the type, label, category and real date of *obj* from its raw label.

    ``obj.label`` is first set to *raw*. The patterns are then tried in order
    with ``pattern.match(raw)`` and only the first one that matches is
    applied. The following named groups are used when they exist in the
    pattern and took part in the match:

    * text, text2: stripped and joined with a space to build ``obj.label``
    * category: stripped and stored in ``obj.category``
    * dd, mm: day and month of ``obj.rdate`` (a day of ``00`` is read as 1)
    * yy: year of ``obj.rdate``; values below 100 get 2000 added
    * HH, MM: when both are present ``obj.rdate`` is a ``datetime.datetime``,
      otherwise it is a ``datetime.date``

    When dd and mm are present without yy, the year is inferred from
    ``obj.date``: dd/mm is placed in the year of ``obj.date`` (or in the
    previous year if that day does not exist), and a year is subtracted if
    the result is later than ``obj.date``.

    :param raw: raw label of the transaction
    :param obj: object to fill in; ``obj.date`` is read when the year has to
                be inferred
    :param patterns: iterable of ``(compiled regexp, type)`` pairs
    :raises ParseError: if the date found in the label cannot be built
    """
    obj.label = raw

    for pattern, _type in patterns:
        m = pattern.match(raw)
        if not m:
            continue

        args = m.groupdict()

        def inargs(key):
            """
            inner function to check if a key is in args,
            and is not None.
            """
            return args.get(key) is not None

        obj.type = _type
        labels = [args[name].strip() for name in ('text', 'text2') if inargs(name)]
        if labels:
            obj.label = ' '.join(labels)

        if inargs('category'):
            obj.category = args['category'].strip()

        # Set date from information in raw label.
        if inargs('dd') and inargs('mm'):
            dd = int(args['dd']) if args['dd'] != '00' else 1
            mm = int(args['mm'])

            if inargs('yy'):
                yy = int(args['yy'])
            else:
                d = obj.date
                try:
                    d = d.replace(month=mm, day=dd)
                except ValueError:
                    # dd/mm does not exist in the year of obj.date (for
                    # instance 29/02): try the previous year. If it does not
                    # exist there either, report it like any other invalid
                    # date instead of leaking a bare ValueError.
                    try:
                        d = d.replace(year=d.year - 1, month=mm, day=dd)
                    except ValueError as e:
                        raise ParseError('Unable to parse date in label %r: %s' % (raw, e))

                yy = d.year
                # The date in the label cannot be after the transaction date.
                if d > obj.date:
                    yy -= 1

            if yy < 100:
                yy += 2000

            try:
                if inargs('HH') and inargs('MM'):
                    obj.rdate = datetime.datetime(yy, mm, dd, int(args['HH']), int(args['MM']))
                else:
                    obj.rdate = datetime.date(yy, mm, dd)
            except ValueError as e:
                raise ParseError('Unable to parse date in label %r: %s' % (raw, e))

        # Only the first matching pattern is applied.
        break


class FrenchTransaction(Transaction):
    """
    Transaction with some helpers for french bank websites.
    """
    # List of (compiled regexp, transaction type) pairs used to analyse raw
    # labels; meant to be overridden by subclasses.
    PATTERNS = []

    def __init__(self, id='', *args, **kwargs):
        super(FrenchTransaction, self).__init__(id, *args, **kwargs)
        self._logger = getLogger('%s.FrenchTransaction' % __name__)

    @classmethod
    def clean_amount(klass, text):
        """
        Clean a string containing an amount.

        Dots are removed, commas are turned into dots, then every character
        which is not a digit, a minus sign or a dot is dropped.
        """
        text = text.replace('.', '').replace(',', '.')
        return re.sub(r'[^\d\-\.]', '', text)

    def set_amount(self, credit='', debit=''):
        """
        Set an amount value from a string.

        Can take two strings if there are both credit and debit columns.
        A non-empty debit takes precedence and is always stored as a negative
        amount; if both are empty once cleaned, the amount is zero.
        """
        credit = self.clean_amount(credit)
        debit = self.clean_amount(debit)

        if debit:
            self.amount = - abs(Decimal(debit))
        elif credit:
            self.amount = Decimal(credit)
        else:
            self.amount = Decimal('0')

    @staticmethod
    def _date_from_string(text):
        """
        Build a datetime.date from a ``DDMMYYYY`` or ``DD/MM/YYYY`` string.

        Years below 100 get 2000 added *before* the date is built, as year 0
        is not a valid year for datetime.date.

        :returns: the date, or None if the string has none of the supported
                  formats or does not describe an existing day
        """
        try:
            if text.isdigit() and len(text) == 8:
                day, month, year = int(text[0:2]), int(text[2:4]), int(text[4:8])
            elif '/' in text:
                day, month, year = (int(part) for part in text.split('/'))
            else:
                return None

            if year < 100:
                year += 2000
            return datetime.date(year, month, day)
        except (TypeError, ValueError):
            # Wrong number of parts, non-numeric part or impossible date.
            return None

    def parse_date(self, date):
        """
        Convert a date found on a website to a date object.

        :param date: None, a date/datetime object, or a string formatted as
                     ``DDMMYYYY`` or ``DD/MM/YYYY``
        :returns: the date (with 2000 added to years below 100), or
                  NotAvailable if *date* is None or cannot be parsed (a
                  warning is logged in the latter case)
        """
        if date is None:
            return NotAvailable

        if isinstance(date, (datetime.date, datetime.datetime)):
            if date.year < 100:
                date = date.replace(year=2000 + date.year)
            return date

        parsed = self._date_from_string(date)
        if parsed is None:
            self._logger.warning('Unable to parse date %r' % date)
            return NotAvailable
        return parsed

    def parse(self, date, raw, vdate=None):
        r"""
        Parse date and raw strings to create datetime.date objects,
        determine the type of transaction, and create a simplified label

        When calling this method, you should have defined patterns (in the
        PATTERNS class attribute) with a list containing tuples of regexp
        and the associated type, for example::

            PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                        (re.compile(r'^PRLV (?P<text>.*)'),        FrenchTransaction.TYPE_ORDER),
                        (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                                                                   FrenchTransaction.TYPE_CARD)
                       ]

        In regexps, you can define these named groups:

            * text: part of label to store in simplified label
            * category: part of label representing the category
            * yy, mm, dd, HH, MM: date and time parts

        A ParseError raised while analysing the label is logged as a warning
        and not propagated.
        """
        self.date = self.parse_date(date)
        self.vdate = self.parse_date(vdate)
        # Default real date, possibly replaced by the one found in the label.
        self.rdate = self.date
        self.raw = raw.replace('\n', ' ').strip()
        try:
            parse_with_patterns(self.raw, self, self.PATTERNS)
        except ParseError as e:
            self._logger.warning('Unable to parse date in label %r: %s' % (self.raw, e))

    @classproperty
    def TransactionElement(k):
        """
        ItemElement subclass filling a transaction from the cells of a table
        row, using the Date, Raw and Amount filters of this class.
        """
        class _TransactionElement(ItemElement):
            klass = k

            obj_date = klass.Date(TableCell('date'))
            # presumably falls back on the 'date' column when there is no
            # 'vdate' column -- verify against TableCell
            obj_vdate = klass.Date(TableCell('vdate', 'date'))
            obj_raw = klass.Raw(TableCell('raw'))
            obj_amount = klass.Amount(TableCell('credit'), TableCell('debit', default=''))

        return _TransactionElement

    @classproperty
    def TransactionsElement(klass):
        """
        TableElement subclass declaring the usual french column titles and
        using TransactionElement for its items.
        """
        class _TransactionsElement(TableElement):
            col_date = ['Date']
            col_vdate = ['Valeur']
            col_raw = ['Opération', 'Libellé', 'Intitulé opération']
            col_credit = ['Crédit', 'Montant']
            col_debit = ['Débit']

            item = klass.TransactionElement

        return _TransactionsElement

    class Date(CleanText):
        """
        Filter converting a ``DDMMYYYY`` or ``DD/MM/YYYY`` text to a date.
        """
        def __call__(self, item):
            date = super(FrenchTransaction.Date, self).__call__(item)
            return date

        def filter(self, date):
            """
            Return the date described by the cleaned text.

            NotAvailable is returned if the value is None or cannot be parsed;
            2000 is added to years below 100.
            """
            date = super(FrenchTransaction.Date, self).filter(date)
            if date is None:
                return NotAvailable

            if isinstance(date, (datetime.date, datetime.datetime)):
                if date.year < 100:
                    date = date.replace(year=2000 + date.year)
                return date

            parsed = FrenchTransaction._date_from_string(date)
            if parsed is None:
                return NotAvailable
            return parsed

    @classmethod
    def Raw(klass, *args, **kwargs):
        """
        Build a filter returning the raw label of a transaction.

        As a side effect, the filter analyses the label with the PATTERNS of
        this class to set the type, label, category and rdate of the object
        being built.
        """
        patterns = klass.PATTERNS

        # NOTE: this local class shadows the Filter imported at module level
        # inside this method only.
        class Filter(CleanText):
            def __call__(self, item):
                raw = super(Filter, self).__call__(item)
                # parse_with_patterns only sets rdate when the label holds a
                # date, so default it to the transaction date first.
                if item.obj.rdate is NotLoaded:
                    item.obj.rdate = item.obj.date
                parse_with_patterns(raw, item.obj, patterns)
                return raw

            def filter(self, text):
                text = super(Filter, self).filter(text)
                return text.replace('\n', ' ').strip()

        return Filter(*args, **kwargs)

    class Currency(CleanText):
        """
        Filter converting a text to a currency with Account.get_currency.
        """
        def filter(self, text):
            text = super(FrenchTransaction.Currency, self).filter(text)
            return Account.get_currency(text)

    class Amount(Filter):
        """
        Filter computing a signed amount from credit and debit selectors.

        :param credit: selector of the credit (or signed amount) value
        :param debit: optional selector of the debit value
        :param replace_dots: passed as is to CleanDecimal
        """
        def __init__(self, credit, debit=None, replace_dots=True):
            self.credit_selector = credit
            self.debit_selector = debit
            self.replace_dots = replace_dots

        def __call__(self, item):
            # The debit is tried first and always returned as a negative
            # amount. An InvalidOperation raised while reading a selector
            # makes the next one be tried.
            if self.debit_selector:
                try:
                    return - abs(CleanDecimal(self.debit_selector, replace_dots=self.replace_dots)(item))
                except InvalidOperation:
                    pass

            if self.credit_selector:
                try:
                    return CleanDecimal(self.credit_selector, replace_dots=self.replace_dots)(item)
                except InvalidOperation:
                    pass

            return Decimal('0')
class AmericanTransaction(Transaction):
    """
    Transaction with some helpers for american bank websites.
    """

    @classmethod
    def clean_amount(klass, text):
        """
        Clean a string containing an amount.

        When the text ends with a digit, a dot and two digits (optionally
        followed by a space and uppercase letters), it is first rewritten
        with a comma as decimal separator, then handed over to
        FrenchTransaction.clean_amount.
        """
        looks_american = re.search(r'\d\.\d\d(?: [A-Z]+)?$', text)
        if looks_american:
            # Convert "American" UUU.CC format to "French" UUU,CC format
            without_commas = text.replace(',', ' ')
            text = without_commas.replace('.', ',')
        return FrenchTransaction.clean_amount(text)

    @classmethod
    def decimal_amount(klass, text):
        """
        Convert a string containing an amount to Decimal.

        An amount which is empty once cleaned gives ``Decimal('0')``.
        """
        cleaned = AmericanTransaction.clean_amount(text)
        if not cleaned:
            return Decimal('0')
        return Decimal(cleaned)
def sorted_transactions(iterable):
    """Sort an iterable of transactions in reverse chronological order.

    Transactions are ordered on their date, then on their rdate converted
    with new_datetime; a transaction with a false rdate is handled as if its
    rdate were ``datetime.datetime.min``.

    :returns: a new list, most recent transaction first
    """
    def sort_key(tr):
        if tr.rdate:
            real_date = new_datetime(tr.rdate)
        else:
            real_date = datetime.datetime.min
        return (tr.date, real_date)

    return sorted(iterable, key=sort_key, reverse=True)
def merge_iterators(*iterables):
    """Merge transactions iterators keeping sort order.

    Each iterator must already be sorted in reverse chronological order.

    The transaction yielded at each step is the one with the greatest
    ``(date, rdate)`` among the next transaction of every iterator.
    """
    def newest(entry):
        _, tr = entry
        return (tr.date, tr.rdate)

    # Maps each iterator to the transaction it is currently offering.
    heads = OrderedDict((iter(source), None) for source in iterables)

    def advance(source):
        # Fetch the next transaction of an iterator, or forget the iterator
        # once it is exhausted.
        try:
            heads[source] = next(source)
        except StopIteration:
            del heads[source]

    for source in list(heads):
        advance(source)

    while heads:
        source, tr = max(heads.items(), key=newest)
        yield tr
        advance(source)
def keep_only_card_transactions(it, match_func=None):
    """Filter iterator to keep transactions with card types.

    This helper should typically be used when a banking site returns card and
    non-card transactions mixed on the same checking account.

    Types kept are `TYPE_DEFERRED_CARD` and `TYPE_CARD_SUMMARY`.
    Additionally, the amount is negated for transactions with type
    `TYPE_CARD_SUMMARY`. This is because on the deferred debit card account,
    summaries should be positive as the amount is debited from the checking
    account to credit the card account.

    The `match_func` can be provided in case of multiple cards, to only
    return transactions of one card. It is only called on transactions
    having one of the kept types.

    :param match_func: optional function to filter transactions further
    :type match_func: callable or None
    """
    for tr in it:
        if tr.type == tr.TYPE_DEFERRED_CARD:
            is_summary = False
        elif tr.type == tr.TYPE_CARD_SUMMARY:
            is_summary = True
        else:
            continue

        if match_func is not None and not match_func(tr):
            continue

        if is_summary:
            # The yielded transaction object itself is modified.
            tr.amount = -tr.amount
        yield tr
def omit_deferred_transactions(it):
    """Filter iterator to omit transactions with type `TYPE_DEFERRED_CARD`.

    This helper should typically be used when a banking site returns card and
    non-card transactions mixed on the same checking account.

    Every other transaction is yielded unchanged, in the original order.
    """
    yield from (tr for tr in it if tr.type != tr.TYPE_DEFERRED_CARD)