util: Add parse_currency_dec.

The current importers trim lots of extraneous symbols and whitespace from currency strings before passing them to Decimal(). This function takes care of all that in a single place.
2017-12-18 23:04:05 -05:00 · 2017-12-18 23:04:05 -05:00 · 6ea28c2c89
commit 6ea28c2c89
parent 0734b6f7a5
6 changed files with 54 additions and 9 deletions
--- a/import2ledger/importers/nbpy2017.py
+++ b/import2ledger/importers/nbpy2017.py
@ -88,7 +88,7 @@ class Invoice2017:
            elif description.startswith('Early Bird ('):
                self.ticket_rate = self.DISCOUNT_TICKET_RATE
            if qty:
-                self.amount += decimal.Decimal(total.lstrip('$'))
+                self.amount += util.parse_currency_dec(total)

    def _read_invoice_activity(self, table, first_row_text, rows_text):
        self.actions = [{
--- a/import2ledger/importers/patreon.py
+++ b/import2ledger/importers/patreon.py
@ -11,6 +11,9 @@ class IncomeImporter(_csv.CSVImporterBase):
        'Pledge',
        'Status',
    ])
+    COPIED_FIELDS = {
+        'Pledge': 'amount',
+    }
    ENTRY_SEED = {
        'currency': 'USD',
    }
@ -28,7 +31,6 @@ class IncomeImporter(_csv.CSVImporterBase):
            return None
        else:
            return {
-                'amount': row['Pledge'].replace(',', ''),
                'payee': '{0[FirstName]} {0[LastName]}'.format(row),
            }

@ -41,7 +43,7 @@ class FeeImporterBase(_csv.CSVImporterBase):

    def _read_row(self, row):
        return {
-            'amount': row[self.AMOUNT_FIELD].lstrip('$'),
+            'amount': row[self.AMOUNT_FIELD],
            'date': util.strpdate(row['Month'], '%Y-%m'),
        }

--- a/import2ledger/importers/stripe.py
+++ b/import2ledger/importers/stripe.py
@ -27,6 +27,6 @@ class PaymentImporter(_csv.CSVImporterBase):
            return {
                'currency': row['Converted Currency'].upper(),
                'date': util.strpdate(row['Created (UTC)'].split(None, 1)[0], self.DATE_FMT),
-                'fee': decimal.Decimal(row['Fee']),
-                'tax': decimal.Decimal(row['Tax']),
+                'fee': util.parse_currency_dec(row['Fee']),
+                'tax': util.parse_currency_dec(row['Tax']),
            }
--- a/import2ledger/template.py
+++ b/import2ledger/template.py
@ -9,7 +9,7 @@ import tokenize

 import babel.numbers

-from . import errors
+from . import errors, util

 class TokenTransformer:
    def __init__(self, source):
@ -253,7 +253,7 @@ class Template:
        template_vars.update(
            date=date.strftime(self.date_fmt),
            payee=payee,
-            amount=decimal.Decimal(amount),
+            amount=util.parse_currency_dec(amount),
            currency=currency,
        )
        for key, value in template_vars.items():
--- a/import2ledger/util.py
+++ b/import2ledger/util.py
@ -1,5 +1,48 @@
 import datetime
+import decimal
 import functools
+import re
+import unicodedata
+
+import babel.numbers
+
+@functools.lru_cache()
+def _currency_pattern(locale):
+    """Return regex source text that matches a signed amount for *locale*.
+
+    The pattern defines three groups:
+
+    1. an optional leading sign (the locale's minus or plus symbol);
+    2. at most one non-word character between the sign and the digits,
+       which is where a currency symbol such as ``$`` lands;
+    3. the digits, including grouping separators and an optional decimal
+       part.
+
+    The result is memoized per locale by ``functools.lru_cache``.
+    """
+    minus = babel.numbers.get_minus_sign_symbol(locale)
+    plus = babel.numbers.get_plus_sign_symbol(locale)
+    dec_sym = babel.numbers.get_decimal_symbol(locale)
+    # The grouping separator is taken to be whichever of '.' and ',' is not
+    # the decimal symbol, rather than being looked up in the locale data.
+    sep_sym = '.' if dec_sym == ',' else ','
+    # Escape the sign symbols before placing them in a character class.
+    # If a locale's symbol carries '-' after another character (some locale
+    # data is believed to prefix a directional mark), an unescaped '-' would
+    # be read as a range operator and could make the pattern invalid.
+    return r'([{}{}]?)\s*(\W?)\s*(\d+(?:{}\d+)*(?:{}\d*)?)'.format(
+        re.escape(minus),
+        re.escape(plus),
+        re.escape(sep_sym),
+        re.escape(dec_sym),
+    )
+
+def parse_currency_dec(s, locale='en_US_POSIX'):
+    """Parse a currency amount and return it as a decimal number.
+
+    *s* is normally a string such as ``'$1,234.56'``, ``'-$5'``, ``'US$5'``
+    or ``'5.00 USD'``.  If the regular expression search raises TypeError
+    (for example because *s* is already a number), *s* is handed straight
+    to ``decimal.Decimal``.
+
+    Besides the number and its sign, a string may carry one run of
+    currency-specifier text: an optional alphabetic code of at most three
+    letters, optionally followed by a single currency symbol.  That run may
+    sit before the number, after it, or between the sign and the digits,
+    but only in one of those places.
+
+    Raises ValueError when no number is found, when extra text appears in
+    more than one place, or when the extra text does not look like a
+    currency specifier.
+    """
+    try:
+        match = re.search(_currency_pattern(locale), s)
+    except TypeError:
+        # Not searchable text; let Decimal convert (or reject) it directly.
+        return decimal.Decimal(s)
+    if not match:
+        raise ValueError("no decimal found in {!r}".format(s))
+    sign, number = match.group(1), match.group(3)
+    if sign:
+        # The sign separates any leading text from a symbol that sits
+        # directly in front of the digits, so they count as two places.
+        candidates = [s[:match.start()], match.group(2), s[match.end():]]
+    else:
+        # Without a sign, everything in front of the digits is a single
+        # contiguous run (e.g. 'US$' or 'USD $') and counts as one place.
+        candidates = [s[:match.start(3)], s[match.end():]]
+    # There may be extra symbols/text before the number, after the number,
+    # or between the number and its sign—but only in one of those places.
+    extra = ''
+    for extra_s in candidates:
+        extra_s = extra_s.strip()
+        if not extra_s:
+            # Empty slots must not overwrite text that was already found;
+            # otherwise that text would escape the checks below.
+            continue
+        if extra:
+            raise ValueError("too much extraneous text in {!r}".format(s))
+        extra = extra_s
+    # The only extra text allowed is currency specifiers like plain symbols,
+    # 'A$', 'US$', 'CAD', 'USD $', etc.
+    # Trim any currency symbol (Unicode category Sc) from the end.
+    if extra and unicodedata.category(extra[-1]) == 'Sc':
+        extra = extra[:-1].strip()
+    # Anything remaining should look like currency specifier text.
+    if extra and ((len(extra) > 3) or (not extra.isalpha())):
+        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra))
+    return babel.numbers.parse_decimal(sign + number, locale)

 def _rejoin_slice_words(method_name, source, wordslice, sep=None, limit=None, joiner=None):
    if joiner is None:
--- a/tests/test_importers.py
+++ b/tests/test_importers.py
@ -7,7 +7,7 @@ import re

 import pytest
 import yaml
-from import2ledger import importers
+from import2ledger import importers, util

 from . import DATA_DIR

@ -35,7 +35,7 @@ class TestImporters:
        with source_path.open() as source_file:
            importer = import_class(source_file)
            for actual, expected in itertools.zip_longest(importer, expect_results):
-                actual['amount'] = decimal.Decimal(actual['amount'])
+                actual['amount'] = util.parse_currency_dec(actual['amount'])
                assert actual == expected

    def test_loader(self):