util: Add parse_currency_dec.

The current importers trim lots of extraneous symbols and whitespace from
currency strings before passing them to Decimal().  This function takes care
of all that in a single place.
This commit is contained in:
Brett Smith 2017-12-18 23:04:05 -05:00
parent 0734b6f7a5
commit 6ea28c2c89
6 changed files with 54 additions and 9 deletions

View file

@ -88,7 +88,7 @@ class Invoice2017:
elif description.startswith('Early Bird ('): elif description.startswith('Early Bird ('):
self.ticket_rate = self.DISCOUNT_TICKET_RATE self.ticket_rate = self.DISCOUNT_TICKET_RATE
if qty: if qty:
self.amount += decimal.Decimal(total.lstrip('$')) self.amount += util.parse_currency_dec(total)
def _read_invoice_activity(self, table, first_row_text, rows_text): def _read_invoice_activity(self, table, first_row_text, rows_text):
self.actions = [{ self.actions = [{

View file

@ -11,6 +11,9 @@ class IncomeImporter(_csv.CSVImporterBase):
'Pledge', 'Pledge',
'Status', 'Status',
]) ])
COPIED_FIELDS = {
'Pledge': 'amount',
}
ENTRY_SEED = { ENTRY_SEED = {
'currency': 'USD', 'currency': 'USD',
} }
@ -28,7 +31,6 @@ class IncomeImporter(_csv.CSVImporterBase):
return None return None
else: else:
return { return {
'amount': row['Pledge'].replace(',', ''),
'payee': '{0[FirstName]} {0[LastName]}'.format(row), 'payee': '{0[FirstName]} {0[LastName]}'.format(row),
} }
@ -41,7 +43,7 @@ class FeeImporterBase(_csv.CSVImporterBase):
def _read_row(self, row): def _read_row(self, row):
return { return {
'amount': row[self.AMOUNT_FIELD].lstrip('$'), 'amount': row[self.AMOUNT_FIELD],
'date': util.strpdate(row['Month'], '%Y-%m'), 'date': util.strpdate(row['Month'], '%Y-%m'),
} }

View file

@ -27,6 +27,6 @@ class PaymentImporter(_csv.CSVImporterBase):
return { return {
'currency': row['Converted Currency'].upper(), 'currency': row['Converted Currency'].upper(),
'date': util.strpdate(row['Created (UTC)'].split(None, 1)[0], self.DATE_FMT), 'date': util.strpdate(row['Created (UTC)'].split(None, 1)[0], self.DATE_FMT),
'fee': decimal.Decimal(row['Fee']), 'fee': util.parse_currency_dec(row['Fee']),
'tax': decimal.Decimal(row['Tax']), 'tax': util.parse_currency_dec(row['Tax']),
} }

View file

@ -9,7 +9,7 @@ import tokenize
import babel.numbers import babel.numbers
from . import errors from . import errors, util
class TokenTransformer: class TokenTransformer:
def __init__(self, source): def __init__(self, source):
@ -253,7 +253,7 @@ class Template:
template_vars.update( template_vars.update(
date=date.strftime(self.date_fmt), date=date.strftime(self.date_fmt),
payee=payee, payee=payee,
amount=decimal.Decimal(amount), amount=util.parse_currency_dec(amount),
currency=currency, currency=currency,
) )
for key, value in template_vars.items(): for key, value in template_vars.items():

View file

@ -1,5 +1,48 @@
import datetime import datetime
import decimal
import functools import functools
import re
import unicodedata
import babel.numbers
@functools.lru_cache()
def _currency_pattern(locale):
    """Return regex source that locates a number inside a currency string.

    The pattern is built from the sign and decimal symbols babel reports
    for ``locale`` and is cached per locale.  It defines three groups:

    1. an optional single sign character;
    2. an optional single non-word character sitting between the sign and
       the digits (for instance a currency symbol);
    3. the digits themselves, with optional separator-delimited groups and
       an optional decimal part (a trailing decimal symbol with no digits
       after it is accepted).
    """
    # The sign symbols go inside a character class, so escape them first.
    # Some locales report a sign as a directional mark followed by '-';
    # unescaped, "X-X" would be read as a range and '-' would never match.
    sign_chars = ''.join(
        re.escape(symbol)
        for symbol in (
            babel.numbers.get_minus_sign_symbol(locale),
            babel.numbers.get_plus_sign_symbol(locale),
        )
    )
    dec_sym = babel.numbers.get_decimal_symbol(locale)
    # NOTE(review): the grouping separator is inferred from the decimal
    # symbol rather than read from the locale's own grouping symbol.
    sep_sym = '.' if dec_sym == ',' else ','
    return r'([{}]?)\s*(\W?)\s*(\d+(?:{}\d+)*(?:{}\d*)?)'.format(
        sign_chars,
        re.escape(sep_sym),
        re.escape(dec_sym),
    )
def parse_currency_dec(s, locale='en_US_POSIX'):
    """Parse a currency string such as ``'$1,234.50'`` into a decimal.

    Args:
        s: The text to parse.  If it is not something ``re.search`` accepts
            (it raises TypeError), the value is handed straight to
            ``decimal.Decimal`` instead.
        locale: Locale identifier used to pick sign and decimal symbols and
            passed on to ``babel.numbers.parse_decimal``.

    Returns:
        The result of ``babel.numbers.parse_decimal`` on the sign and digits
        found in ``s`` (or ``decimal.Decimal(s)`` for the TypeError case).

    Raises:
        ValueError: if no number is found, if extraneous text appears in
            more than one place around the number, or if the extraneous
            text does not look like a currency specifier.  Errors from
            ``babel.numbers.parse_decimal`` propagate unchanged.
    """
    try:
        match = re.search(_currency_pattern(locale), s)
    except TypeError:
        # Not searchable text; let Decimal convert (or reject) it directly.
        return decimal.Decimal(s)
    if not match:
        raise ValueError("no decimal found in {!r}".format(s))
    # There may be extra symbols/text before the number, after the number,
    # or between the number and its sign—but only in one of those places.
    extra = ''
    for extra_s in [s[:match.start()], match.group(2), s[match.end():]]:
        extra_s = extra_s.strip()
        if not extra_s:
            # Keep whatever was found earlier: an empty segment must not
            # erase it, or that text would escape both checks below.
            continue
        if extra:
            raise ValueError("too much extraneous text in {!r}".format(s))
        extra = extra_s
    # The only extra text allowed is a currency specifier: a plain symbol,
    # a short alphabetic code such as 'CAD', or a code followed by a symbol.
    # Trim any trailing currency symbol (Unicode category Sc).
    if extra and unicodedata.category(extra[-1]) == 'Sc':
        extra = extra[:-1].strip()
    # Anything remaining should look like currency specifier text:
    # at most three characters, all alphabetic.
    if extra and ((len(extra) > 3) or (not extra.isalpha())):
        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra))
    # Group 1 is the sign, group 3 the digits; group 2 was validated above.
    return babel.numbers.parse_decimal(match.group(1) + match.group(3), locale)
def _rejoin_slice_words(method_name, source, wordslice, sep=None, limit=None, joiner=None): def _rejoin_slice_words(method_name, source, wordslice, sep=None, limit=None, joiner=None):
if joiner is None: if joiner is None:

View file

@ -7,7 +7,7 @@ import re
import pytest import pytest
import yaml import yaml
from import2ledger import importers from import2ledger import importers, util
from . import DATA_DIR from . import DATA_DIR
@ -35,7 +35,7 @@ class TestImporters:
with source_path.open() as source_file: with source_path.open() as source_file:
importer = import_class(source_file) importer = import_class(source_file)
for actual, expected in itertools.zip_longest(importer, expect_results): for actual, expected in itertools.zip_longest(importer, expect_results):
actual['amount'] = decimal.Decimal(actual['amount']) actual['amount'] = util.parse_currency_dec(actual['amount'])
assert actual == expected assert actual == expected
def test_loader(self): def test_loader(self):