import2ledger/import2ledger/strparse.py
Brett Smith 11eccb60dd strparse: Fix default limit argument in split functions.
The underlying string methods don't accept None.
2017-12-27 13:35:52 -05:00

61 lines
2.2 KiB
Python

import datetime
import decimal
import functools
import re
import unicodedata
import babel.numbers
# Regular expression for the text allowed to surround an amount: nothing at
# all, or an optional single non-word character (captured) next to a code of
# up to three ASCII letters, in either order, with optional whitespace
# anywhere.  Group 1 holds the character when it comes before the letters,
# group 2 when it comes after them.
CURRENCY_SPEC_PATTERN = (
    r'^\s*'
    r'(?:'
    r'|(\W?)\s*[A-Za-z]{,3}'
    r'|[A-Za-z]{,3}\s*(\W?)'
    r')'
    r'\s*$'
)
@functools.lru_cache()
def _currency_amount_pattern(locale):
    """Return regular expression source that finds one amount for `locale`.

    The returned pattern has three capture groups:

    1. an optional sign: the locale's minus or plus symbol, or empty;
    2. at most one non-word character sitting between the sign and the
       digits (callers decide whether it is acceptable);
    3. the number: digits, optionally broken into groups by the separator,
       optionally followed by the decimal symbol and further digits.

    Results are memoized per `locale` by ``functools.lru_cache``.
    """
    minus = babel.numbers.get_minus_sign_symbol(locale)
    plus = babel.numbers.get_plus_sign_symbol(locale)
    dec_sym = babel.numbers.get_decimal_symbol(locale)
    # The grouping separator is not looked up from the locale; it is taken to
    # be whichever of '.' and ',' is not the decimal symbol.
    sep_sym = '.' if dec_sym == ',' else ','
    # The sign symbols come from locale data, so they are escaped and matched
    # as whole alternatives.  Dropped raw into a character class, a
    # multi-character symbol containing '-' could be read as a range, and
    # only one character of the symbol would ever be captured.
    return r'((?:{}|{})?)\s*(\W?)\s*(\d+(?:{}\d+)*(?:{}\d*)?)'.format(
        re.escape(minus),
        re.escape(plus),
        re.escape(sep_sym),
        re.escape(dec_sym),
    )
def currency_decimal(s, locale='en_US_POSIX'):
    """Extract the amount from `s` and parse it as a decimal for `locale`.

    `s` may carry a sign and a currency specifier around the number.  If
    searching `s` raises TypeError (it is not text the regular expression
    can scan), `s` is handed to ``decimal.Decimal`` directly instead.

    Returns the result of ``babel.numbers.parse_decimal`` on the sign plus
    the number text.

    Raises ValueError when no number is found in `s`, or when the text left
    over around the number is anything other than a currency specifier.
    """
    try:
        amount_match = re.search(_currency_amount_pattern(locale), s)
    except TypeError:
        return decimal.Decimal(s)
    if not amount_match:
        raise ValueError("no decimal found in {!r}".format(s))
    sign, inner_symbol, number = amount_match.group(1, 2, 3)
    # Everything that is not the sign or the number itself: the text before
    # the match, the lone character between sign and digits, and the text
    # after the match.
    leftover = s[:amount_match.start()] + inner_symbol + s[amount_match.end():]
    # The only extra text allowed is currency specifiers:
    # '€', 'A$', 'US$', 'CAD', '$USD', etc.
    spec_match = re.match(CURRENCY_SPEC_PATTERN, leftover)
    if spec_match:
        symbol = spec_match.group(1) or spec_match.group(2)
        # A symbol, when present, must be in Unicode category Sc
        # (currency symbol).
        if not symbol or unicodedata.category(symbol) == 'Sc':
            return babel.numbers.parse_decimal(sign + number, locale)
    raise ValueError("non-currency text in {!r}: {!r}".format(s, leftover))
def date(date_s, date_fmt):
    """Parse `date_s` with the strptime format `date_fmt`.

    Returns a ``datetime.date``; any time-of-day fields parsed from the
    string are discarded.
    """
    parsed = datetime.datetime.strptime(date_s, date_fmt)
    return parsed.date()
def _rejoin_slice_words(method_name, source, wordslice, sep=None, limit=-1, joiner=None):
    """Split `source`, keep the words picked by `wordslice`, and join them.

    Splitting and selection are delegated to ``_slice_words`` with the same
    `method_name`, `sep` and `limit`.  The selected words are joined with
    `joiner`; when `joiner` is None the separator `sep` is reused, or a
    single space when `sep` is None too.

    NOTE(review): `wordslice` is presumably always a slice.  With an integer
    index the selection is a single string, and joining would then run over
    its characters.
    """
    if joiner is not None:
        glue = joiner
    elif sep is None:
        glue = ' '
    else:
        glue = sep
    return glue.join(_slice_words(method_name, source, wordslice, sep, limit))


# Public entry points: split from the left or from the right.
rejoin_slice_words = functools.partial(_rejoin_slice_words, 'split')
rejoin_rslice_words = functools.partial(_rejoin_slice_words, 'rsplit')
def _slice_words(method_name, source, wordslice, sep=None, limit=-1):
    """Split `source` and return the part of the result chosen by `wordslice`.

    `method_name` names the method of `source` that does the splitting; it
    is called as ``method(sep, limit)``.  The defaults (`sep` None,
    `limit` -1) are passed through unchanged.  The result of the split is
    then indexed with `wordslice`.
    """
    splitter = getattr(source, method_name)
    words = splitter(sep, limit)
    return words[wordslice]


# Public entry points: split from the left or from the right.
slice_words = functools.partial(_slice_words, 'split')
rslice_words = functools.partial(_slice_words, 'rsplit')