import datetime
import decimal
import functools
import re
import unicodedata

import babel.numbers

# Matches the text allowed around an amount: either nothing, or a currency
# specifier made of an optional single non-word character (the symbol) and
# up to three ASCII letters (the code), in either order, with optional
# whitespace anywhere.  Group 1 captures the symbol when it comes before the
# code, group 2 when it comes after.
CURRENCY_SPEC_PATTERN = r'^{space}(?:|{symbol}{space}{code}|{code}{space}{symbol}){space}$'.format(
    code=r'[A-Za-z]{,3}',
    space=r'\s*',
    symbol=r'(\W?)',
)


@functools.lru_cache()
def _currency_amount_pattern(locale):
    """Return the regex source used to find an amount in a string for `locale`.

    The pattern has three groups:

    1. an optional sign character, taken from the locale's minus and plus
       sign symbols;
    2. an optional single non-word character between the sign and the
       digits (checked by the caller to be a currency symbol);
    3. the number itself: digit runs joined by the grouping separator,
       optionally followed by the decimal symbol and more digits.

    Results are cached per locale by `functools.lru_cache`.
    """
    minus = babel.numbers.get_minus_sign_symbol(locale)
    plus = babel.numbers.get_plus_sign_symbol(locale)
    dec_sym = babel.numbers.get_decimal_symbol(locale)
    # The grouping separator is inferred from the decimal symbol rather than
    # read from the locale data.
    sep_sym = '.' if dec_sym == ',' else ','
    # Escape the sign symbols before placing them in a character class.
    # Unescaped, a '-' that does not land first in the class (e.g. when a
    # locale's sign symbol is more than one character long) would be read as
    # a range operator, and ']', '^' or '\\' would corrupt the class.
    sign_chars = re.escape(minus + plus)
    return r'([{}]?)\s*(\W?)\s*(\d+(?:{}\d+)*(?:{}\d*)?)'.format(
        sign_chars, re.escape(sep_sym), re.escape(dec_sym),
    )


def currency_decimal(s, locale='en_US_POSIX'):
    """Parse an amount, optionally decorated with a currency specifier.

    Args:
        s: The text to parse, e.g. ``'$1,234.56'`` or ``'12.50 USD'``.
            Input that `re.search` rejects with `TypeError` (i.e. not a
            string) is handed directly to `decimal.Decimal`.
        locale: Locale passed to `babel.numbers` to determine the sign and
            decimal symbols and to parse the number.

    Returns:
        The result of `babel.numbers.parse_decimal` on the sign and number
        found in `s`, or `decimal.Decimal(s)` for non-string input.

    Raises:
        ValueError: If no number is found in `s`, or if the text around the
            number is anything other than a currency specifier.
    """
    try:
        match = re.search(_currency_amount_pattern(locale), s)
    except TypeError:
        # Not a string: let Decimal convert it directly.
        return decimal.Decimal(s)
    if not match:
        raise ValueError("no decimal found in {!r}".format(s))
    # Everything that is not the sign or the number: the text before the
    # match, the symbol captured between sign and digits, and the text after.
    extra_s = ''.join([s[:match.start()], match.group(2), s[match.end():]])
    # The only extra text allowed is currency specifiers:
    # '€', 'A$', 'US$', 'CAD', '$USD', etc.
    extra_match = re.match(CURRENCY_SPEC_PATTERN, extra_s)
    if not extra_match:
        extra_ok = False
    else:
        # At most one of the two symbol groups participates in the match.
        symbol = extra_match.group(1) or extra_match.group(2)
        # A symbol, when present, must be in Unicode category Sc
        # (currency symbol).
        extra_ok = (not symbol) or (unicodedata.category(symbol) == 'Sc')
    if not extra_ok:
        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra_s))
    return babel.numbers.parse_decimal(match.group(1) + match.group(3), locale)


def date(date_s, date_fmt):
    """Parse `date_s` using the `strptime` format `date_fmt`; return a date."""
    return datetime.datetime.strptime(date_s, date_fmt).date()


def _rejoin_slice_words(method_name, source, wordslice, sep=None, limit=-1, joiner=None):
    """Split `source`, select words with `wordslice`, and join them again.

    Args:
        method_name: Name of the splitting method to call on `source`
            ('split' or 'rsplit' for the public partials defined below).
        source: The object to split.
        wordslice: Index expression applied to the list of words.
        sep: Separator passed to the splitting method.
        limit: Maximum number of splits passed to the splitting method.
        joiner: String used to join the selected words.  Defaults to `sep`,
            or to a single space when `sep` is None.
    """
    if joiner is None:
        joiner = ' ' if sep is None else sep
    return joiner.join(_slice_words(method_name, source, wordslice, sep, limit))


rejoin_slice_words = functools.partial(_rejoin_slice_words, 'split')
rejoin_rslice_words = functools.partial(_rejoin_slice_words, 'rsplit')


def _slice_words(method_name, source, wordslice, sep=None, limit=-1):
    """Split `source` and return the words selected by `wordslice`.

    Calls the method named `method_name` on `source` with `sep` and `limit`,
    then indexes the result with `wordslice`.
    """
    return getattr(source, method_name)(sep, limit)[wordslice]


slice_words = functools.partial(_slice_words, 'split')
rslice_words = functools.partial(_slice_words, 'rsplit')