strparse: Improve extra text parsing in currency_decimal.
This allows a symbol and a currency code to be in different parts of the string, as long as there's at most one of each.
This commit is contained in:
parent
9d638f4d01
commit
87f3209101
1 changed files with 19 additions and 18 deletions
|
@ -6,8 +6,14 @@ import unicodedata
|
|||
|
||||
import babel.numbers
|
||||
|
||||
# Regular expression for the non-numeric "extra" text that may accompany an
# amount.  It accepts, surrounded by optional whitespace:
#   * nothing at all;
#   * an optional single non-word character followed by up to three ASCII
#     letters (e.g. '$USD', '€'); or
#   * up to three ASCII letters followed by an optional single non-word
#     character (e.g. 'US$', 'A$', 'CAD').
# Group 1 captures the leading symbol and group 2 the trailing symbol.  The
# pattern only checks the shape; whether the captured character really is a
# currency symbol has to be verified separately by the caller.
CURRENCY_SPEC_PATTERN = r'^{space}(?:|{symbol}{space}{code}|{code}{space}{symbol}){space}$'.format(
    code=r'[A-Za-z]{,3}',
    space=r'\s*',
    symbol=r'(\W?)',
)
|
||||
|
||||
@functools.lru_cache()
|
||||
def _currency_pattern(locale):
|
||||
def _currency_amount_pattern(locale):
|
||||
minus = babel.numbers.get_minus_sign_symbol(locale)
|
||||
plus = babel.numbers.get_plus_sign_symbol(locale)
|
||||
dec_sym = babel.numbers.get_decimal_symbol(locale)
|
||||
|
@ -21,27 +27,22 @@ def _currency_pattern(locale):
|
|||
|
||||
def currency_decimal(s, locale='en_US_POSIX'):
    """Parse a decimal amount out of a string that may carry currency markers.

    The amount is located with the locale-specific pattern returned by
    ``_currency_amount_pattern(locale)``.  All text outside the number (before
    the match, after the match, and the pattern's second group) is
    concatenated and must look like a currency specifier as described by
    ``CURRENCY_SPEC_PATTERN``: at most one symbol and at most one short
    alphabetic code, e.g. '€', 'A$', 'US$', 'CAD', '$USD'.  The symbol and the
    code may sit in different parts of the string.

    Args:
        s: The text to parse.  If searching it raises ``TypeError``
            (presumably because it is not a string), it is handed to
            ``decimal.Decimal`` unchanged.
        locale: Locale identifier passed to babel for number parsing.

    Returns:
        The result of ``babel.numbers.parse_decimal`` on the matched number,
        or ``decimal.Decimal(s)`` on the ``TypeError`` fallback path.

    Raises:
        ValueError: If no number is found in ``s``, or if the text around it
            is not an acceptable currency specifier.
    """
    try:
        match = re.search(_currency_amount_pattern(locale), s)
    except TypeError:
        return decimal.Decimal(s)
    if not match:
        raise ValueError("no decimal found in {!r}".format(s))
    # Collect every piece of text that is not part of the number itself so it
    # can be validated in one go, wherever in the string each piece appeared.
    extra_s = ''.join([s[:match.start()], match.group(2), s[match.end():]])
    # The only extra text allowed is currency specifiers:
    # '€', 'A$', 'US$', 'CAD', '$USD', etc.
    extra_match = re.match(CURRENCY_SPEC_PATTERN, extra_s)
    if extra_match:
        # The pattern admits any single non-word character as the symbol;
        # insist here that it is a Unicode currency symbol (category 'Sc').
        symbol = extra_match.group(1) or extra_match.group(2)
        extra_ok = (not symbol) or (unicodedata.category(symbol) == 'Sc')
    else:
        extra_ok = False
    if not extra_ok:
        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra_s))
    return babel.numbers.parse_decimal(match.group(1) + match.group(3), locale)
|
||||
|
||||
def date(date_s, date_fmt):
|
||||
|
|
Loading…
Reference in a new issue