accrual: Rip out unnecessary functionality.

Now that make_consistent is really robust, there's much less need to do all
the consistency checking that was done in AccrualPostings.__init__. I expect
this will provide a performance benefit for large reports, since we'll be
calculating data for many fewer accrual groups. The only performance penalty
I see is that the aging report has to calculate the balance three times for
each row it reports, twice in write() and once in write_row(), but that
seems okay and can be cached separately if needed.
This commit is contained in:
Brett Smith 2020-07-01 10:54:58 -04:00
parent a5b3dc1463
commit c0a2d1c070
2 changed files with 69 additions and 245 deletions

View file

@ -127,10 +127,6 @@ T = TypeVar('T')
logger = logging.getLogger('conservancy_beancount.reports.accrual')
class Sentinel:
pass
class Account(NamedTuple):
name: str
aging_thresholds: Sequence[int]
@ -167,57 +163,7 @@ class AccrualAccount(enum.Enum):
class AccrualPostings(core.RelatedPostings):
__slots__ = (
'accrual_type',
'date',
'end_balance',
'account',
'entity',
'invoice',
)
INCONSISTENT = Sentinel()
def __init__(self,
source: Iterable[data.Posting]=(),
*,
_can_own: bool=False,
) -> None:
super().__init__(source, _can_own=_can_own)
self.account = self._single_item(post.account for post in self)
if isinstance(self.account, Sentinel):
self.accrual_type: Optional[AccrualAccount] = None
norm_func: Callable[[T], T] = lambda x: x
accrual_pred: Callable[[data.Posting], bool] = bool
else:
self.accrual_type = AccrualAccount.by_account(self.account)
norm_func = self.accrual_type.normalize_amount
accrual_pred = lambda post: norm_func(post.units).number > 0
if not any(accrual_pred(post) for post in self):
accrual_pred = bool
self.date = self._single_item(self.dates(accrual_pred))
self.entity = self._single_item(self.entities(accrual_pred))
self.invoice = self._single_item(self.first_meta_links('invoice', None))
self.end_balance = norm_func(self.balance_at_cost())
def _single_item(self, seq: Iterable[T]) -> Union[T, Sentinel]:
items = iter(seq)
try:
item1 = next(items)
except StopIteration:
all_same = False
else:
all_same = all(item == item1 for item in items)
return item1 if all_same else self.INCONSISTENT
def dates(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[datetime.date]:
return filters.iter_unique(post.meta.date for post in self if pred(post))
def entities(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[MetaValue]:
return filters.iter_unique(
post.meta['entity']
for post in self
if pred(post) and 'entity' in post.meta
)
__slots__ = ()
@classmethod
def make_consistent(cls,
@ -266,31 +212,13 @@ class AccrualPostings(core.RelatedPostings):
if pay_posts:
yield key, cls(pay_posts, _can_own=True)
def is_paid(self, default: Optional[bool]=None) -> Optional[bool]:
if self.accrual_type is None:
return default
else:
return self.end_balance.le_zero()
def is_zero(self, default: Optional[bool]=None) -> Optional[bool]:
if self.accrual_type is None:
return default
else:
return self.end_balance.is_zero()
def since_last_nonzero(self) -> 'AccrualPostings':
for index, (post, balance) in enumerate(self.iter_with_balance()):
if balance.is_zero():
start_index = index
def is_paid(self) -> Optional[bool]:
try:
empty = start_index == index
except NameError:
empty = True
return self if empty else self[start_index + 1:]
@property
def rt_id(self) -> Union[str, None, Sentinel]:
return self._single_item(self.first_meta_links('rt-id', None))
accrual_type = AccrualAccount.classify(self)
except ValueError:
return None
else:
return accrual_type.normalize_amount(self.balance()).le_zero()
class BaseReport:
@ -307,7 +235,7 @@ class BaseReport:
print(line, file=self.out_file)
class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
class AgingODS(core.BaseODS[AccrualPostings, data.Account]):
DOC_COLUMNS = [
'rt-id',
'invoice',
@ -334,11 +262,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
self.date = date
self.logger = logger
def section_key(self, row: AccrualPostings) -> Optional[data.Account]:
if isinstance(row.account, str):
return row.account
else:
return None
def section_key(self, row: AccrualPostings) -> data.Account:
return row[0].account
def start_spreadsheet(self) -> None:
for accrual_type in AccrualAccount:
@ -357,9 +282,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
))
self.lock_first_row()
def start_section(self, key: Optional[data.Account]) -> None:
if key is None:
return
def start_section(self, key: data.Account) -> None:
self.norm_func = core.normalize_amount_func(key)
self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds)
self.age_balances = [core.MutableBalance() for _ in self.age_thresholds]
accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1])
@ -374,9 +298,7 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
))
self.add_row()
def end_section(self, key: Optional[data.Account]) -> None:
if key is None:
return
def end_section(self, key: data.Account) -> None:
total_balance = core.MutableBalance()
text_style = self.merge_styles(self.style_bold, self.style_endtext)
text_span = 4
@ -418,28 +340,30 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
)
def write_row(self, row: AccrualPostings) -> None:
age = (self.date - row[0].meta.date).days
if row.end_balance.ge_zero():
row_date = row[0].meta.date
row_balance = self.norm_func(row.balance_at_cost())
age = (self.date - row_date).days
if row_balance.ge_zero():
for index, threshold in enumerate(self.age_thresholds):
if age >= threshold:
self.age_balances[index] += row.end_balance
self.age_balances[index] += row_balance
break
else:
return
raw_balance = row.balance()
if row.accrual_type is not None:
raw_balance = row.accrual_type.normalize_amount(raw_balance)
if raw_balance == row.end_balance:
raw_balance = self.norm_func(row.balance())
if raw_balance == row_balance:
amount_cell = odf.table.TableCell()
else:
amount_cell = self.balance_cell(raw_balance)
projects = {post.meta.get('project') or None for post in row}
entities = row.meta_values('entity')
entities.discard(None)
projects = row.meta_values('project')
projects.discard(None)
self.add_row(
self.date_cell(row[0].meta.date),
self.multiline_cell(row.entities()),
self.date_cell(row_date),
self.multiline_cell(sorted(entities)),
amount_cell,
self.balance_cell(row.end_balance),
self.balance_cell(row_balance),
self.multiline_cell(sorted(projects)),
*(self.meta_links_cell(row.all_meta_links(key))
for key in self.DOC_COLUMNS),
@ -459,35 +383,12 @@ class AgingReport(BaseReport):
self.ods = AgingODS(rt_wrapper, date, self.logger)
def run(self, groups: PostGroups) -> None:
rows: List[AccrualPostings] = []
for group in groups.values():
if group.is_zero():
# Cheap optimization: don't slice and dice groups we're not
# going to report anyway.
continue
elif group.accrual_type is None:
group = group.since_last_nonzero()
else:
# Filter out new accruals after the report date.
# e.g., cover the case that the same invoices has multiple
# postings over time, and we don't want to report too-recent
# ones.
cutoff_date = self.ods.date - datetime.timedelta(
days=group.accrual_type.value.aging_thresholds[-1],
)
group = AccrualPostings(
post for post in group.since_last_nonzero()
if post.meta.date <= cutoff_date
or group.accrual_type.normalize_amount(post.units.number) < 0
)
if group and not group.is_zero():
rows.append(group)
rows.sort(key=lambda related: (
related.account,
related[0].meta.date,
('\0'.join(related.entities())
if related.entity is related.INCONSISTENT
else related.entity),
rows = [group for group in groups.values()
if not group.balance_at_cost().is_zero()]
rows.sort(key=lambda group: (
group[0].account,
group[0].meta.date,
abs(sum(amt.number for amt in group.balance_at_cost().values())),
))
self.ods.write(rows)
self.ods.save_file(self.out_bin)
@ -495,12 +396,14 @@ class AgingReport(BaseReport):
class BalanceReport(BaseReport):
def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
posts = posts.since_last_nonzero()
date_s = posts[0].meta.date.strftime('%Y-%m-%d')
meta = posts[0].meta
date_s = meta.date.strftime('%Y-%m-%d')
entity_s = meta.get('entity', '<no entity>')
invoice_s = meta.get('invoice', '<no invoice>')
balance_s = posts.balance_at_cost().format(zero="Zero balance")
if index:
yield ""
yield f"{posts.invoice}:"
yield f"{entity_s} {invoice_s}:"
yield f" {balance_s} outstanding since {date_s}"
@ -523,10 +426,12 @@ class OutgoingReport(BaseReport):
self.rt_client = rt_wrapper.rt
def _primary_rt_id(self, posts: AccrualPostings) -> rtutil.TicketAttachmentIds:
rt_id = posts.rt_id
rt_ids = posts.first_meta_links('rt-id')
rt_id = next(rt_ids, None)
rt_id2 = next(rt_ids, None)
if rt_id is None:
raise ValueError("no rt-id links found")
elif isinstance(rt_id, Sentinel):
elif rt_id2 is not None:
raise ValueError("multiple rt-id links found")
parsed = rtutil.RT.parse(rt_id)
if parsed is None:
@ -535,7 +440,6 @@ class OutgoingReport(BaseReport):
return parsed
def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
posts = posts.since_last_nonzero()
try:
ticket_id, _ = self._primary_rt_id(posts)
ticket = self.rt_client.get_ticket(ticket_id)
@ -545,9 +449,13 @@ class OutgoingReport(BaseReport):
ticket = None
errmsg = error.args[0]
if ticket is None:
meta = posts[0].meta
self.logger.error(
"can't generate outgoings report for %s because no RT ticket available: %s",
posts.invoice, errmsg,
"can't generate outgoings report for %s %s %s because no RT ticket available: %s",
meta.date.isoformat(),
meta.get('entity', '<no entity>'),
meta.get('invoice', '<no invoice>'),
errmsg,
)
return
@ -566,10 +474,11 @@ class OutgoingReport(BaseReport):
)
requestor = f'{requestor_name} <{rt_requestor["EmailAddress"]}>'.strip()
balance_s = posts.end_balance.format(None)
balance = -posts.balance_at_cost()
balance_s = balance.format(None)
raw_balance = -posts.balance()
payment_amount = raw_balance.format('¤¤ #,##0.00')
if raw_balance != posts.end_balance:
if raw_balance != balance:
payment_amount += f' ({balance_s})'
balance_s = f'{raw_balance} ({balance_s})'
@ -776,14 +685,15 @@ def main(arglist: Optional[Sequence[str]]=None,
groups: PostGroups
if args.report_type is None or args.report_type is ReportType.OUTGOING:
groups = dict(AccrualPostings.group_by_first_meta_link(postings, 'rt-id'))
if (args.report_type is None
and len(groups) == 1
and all(group.accrual_type is AccrualAccount.PAYABLE
and not group.is_paid()
and key # Make sure we have a usable rt-id
for key, group in groups.items())
):
args.report_type = ReportType.OUTGOING
if args.report_type is None and len(groups) == 1:
key = next(iter(groups))
group = groups[key]
account = group[0].account
if (AccrualAccount.by_account(account) is AccrualAccount.PAYABLE
and all(post.account == account for post in group)
and not group.balance().ge_zero()
and key): # Make sure we have a usable rt-id
args.report_type = ReportType.OUTGOING
if args.report_type is not ReportType.OUTGOING:
groups = dict(AccrualPostings.make_consistent(postings))
if args.report_type is not ReportType.AGING:

View file

@ -263,82 +263,6 @@ def test_report_type_by_unknown_name(arg):
with pytest.raises(ValueError):
accrual.ReportType.by_name(arg)
@pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_accrual_postings_consistent_account(acct_name):
meta = {'invoice': '{acct_name} invoice.pdf'}
txn = testutil.Transaction(postings=[
(acct_name, 50, meta),
(acct_name, 25, meta),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.account == acct_name
def test_accrual_postings_entity():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Payee15'}),
(ACCOUNTS[0], -10, {'entity': 'Payee10'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.entity == 'Accruee'
assert set(related.entities()) == {'Accruee', 'Payee10', 'Payee15'}
def test_accrual_postings_entities():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Payee15'}),
(ACCOUNTS[0], -10, {'entity': 'Payee10'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
actual = related.entities()
assert next(actual, None) == 'Accruee'
assert set(actual) == {'Payee10', 'Payee15'}
def test_accrual_postings_entities_no_duplicates():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Accruee'}),
(ACCOUNTS[0], -10, {'entity': 'Other'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
actual = related.entities()
assert next(actual, None) == 'Accruee'
assert next(actual, None) == 'Other'
assert next(actual, None) is None
def test_accrual_postings_inconsistent_account():
meta = {'invoice': 'invoice.pdf'}
txn = testutil.Transaction(postings=[
(acct_name, index, meta)
for index, acct_name in enumerate(ACCOUNTS)
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.account is related.INCONSISTENT
def test_accrual_postings_rt_id():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10, {'rt-id': 'rt:90'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:92'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:94 rt:92'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id == 'rt:90'
def test_accrual_postings_rt_id_inconsistent():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10, {'rt-id': 'rt:96'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:98 rt:96'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id is related.INCONSISTENT
def test_accrual_postings_rt_id_none():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id is None
@pytest.mark.parametrize('acct_name,invoice,day', testutil.combine_values(
ACCOUNTS,
['FIXME', '', None, *testutil.NON_STRING_METADATA_VALUES],
@ -353,18 +277,13 @@ def test_make_consistent_bad_invoice(acct_name, invoice, day):
(acct_name, index * mult, {'invoice': invoice, 'entity': f'BadInvoice{day}'})
for index in range(1, 4)
])
consistent = dict(accrual.AccrualPostings.make_consistent(data.Posting.from_txn(txn)))
postings = data.Posting.from_txn(txn)
consistent = dict(accrual.AccrualPostings.make_consistent(iter(postings)))
assert len(consistent) == 1
key = next(iter(consistent))
assert acct_name in key
if invoice:
assert str(invoice) in key
actual = consistent[key]
assert actual
_, actual = consistent.popitem()
assert len(actual) == 3
for act_post, exp_post in zip(actual, txn.postings):
assert act_post.units == exp_post.units
assert act_post.meta.get('invoice') == invoice
for act_post, exp_post in zip(actual, postings):
assert act_post == exp_post
def test_make_consistent_across_accounts():
invoice = 'Invoices/CrossAccount.pdf'
@ -376,7 +295,6 @@ def test_make_consistent_across_accounts():
assert len(consistent) == len(ACCOUNTS)
for key, posts in consistent.items():
assert len(posts) == 1
assert posts.account in key
def test_make_consistent_both_invoice_and_account():
txn = testutil.Transaction(date=datetime.date(2019, 2, 2), postings=[
@ -386,7 +304,6 @@ def test_make_consistent_both_invoice_and_account():
assert len(consistent) == len(ACCOUNTS)
for key, posts in consistent.items():
assert len(posts) == 1
assert posts.account in key
@pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_make_consistent_across_entity(acct_name):
@ -399,10 +316,6 @@ def test_make_consistent_across_entity(acct_name):
assert len(consistent) == 3
for key, posts in consistent.items():
assert len(posts) == 1
entities = posts.entities()
assert next(entities, None) == posts.entity
assert next(entities, None) is None
assert posts.entity in key
@pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_make_consistent_entity_differs_accrual_payment(acct_name):
@ -440,7 +353,7 @@ def test_make_consistent_by_date_with_exact_payment():
actual = [group for _, group in
accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
assert len(actual) == 2
assert actual[0].is_zero()
assert sum(post.units.number for post in actual[0]) == 0
assert len(actual[1]) == 1
assert actual[1][0].meta.date.day == 3
@ -470,7 +383,7 @@ def test_make_consistent_by_date_with_overpayment():
actual = [group for _, group in
accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
assert len(actual) == 2
assert actual[0].is_zero()
assert sum(post.units.number for post in actual[0]) == 0
assert len(actual[1]) == 2
assert actual[1][0].meta.date.day == 2
assert actual[1][0].units.number == -25
@ -658,7 +571,8 @@ def test_outgoing_report_without_rt_id(accrual_postings, caplog):
assert caplog.records
log = caplog.records[0]
assert log.message.startswith(
f"can't generate outgoings report for {invoice} because no RT ticket available:",
f"can't generate outgoings report for 2010-05-15 MatchingProgram {invoice}"
" because no RT ticket available:",
)
assert not output.getvalue()
@ -761,7 +675,7 @@ def test_output_payments_when_only_match(arglist, expect_invoice):
assert not errors.getvalue()
assert retcode == 0
check_output(output, [
rf'^{re.escape(expect_invoice)}:$',
rf'^EarlyBird {re.escape(expect_invoice)}:$',
r' outstanding since ',
])