accrual: Rip out unnecessary functionality.

Now that make_consistent is really robust, there's much less need to do all
the consistency checking that was done in AccrualPostings.__init__. I expect
this will provide a performance benefit for large reports, since we'll be
calculating data for many fewer accrual groups. The only performance penalty
I see is that the aging report has to calculate the balance three times for
each row it reports, twice in write() and once in write_row(), but that
seems okay and can be cached separately if needed.
This commit is contained in:
Brett Smith 2020-07-01 10:54:58 -04:00
parent a5b3dc1463
commit c0a2d1c070
2 changed files with 69 additions and 245 deletions

View file

@ -127,10 +127,6 @@ T = TypeVar('T')
logger = logging.getLogger('conservancy_beancount.reports.accrual') logger = logging.getLogger('conservancy_beancount.reports.accrual')
class Sentinel:
pass
class Account(NamedTuple): class Account(NamedTuple):
name: str name: str
aging_thresholds: Sequence[int] aging_thresholds: Sequence[int]
@ -167,57 +163,7 @@ class AccrualAccount(enum.Enum):
class AccrualPostings(core.RelatedPostings): class AccrualPostings(core.RelatedPostings):
__slots__ = ( __slots__ = ()
'accrual_type',
'date',
'end_balance',
'account',
'entity',
'invoice',
)
INCONSISTENT = Sentinel()
def __init__(self,
source: Iterable[data.Posting]=(),
*,
_can_own: bool=False,
) -> None:
super().__init__(source, _can_own=_can_own)
self.account = self._single_item(post.account for post in self)
if isinstance(self.account, Sentinel):
self.accrual_type: Optional[AccrualAccount] = None
norm_func: Callable[[T], T] = lambda x: x
accrual_pred: Callable[[data.Posting], bool] = bool
else:
self.accrual_type = AccrualAccount.by_account(self.account)
norm_func = self.accrual_type.normalize_amount
accrual_pred = lambda post: norm_func(post.units).number > 0
if not any(accrual_pred(post) for post in self):
accrual_pred = bool
self.date = self._single_item(self.dates(accrual_pred))
self.entity = self._single_item(self.entities(accrual_pred))
self.invoice = self._single_item(self.first_meta_links('invoice', None))
self.end_balance = norm_func(self.balance_at_cost())
def _single_item(self, seq: Iterable[T]) -> Union[T, Sentinel]:
items = iter(seq)
try:
item1 = next(items)
except StopIteration:
all_same = False
else:
all_same = all(item == item1 for item in items)
return item1 if all_same else self.INCONSISTENT
def dates(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[datetime.date]:
return filters.iter_unique(post.meta.date for post in self if pred(post))
def entities(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[MetaValue]:
return filters.iter_unique(
post.meta['entity']
for post in self
if pred(post) and 'entity' in post.meta
)
@classmethod @classmethod
def make_consistent(cls, def make_consistent(cls,
@ -266,31 +212,13 @@ class AccrualPostings(core.RelatedPostings):
if pay_posts: if pay_posts:
yield key, cls(pay_posts, _can_own=True) yield key, cls(pay_posts, _can_own=True)
def is_paid(self, default: Optional[bool]=None) -> Optional[bool]: def is_paid(self) -> Optional[bool]:
if self.accrual_type is None:
return default
else:
return self.end_balance.le_zero()
def is_zero(self, default: Optional[bool]=None) -> Optional[bool]:
if self.accrual_type is None:
return default
else:
return self.end_balance.is_zero()
def since_last_nonzero(self) -> 'AccrualPostings':
for index, (post, balance) in enumerate(self.iter_with_balance()):
if balance.is_zero():
start_index = index
try: try:
empty = start_index == index accrual_type = AccrualAccount.classify(self)
except NameError: except ValueError:
empty = True return None
return self if empty else self[start_index + 1:] else:
return accrual_type.normalize_amount(self.balance()).le_zero()
@property
def rt_id(self) -> Union[str, None, Sentinel]:
return self._single_item(self.first_meta_links('rt-id', None))
class BaseReport: class BaseReport:
@ -307,7 +235,7 @@ class BaseReport:
print(line, file=self.out_file) print(line, file=self.out_file)
class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): class AgingODS(core.BaseODS[AccrualPostings, data.Account]):
DOC_COLUMNS = [ DOC_COLUMNS = [
'rt-id', 'rt-id',
'invoice', 'invoice',
@ -334,11 +262,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
self.date = date self.date = date
self.logger = logger self.logger = logger
def section_key(self, row: AccrualPostings) -> Optional[data.Account]: def section_key(self, row: AccrualPostings) -> data.Account:
if isinstance(row.account, str): return row[0].account
return row.account
else:
return None
def start_spreadsheet(self) -> None: def start_spreadsheet(self) -> None:
for accrual_type in AccrualAccount: for accrual_type in AccrualAccount:
@ -357,9 +282,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
)) ))
self.lock_first_row() self.lock_first_row()
def start_section(self, key: Optional[data.Account]) -> None: def start_section(self, key: data.Account) -> None:
if key is None: self.norm_func = core.normalize_amount_func(key)
return
self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds) self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds)
self.age_balances = [core.MutableBalance() for _ in self.age_thresholds] self.age_balances = [core.MutableBalance() for _ in self.age_thresholds]
accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1]) accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1])
@ -374,9 +298,7 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
)) ))
self.add_row() self.add_row()
def end_section(self, key: Optional[data.Account]) -> None: def end_section(self, key: data.Account) -> None:
if key is None:
return
total_balance = core.MutableBalance() total_balance = core.MutableBalance()
text_style = self.merge_styles(self.style_bold, self.style_endtext) text_style = self.merge_styles(self.style_bold, self.style_endtext)
text_span = 4 text_span = 4
@ -418,28 +340,30 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
) )
def write_row(self, row: AccrualPostings) -> None: def write_row(self, row: AccrualPostings) -> None:
age = (self.date - row[0].meta.date).days row_date = row[0].meta.date
if row.end_balance.ge_zero(): row_balance = self.norm_func(row.balance_at_cost())
age = (self.date - row_date).days
if row_balance.ge_zero():
for index, threshold in enumerate(self.age_thresholds): for index, threshold in enumerate(self.age_thresholds):
if age >= threshold: if age >= threshold:
self.age_balances[index] += row.end_balance self.age_balances[index] += row_balance
break break
else: else:
return return
raw_balance = row.balance() raw_balance = self.norm_func(row.balance())
if row.accrual_type is not None: if raw_balance == row_balance:
raw_balance = row.accrual_type.normalize_amount(raw_balance)
if raw_balance == row.end_balance:
amount_cell = odf.table.TableCell() amount_cell = odf.table.TableCell()
else: else:
amount_cell = self.balance_cell(raw_balance) amount_cell = self.balance_cell(raw_balance)
projects = {post.meta.get('project') or None for post in row} entities = row.meta_values('entity')
entities.discard(None)
projects = row.meta_values('project')
projects.discard(None) projects.discard(None)
self.add_row( self.add_row(
self.date_cell(row[0].meta.date), self.date_cell(row_date),
self.multiline_cell(row.entities()), self.multiline_cell(sorted(entities)),
amount_cell, amount_cell,
self.balance_cell(row.end_balance), self.balance_cell(row_balance),
self.multiline_cell(sorted(projects)), self.multiline_cell(sorted(projects)),
*(self.meta_links_cell(row.all_meta_links(key)) *(self.meta_links_cell(row.all_meta_links(key))
for key in self.DOC_COLUMNS), for key in self.DOC_COLUMNS),
@ -459,35 +383,12 @@ class AgingReport(BaseReport):
self.ods = AgingODS(rt_wrapper, date, self.logger) self.ods = AgingODS(rt_wrapper, date, self.logger)
def run(self, groups: PostGroups) -> None: def run(self, groups: PostGroups) -> None:
rows: List[AccrualPostings] = [] rows = [group for group in groups.values()
for group in groups.values(): if not group.balance_at_cost().is_zero()]
if group.is_zero(): rows.sort(key=lambda group: (
# Cheap optimization: don't slice and dice groups we're not group[0].account,
# going to report anyway. group[0].meta.date,
continue abs(sum(amt.number for amt in group.balance_at_cost().values())),
elif group.accrual_type is None:
group = group.since_last_nonzero()
else:
# Filter out new accruals after the report date.
# e.g., cover the case that the same invoices has multiple
# postings over time, and we don't want to report too-recent
# ones.
cutoff_date = self.ods.date - datetime.timedelta(
days=group.accrual_type.value.aging_thresholds[-1],
)
group = AccrualPostings(
post for post in group.since_last_nonzero()
if post.meta.date <= cutoff_date
or group.accrual_type.normalize_amount(post.units.number) < 0
)
if group and not group.is_zero():
rows.append(group)
rows.sort(key=lambda related: (
related.account,
related[0].meta.date,
('\0'.join(related.entities())
if related.entity is related.INCONSISTENT
else related.entity),
)) ))
self.ods.write(rows) self.ods.write(rows)
self.ods.save_file(self.out_bin) self.ods.save_file(self.out_bin)
@ -495,12 +396,14 @@ class AgingReport(BaseReport):
class BalanceReport(BaseReport): class BalanceReport(BaseReport):
def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]: def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
posts = posts.since_last_nonzero() meta = posts[0].meta
date_s = posts[0].meta.date.strftime('%Y-%m-%d') date_s = meta.date.strftime('%Y-%m-%d')
entity_s = meta.get('entity', '<no entity>')
invoice_s = meta.get('invoice', '<no invoice>')
balance_s = posts.balance_at_cost().format(zero="Zero balance") balance_s = posts.balance_at_cost().format(zero="Zero balance")
if index: if index:
yield "" yield ""
yield f"{posts.invoice}:" yield f"{entity_s} {invoice_s}:"
yield f" {balance_s} outstanding since {date_s}" yield f" {balance_s} outstanding since {date_s}"
@ -523,10 +426,12 @@ class OutgoingReport(BaseReport):
self.rt_client = rt_wrapper.rt self.rt_client = rt_wrapper.rt
def _primary_rt_id(self, posts: AccrualPostings) -> rtutil.TicketAttachmentIds: def _primary_rt_id(self, posts: AccrualPostings) -> rtutil.TicketAttachmentIds:
rt_id = posts.rt_id rt_ids = posts.first_meta_links('rt-id')
rt_id = next(rt_ids, None)
rt_id2 = next(rt_ids, None)
if rt_id is None: if rt_id is None:
raise ValueError("no rt-id links found") raise ValueError("no rt-id links found")
elif isinstance(rt_id, Sentinel): elif rt_id2 is not None:
raise ValueError("multiple rt-id links found") raise ValueError("multiple rt-id links found")
parsed = rtutil.RT.parse(rt_id) parsed = rtutil.RT.parse(rt_id)
if parsed is None: if parsed is None:
@ -535,7 +440,6 @@ class OutgoingReport(BaseReport):
return parsed return parsed
def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]: def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
posts = posts.since_last_nonzero()
try: try:
ticket_id, _ = self._primary_rt_id(posts) ticket_id, _ = self._primary_rt_id(posts)
ticket = self.rt_client.get_ticket(ticket_id) ticket = self.rt_client.get_ticket(ticket_id)
@ -545,9 +449,13 @@ class OutgoingReport(BaseReport):
ticket = None ticket = None
errmsg = error.args[0] errmsg = error.args[0]
if ticket is None: if ticket is None:
meta = posts[0].meta
self.logger.error( self.logger.error(
"can't generate outgoings report for %s because no RT ticket available: %s", "can't generate outgoings report for %s %s %s because no RT ticket available: %s",
posts.invoice, errmsg, meta.date.isoformat(),
meta.get('entity', '<no entity>'),
meta.get('invoice', '<no invoice>'),
errmsg,
) )
return return
@ -566,10 +474,11 @@ class OutgoingReport(BaseReport):
) )
requestor = f'{requestor_name} <{rt_requestor["EmailAddress"]}>'.strip() requestor = f'{requestor_name} <{rt_requestor["EmailAddress"]}>'.strip()
balance_s = posts.end_balance.format(None) balance = -posts.balance_at_cost()
balance_s = balance.format(None)
raw_balance = -posts.balance() raw_balance = -posts.balance()
payment_amount = raw_balance.format('¤¤ #,##0.00') payment_amount = raw_balance.format('¤¤ #,##0.00')
if raw_balance != posts.end_balance: if raw_balance != balance:
payment_amount += f' ({balance_s})' payment_amount += f' ({balance_s})'
balance_s = f'{raw_balance} ({balance_s})' balance_s = f'{raw_balance} ({balance_s})'
@ -776,14 +685,15 @@ def main(arglist: Optional[Sequence[str]]=None,
groups: PostGroups groups: PostGroups
if args.report_type is None or args.report_type is ReportType.OUTGOING: if args.report_type is None or args.report_type is ReportType.OUTGOING:
groups = dict(AccrualPostings.group_by_first_meta_link(postings, 'rt-id')) groups = dict(AccrualPostings.group_by_first_meta_link(postings, 'rt-id'))
if (args.report_type is None if args.report_type is None and len(groups) == 1:
and len(groups) == 1 key = next(iter(groups))
and all(group.accrual_type is AccrualAccount.PAYABLE group = groups[key]
and not group.is_paid() account = group[0].account
and key # Make sure we have a usable rt-id if (AccrualAccount.by_account(account) is AccrualAccount.PAYABLE
for key, group in groups.items()) and all(post.account == account for post in group)
): and not group.balance().ge_zero()
args.report_type = ReportType.OUTGOING and key): # Make sure we have a usable rt-id
args.report_type = ReportType.OUTGOING
if args.report_type is not ReportType.OUTGOING: if args.report_type is not ReportType.OUTGOING:
groups = dict(AccrualPostings.make_consistent(postings)) groups = dict(AccrualPostings.make_consistent(postings))
if args.report_type is not ReportType.AGING: if args.report_type is not ReportType.AGING:

View file

@ -263,82 +263,6 @@ def test_report_type_by_unknown_name(arg):
with pytest.raises(ValueError): with pytest.raises(ValueError):
accrual.ReportType.by_name(arg) accrual.ReportType.by_name(arg)
@pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_accrual_postings_consistent_account(acct_name):
meta = {'invoice': '{acct_name} invoice.pdf'}
txn = testutil.Transaction(postings=[
(acct_name, 50, meta),
(acct_name, 25, meta),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.account == acct_name
def test_accrual_postings_entity():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Payee15'}),
(ACCOUNTS[0], -10, {'entity': 'Payee10'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.entity == 'Accruee'
assert set(related.entities()) == {'Accruee', 'Payee10', 'Payee15'}
def test_accrual_postings_entities():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Payee15'}),
(ACCOUNTS[0], -10, {'entity': 'Payee10'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
actual = related.entities()
assert next(actual, None) == 'Accruee'
assert set(actual) == {'Payee10', 'Payee15'}
def test_accrual_postings_entities_no_duplicates():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 25, {'entity': 'Accruee'}),
(ACCOUNTS[0], -15, {'entity': 'Accruee'}),
(ACCOUNTS[0], -10, {'entity': 'Other'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
actual = related.entities()
assert next(actual, None) == 'Accruee'
assert next(actual, None) == 'Other'
assert next(actual, None) is None
def test_accrual_postings_inconsistent_account():
meta = {'invoice': 'invoice.pdf'}
txn = testutil.Transaction(postings=[
(acct_name, index, meta)
for index, acct_name in enumerate(ACCOUNTS)
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.account is related.INCONSISTENT
def test_accrual_postings_rt_id():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10, {'rt-id': 'rt:90'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:92'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:94 rt:92'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id == 'rt:90'
def test_accrual_postings_rt_id_inconsistent():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10, {'rt-id': 'rt:96'}),
(ACCOUNTS[0], 10, {'rt-id': 'rt:98 rt:96'}),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id is related.INCONSISTENT
def test_accrual_postings_rt_id_none():
txn = testutil.Transaction(postings=[
(ACCOUNTS[0], 10),
])
related = accrual.AccrualPostings(data.Posting.from_txn(txn))
assert related.rt_id is None
@pytest.mark.parametrize('acct_name,invoice,day', testutil.combine_values( @pytest.mark.parametrize('acct_name,invoice,day', testutil.combine_values(
ACCOUNTS, ACCOUNTS,
['FIXME', '', None, *testutil.NON_STRING_METADATA_VALUES], ['FIXME', '', None, *testutil.NON_STRING_METADATA_VALUES],
@ -353,18 +277,13 @@ def test_make_consistent_bad_invoice(acct_name, invoice, day):
(acct_name, index * mult, {'invoice': invoice, 'entity': f'BadInvoice{day}'}) (acct_name, index * mult, {'invoice': invoice, 'entity': f'BadInvoice{day}'})
for index in range(1, 4) for index in range(1, 4)
]) ])
consistent = dict(accrual.AccrualPostings.make_consistent(data.Posting.from_txn(txn))) postings = data.Posting.from_txn(txn)
consistent = dict(accrual.AccrualPostings.make_consistent(iter(postings)))
assert len(consistent) == 1 assert len(consistent) == 1
key = next(iter(consistent)) _, actual = consistent.popitem()
assert acct_name in key
if invoice:
assert str(invoice) in key
actual = consistent[key]
assert actual
assert len(actual) == 3 assert len(actual) == 3
for act_post, exp_post in zip(actual, txn.postings): for act_post, exp_post in zip(actual, postings):
assert act_post.units == exp_post.units assert act_post == exp_post
assert act_post.meta.get('invoice') == invoice
def test_make_consistent_across_accounts(): def test_make_consistent_across_accounts():
invoice = 'Invoices/CrossAccount.pdf' invoice = 'Invoices/CrossAccount.pdf'
@ -376,7 +295,6 @@ def test_make_consistent_across_accounts():
assert len(consistent) == len(ACCOUNTS) assert len(consistent) == len(ACCOUNTS)
for key, posts in consistent.items(): for key, posts in consistent.items():
assert len(posts) == 1 assert len(posts) == 1
assert posts.account in key
def test_make_consistent_both_invoice_and_account(): def test_make_consistent_both_invoice_and_account():
txn = testutil.Transaction(date=datetime.date(2019, 2, 2), postings=[ txn = testutil.Transaction(date=datetime.date(2019, 2, 2), postings=[
@ -386,7 +304,6 @@ def test_make_consistent_both_invoice_and_account():
assert len(consistent) == len(ACCOUNTS) assert len(consistent) == len(ACCOUNTS)
for key, posts in consistent.items(): for key, posts in consistent.items():
assert len(posts) == 1 assert len(posts) == 1
assert posts.account in key
@pytest.mark.parametrize('acct_name', ACCOUNTS) @pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_make_consistent_across_entity(acct_name): def test_make_consistent_across_entity(acct_name):
@ -399,10 +316,6 @@ def test_make_consistent_across_entity(acct_name):
assert len(consistent) == 3 assert len(consistent) == 3
for key, posts in consistent.items(): for key, posts in consistent.items():
assert len(posts) == 1 assert len(posts) == 1
entities = posts.entities()
assert next(entities, None) == posts.entity
assert next(entities, None) is None
assert posts.entity in key
@pytest.mark.parametrize('acct_name', ACCOUNTS) @pytest.mark.parametrize('acct_name', ACCOUNTS)
def test_make_consistent_entity_differs_accrual_payment(acct_name): def test_make_consistent_entity_differs_accrual_payment(acct_name):
@ -440,7 +353,7 @@ def test_make_consistent_by_date_with_exact_payment():
actual = [group for _, group in actual = [group for _, group in
accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))] accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
assert len(actual) == 2 assert len(actual) == 2
assert actual[0].is_zero() assert sum(post.units.number for post in actual[0]) == 0
assert len(actual[1]) == 1 assert len(actual[1]) == 1
assert actual[1][0].meta.date.day == 3 assert actual[1][0].meta.date.day == 3
@ -470,7 +383,7 @@ def test_make_consistent_by_date_with_overpayment():
actual = [group for _, group in actual = [group for _, group in
accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))] accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
assert len(actual) == 2 assert len(actual) == 2
assert actual[0].is_zero() assert sum(post.units.number for post in actual[0]) == 0
assert len(actual[1]) == 2 assert len(actual[1]) == 2
assert actual[1][0].meta.date.day == 2 assert actual[1][0].meta.date.day == 2
assert actual[1][0].units.number == -25 assert actual[1][0].units.number == -25
@ -658,7 +571,8 @@ def test_outgoing_report_without_rt_id(accrual_postings, caplog):
assert caplog.records assert caplog.records
log = caplog.records[0] log = caplog.records[0]
assert log.message.startswith( assert log.message.startswith(
f"can't generate outgoings report for {invoice} because no RT ticket available:", f"can't generate outgoings report for 2010-05-15 MatchingProgram {invoice}"
" because no RT ticket available:",
) )
assert not output.getvalue() assert not output.getvalue()
@ -761,7 +675,7 @@ def test_output_payments_when_only_match(arglist, expect_invoice):
assert not errors.getvalue() assert not errors.getvalue()
assert retcode == 0 assert retcode == 0
check_output(output, [ check_output(output, [
rf'^{re.escape(expect_invoice)}:$', rf'^EarlyBird {re.escape(expect_invoice)}:$',
r' outstanding since ', r' outstanding since ',
]) ])