beancount-tools-in-Perl/bean-query-goofy-daemon.plx

188 lines
7.4 KiB
Text
Raw Normal View History

#!/usr/bin/perl
# License: AGPLv3-or-later
# see files COPYRIGHT, LICENSE, and AGPL-3.0.txt included in canonical repository for details.
# https://k.sfconservancy.org/NPO-Accounting/beancount-tools-in-Perl
use strict;
use warnings;
use autodie qw(:all);
use Getopt::Long;
use File::Spec::Functions;
use File::Temp qw/:mktemp/;
use POSIX qw(mkfifo);
use IPC::Shareable;
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
use Expect;
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
# We have to set the PAGER to a passthrough text program to assure that
# output does not get paused
$ENV{PAGER} = "/usr/bin/cat";
my $BEANCOUNT_QUERY_CMD = "/usr/bin/bean-query";
my($VERBOSE, $BEANCOUNT_DIR, $LOAD_FILE, $REPOSITORY_URL, $FIFO_DIR) = (0, undef, undef, undef, undef);
GetOptions("verbose=i" => \$VERBOSE, "beancountDir=s" => \$BEANCOUNT_DIR,
"loadFile=s" => \$LOAD_FILE, "repositoryURL=s" => \$REPOSITORY_URL, 'fifoDir=s' => \$FIFO_DIR);
sub UsageAndExit($) {
print STDERR "usage: $0 --loadFile=/path/to/file.beancount --beancountDir=/path/to/beancountdir --fifoDir=/path/to/directory/for/fifos [ --repositoryURL=URL_OF_REPOSITORY --verbose=N ]\n";
print STDERR "\n $_[0]\n";
exit 2;
}
UsageAndExit("/path/to/direcotry/for/fifos must be provided and be an existing directory")
unless (defined $FIFO_DIR and -d $FIFO_DIR);
UsageAndExit("/path/to/file.beancount/path/to/beancountdir must be provided and be an existing directory")
unless (defined $BEANCOUNT_DIR and -d $BEANCOUNT_DIR);
chdir $BEANCOUNT_DIR;
UsageAndExit("/path/to/file.beancount must a relative path to a file that exists in $BEANCOUNT_DIR")
unless (defined $LOAD_FILE and -r $LOAD_FILE);
if (defined $REPOSITORY_URL) {
UsageAndExit("if --repositoryURL is provided, $BEANCOUNT_DIR must be a checkout of a Git repository, but there is no $BEANCOUNT_DIR/.git/config.")
unless (-r '.git/config');
open(my $gitFH, "<", ".git/config");
my $repoString;
while (my $line = <$gitFH>) {
chomp $line;
$repoString = $1
if $line =~ /^\s*url\s*=\s*(\S+)\s*$/;
}
close $gitFH;
UsageAndExit("if --repositoryURL is provided, the checkout found in $BEANCOUNT_DIR must be of that repository, but .git/config does not list a URL.") unless defined $REPOSITORY_URL;
UsageAndExit("if --repositoryURL is provided, the checkout found in $BEANCOUNT_DIR must be of that repository, but it instead appears to be a checkout of $repoString.")
unless ($repoString eq $REPOSITORY_URL);
}
my $glue = 'BeAn';
my %options = (
create => 'yes',
exclusive => 0,
mode => 0600,
destroy => 'yes',
);
my %query;
tie %query, 'IPC::Shareable', $glue, { %options } or
die "server: tie failed for $glue\n";
%query = ();
my %beancountData;
tie %beancountData, 'IPC::Shareable', $glue, { %options } or
die "server: tie failed\n";
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
my ($currentFormat, $runningBeanQuery);
sub StartRunningBeanQuery {
my($format) = @_;
$format = $currentFormat unless defined $format;
$currentFormat = $format;
my @cmd = ($BEANCOUNT_QUERY_CMD);
push(@cmd, '-f', $format) if defined $format;
push(@cmd, $LOAD_FILE);
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
$runningBeanQuery = Expect->spawn(@cmd);
$runningBeanQuery->log_stdout(0);
$runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*')
or die("Unable to find beancount prompt, output was instead: ".
$runningBeanQuery->before() . $runningBeanQuery->after());
print STDERR "Beancount started with output of:\n", $runningBeanQuery->before(),
$runningBeanQuery->match(), $runningBeanQuery->after(), "\n"
if ($VERBOSE > 3);
}
StartRunningBeanQuery('text');
while (1) {
if (not defined $query{question}) {
print STDERR "No question posed..." if $VERBOSE > 2;
if (defined $query{fifoName}) {
print STDERR "fifo still active; locking to clear it..." if $VERBOSE > 2;
(tied %query)->shlock;
print STDERR "clearing fifo, $query{fifoName}..." if $VERBOSE > 2;
no autodie 'unlink'; unlink($query{fifoName});
%query = ();
(tied %query)->shunlock;
print STDERR "fifo cleared & lock released." if $VERBOSE > 2;
}
print STDERR "sleep for 2 seconds\n" if $VERBOSE > 2;
sleep 2;
next;
} elsif ($query{question} !~ /^[\,\=\~\-\@\w.\s\"\'\_\(\)\<\>\*\.\!]+$/) {
print STDERR "Query string $query{question} looks suspicious, not running beancount query!\n";
(tied %query)->shlock;
$query{fifoName} = mktemp("REJECTED_beancount-query-fifo-this-file-does-not-exist_${$}_XXXXXXXXX");
(tied %query)->shunlock;
sleep 2;
} elsif (defined $query{format} and $query{format} !~ /^(?:csv|text)$/) {
print STDERR "format string $query{format} is not text or csv, not running beancount query!\n";
(tied %query)->shlock;
$query{question} = $query{format} = undef;
$query{fifoName} = mktemp("REJECTED_beancount-query-fifo-this-file-does-not-exist_${$}_XXXXXXXXX");
(tied %query)->shunlock;
} elsif (not defined $query{fifoName}) {
if (defined $query{format} and $query{format} ne 'text') {
print STDERR "format string $query{format} is not supported yet!\n";
(tied %query)->shlock;
$query{question} = $query{format} = undef;
$query{fifoName} = mktemp("REJECTED_beancount-$query{format}_not_supported_${$}_XXXXXXXXX");
(tied %query)->shunlock;
next;
}
print STDERR "Runing query: $query{question}\n" if $VERBOSE > 0;
my $ques = $query{question};
$ques =~ s/\n/ /gm;
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
$runningBeanQuery->send("$ques\n");
$runningBeanQuery->expect(undef, 'beancount>') # *Don't* use regex here!
# See `git annotate` on this line for why no regex.
or die("Unable to find beancount prompt, output was instead: ".
$runningBeanQuery->before() . $runningBeanQuery->after());
(tied %query)->shlock;
print STDERR "Acquired shlock on tied variable.\n" if $VERBOSE > 1;
$query{fifoName} = undef;
my $cnt = 0;
my $fifoFileName = mktemp(catfile($FIFO_DIR, "beancount-query-fifo_${$}_XXXXXXXXX"));
mkfifo($fifoFileName, 0700);
die "unable to create fifo in $fifoFileName: $!" unless -p $fifoFileName;
print STDERR "Created $fifoFileName..." if $VERBOSE > 1;
$query{fifoName} = $fifoFileName;
(tied %query)->shunlock;
open(my $fifoFH, ">", $fifoFileName);
print STDERR "and beginning write to it." if $VERBOSE > 1;
my($seenSeperator, $prevLine) = (0, "");
Switch from IPC::Run to Expect module; set PAGER to /usr/bin/cat After some efforts to make this current incarnation of the daemon work for situations with excessively long output, I discovered two problems. First, bean-query seems to always use a pager in interactive mode, and it seems impossible to dissuade bean-query from this behavior without an upstream change to Beancount. As such, my first effort was to force the pager to be `/usr/bin/cat`, which I thought would be sufficient to solve the problem entirely. After that change coupled with much debugging with IPC::Run, I found that detecting the beancount> prompt using a regular expression of /^\s*beancount\s*\>\s*/m, or similar efforts with simply /\s*beancount\s*/ or other versions like /\s*beancount\s*\>\s*$/ms /\s*beancount\s*\>\s*$/s, simply would not detect the end of the string, so the process hung on this line in the main loop: pump $runningBeanQuery until $rbcOut =~ /^\s*beancount\s*\>\s*/m; Ironically, when there is *not* copious output, that line worked just fine. It was baffling. I switched to Expect, and found the exact same behavior with: $runningBeanQuery->expect(undef, -re => '^\s*beancount\s*\>\s*') with that line, it works fine in the StartRunningBeanQuery() subroutine, but when used in the main loop, Expect never finds that string. Again, I tried it with various regexes as specified above to no avail. *However*, merely changing the line to: $runningBeanQuery->expect(undef, 'beancount>') (i.e., not using the regex feature in the Expect library), fixed the problem entirely. My working theory is some complex interaction bug between the IO::Pty module, regular expression matching, and output timing (since this problem only shows up when there is copious output). I think in particular the fact that bean-query creates a subprocess for the `PAGER` command in the pty when the output is copious is somehow related to the problem. Regardless, this solution now works in all the cases where I'm using the daemon, so I'm not investigating further.
2020-08-17 15:46:57 +00:00
my $rbcOut = $runningBeanQuery->before();
foreach my $line (split /\n/, $rbcOut) {
# Occasionally, some of the SELECT statement is printed back to
# $rbcOut. Avoid reproducing this in output by waiting for the line
# of ----'s. Thus, this only works in text mode right now.
unless ($seenSeperator) {
if ($line =~ /^\s*$/) {
$seenSeperator = 1;
} elsif ($currentFormat eq 'text' and $line =~ /^\s*\-+\s*$/) {
$seenSeperator = 1;
}
$prevLine = $line;
next;
}
last if $line =~ /^\s*beancount\s*\>\s*/;
print STDERR "." unless ($cnt++ % 500) or ($VERBOSE <= 1);
print $fifoFH "$line\n";
$prevLine = $line;
}
close $fifoFH;
(tied %query)->shlock; $query{question} = undef; (tied %query)->shunlock;
print STDERR "...done! Data now in $fifoFileName\n" if $VERBOSE > 0;
}
}
finish $runningBeanQuery;
###############################################################################
#
# Local variables:
# compile-command: "perl -c bean-query-goofy-daemon.plx"
# perl-indent-level: 2
# End: