payroll-import/src/parse.clj

102 lines
3.4 KiB
Clojure

;; Copyright 2024 Ben Sturmfels
;; License: GPLv3-or-later
;;
;; Full copyright and licensing details can be found at toplevel file
;; LICENSE.txt in the repository.
(ns parse
"Tools for parsing a subset of Beancount transaction text format for testing.
There may be a way to get structured data out of Beancount (eg. JSON), but
this was fairly quick to write."
(:require [clojure.spec.alpha :as s]
[clojure.walk :as walk]))
;; Lexical building blocks. Every spec here is a regex op over a sequence of
;; characters, so conforming yields vectors of characters rather than strings.

(def ^:private token-chars
  "Characters accepted in an unquoted token: ASCII letters, digits, `-` and `:`."
  (str "abcdefghijklmnopqrstuvwxyz"
       "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
       "0123456789"
       "-:"))

;; Unquoted word; used below for dates, account names, currencies and
;; metadata keys.
(s/def ::token
  (s/+ (set token-chars)))

;; Loose numeric literal: any non-empty run of digits, `-` and `.`. The shape
;; of the number is not validated here.
(s/def ::number
  (s/+ (set "0123456789-.")))

;; Double-quoted string. The body accepts the token characters plus
;; `.`, `/`, `%`, space and parentheses; the quotes themselves land under :_.
(s/def ::quoted-token
  (s/cat :_     #{\"}
         :token (s/+ (set (str token-chars "./% ()")))
         :_     #{\"}))

;; One or more spaces (tabs are not accepted).
(s/def ::whitespace
  (s/+ #{\space}))
;; One metadata line, including its terminating newline:
;;
;;   <spaces>key: "value"
;;
;; Separators are all bound to :_ so only :key and :value carry information.
(s/def ::meta
  (s/cat :_     ::whitespace
         :key   ::token
         :_     #{\:}
         :_     ::whitespace
         :value ::quoted-token
         :_     #{\newline}))
;; One posting line, followed by any number of metadata lines:
;;
;;   <spaces>Account:Name  12.34 CUR
;;
;; The amount is mandatory in this grammar.
(s/def ::posting
  (s/cat :_        ::whitespace
         :account  ::token
         :_        ::whitespace
         :amount   ::number
         :_        ::whitespace
         :currency ::token
         :_        #{\newline}
         :meta     (s/* ::meta)))
;; One transaction: a header line, then optional metadata lines, then
;; postings, then any number of trailing blank lines:
;;
;;   <date> <word> ["payee"] "description"
;;
;; The second header word is matched but bound to :_ like the separators
;; (presumably the Beancount flag / `txn` keyword -- confirm against inputs).
(s/def ::transaction
  (s/cat :date     ::token
         :_        ::whitespace
         :_        ::token
         :_        ::whitespace
         :payee    (s/? ::quoted-token)
         :_        (s/? ::whitespace)
         :desc     ::quoted-token
         :_        #{\newline}
         :meta     (s/* ::meta)
         :postings (s/* ::posting)
         :_        (s/* #{\newline})))

;; A whole input: one or more transactions back to back.
(s/def ::transactions
  (s/+ ::transaction))
(defn- kv->map
  "Collapse a sequence of {:key k :value v} maps into a single map.

  Each :key is turned into a keyword, so
  [{:key \"x\" :value \"y\"}] becomes {:x \"y\"}. When the same key occurs more
  than once, the last occurrence wins."
  [entries]
  (let [->pair (juxt (comp keyword :key) :value)]
    (into {} (map ->pair) entries)))
(defn- convert-parse-tree
  "Reformat the parse tree into a similar data structure as used during import.

  Walks `tree` bottom-up (children are rewritten before their parent) and
  applies the first matching rule to each node:

  - a vector whose first element is a character becomes a string;
  - a map with an :amount key (a posting) has that amount coerced with
    `bigdec` and its :_ key removed;
  - a map with a :token key (a quoted token) is replaced by its :token value;
  - any other map loses its :_ key;
  - a vector whose first element is a map with a :key key becomes a single
    map via `kv->map`.

  Every other node is returned unchanged, which includes a bare
  `:clojure.spec.alpha/invalid` keyword.

  Nodes are classified with `map?` / `char?` rather than by concrete class, so
  the map rules keep applying even if a conformed map is not a
  PersistentArrayMap (e.g. once it has more than eight keys)."
  [tree]
  (let [first-is? (fn [pred node]
                    (and (vector? node) (pred (first node))))
        kv-entry? (fn [x]
                    (and (map? x) (contains? x :key)))
        rewrite   (fn [node]
                    (cond
                      ;; vector of chars to string
                      (first-is? char? node)
                      (apply str node)

                      ;; posting amount to bigdec; by now the amount is
                      ;; already a string because children are visited first
                      (and (map? node) (contains? node :amount))
                      (-> node
                          (update :amount bigdec)
                          (dissoc :_))

                      ;; flatten quoted-tokens
                      (and (map? node) (contains? node :token))
                      (:token node)

                      ;; drop :_ keys
                      (map? node)
                      (dissoc node :_)

                      ;; convert vector of :key/:value maps to a map
                      (first-is? kv-entry? node)
                      (kv->map node)

                      :else node))]
    (walk/postwalk rewrite tree)))
(defn parse
  "Parse Beancount transaction text into an intermediate data structure.

  Intended for development: it lets a hand-written Beancount import be
  compared side-by-side (e.g. with deep-diff2) against an automatically
  generated one. Comparing the raw text directly is impractical because
  ordering and spacing are too hard to match exactly.

  `text` is treated as a sequence of characters; a trailing newline is
  appended so the final line is always terminated. If the text does not
  conform to ::transactions, the result is `:clojure.spec.alpha/invalid`."
  [text]
  (->> (conj (vec text) \newline)
       (s/conform ::transactions)
       convert-parse-tree))
(defn sort-postings
  "Sort transaction postings into a predictable order.

  For every transaction, postings whose :amount is zero are dropped and the
  rest are ordered by [:meta :entity], then :account, then :amount. Returns a
  lazy sequence of the updated transactions."
  [transactions]
  (let [posting-key (juxt #(get-in % [:meta :entity]) :account :amount)
        zero-amount? (comp zero? :amount)
        tidy (fn [postings]
               (->> postings
                    (remove zero-amount?)
                    (sort-by posting-key)))]
    (map #(update % :postings tidy) transactions)))