;; Copyright 2024 Ben Sturmfels
;; License: GPLv3-or-later
;;
;; Full copyright and licensing details can be found at toplevel file
;; LICENSE.txt in the repository.

(ns parse
  "Tools for parsing a subset of Beancount transaction text format for testing.
  There may be a way to get structured data out of Beancount (eg. JSON), but
  this was fairly quick to write."
  (:require [clojure.spec.alpha :as s]
            [clojure.walk :as walk]))

;; A bare (unquoted) token: one or more characters drawn from ASCII letters,
;; digits, `-` and `:`. The spec is a regex op over individual characters, so
;; it is matched against a sequence of chars rather than a string.
(s/def ::token (s/+ (set "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890-:")))

;; A numeric literal: one or more characters drawn from digits, `-` and `.`.
;; NOTE: the position and count of `-` and `.` are not constrained, so
;; sequences such as `1-2` or `1.2.3` also conform.
(s/def ::number (s/+ (set "01234567890-.")))

;; A double-quoted string: an opening `"`, one or more characters drawn from
;; ASCII letters, digits, space and `-:./%()`, then a closing `"`.
;; Conforms to a map holding the inner characters under :token; both quote
;; characters are tagged :_ so they can be discarded later.
(s/def ::quoted-token (s/cat
                       :_ #{\"}
                       :token (s/+ (set "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890-:./% ()"))
                       :_ #{\"}))

;; One or more space characters. Only \space is accepted; tabs do not match.
(s/def ::whitespace (s/+ #{\space}))

;; A single metadata line: leading whitespace, a bare key, a `:` followed by
;; whitespace, a quoted value and a terminating newline.
;; Conforms to a map with :key and :value; every other part is tagged :_.
(s/def ::meta
  (s/cat :_ ::whitespace
         :key ::token
         :_ #{\:}
         :_ ::whitespace
         :value ::quoted-token
         :_ #{\newline}))

;; A posting line followed by its metadata: leading whitespace, an account
;; token, whitespace, an amount, whitespace, a currency token and a newline,
;; then zero or more ::meta lines.
;; Amount and currency are both mandatory in this subset of the format.
(s/def ::posting
  (s/cat :_ ::whitespace
         :account ::token
         :_ ::whitespace
         :amount ::number
         :_ ::whitespace
         :currency ::token
         :_ #{\newline}
         :meta (s/* ::meta)))

;; A whole transaction entry. The header line is: a date token, whitespace, a
;; second token whose value is discarded (tagged :_), whitespace, an optional
;; quoted payee, optional whitespace, a quoted description and a newline.
;; The header is followed by zero or more ::meta lines, zero or more
;; ::posting entries and any number of trailing newlines.
;; NOTE(review): the discarded second token is presumably the transaction
;; directive/flag (e.g. `txn`); `*` and `!` are not in the ::token character
;; set, so they would not conform here -- confirm against the inputs used.
(s/def ::transaction
  (s/cat
   :date ::token
   :_ ::whitespace
   :_ ::token
   :_ ::whitespace
   :payee (s/? ::quoted-token)
   :_ (s/? ::whitespace)
   :desc ::quoted-token
   :_ #{\newline}
   :meta (s/* ::meta)
   :postings (s/* ::posting)
   :_ (s/* #{\newline})))

;; Top-level grammar: one or more ::transaction entries back to back.
(s/def ::transactions (s/+ ::transaction))

(defn- kv->map
  "Collapse a vector of {:key \"x\" :value \"y\"} maps into one map {:x \"y\"}.
  Each :key is turned into a keyword; when a key occurs more than once the
  last value wins."
  [vec]
  (let [->entry (juxt (comp keyword :key) :value)]
    (into {} (map ->entry) vec)))

(defn- convert-parse-tree
  "Reformat the parse tree into a similar data structure as used during import.

  Walks `tree` bottom-up with `clojure.walk/postwalk` and rewrites each node:
  - a vector whose first element is a character becomes a string;
  - a map containing :amount has the amount converted with `bigdec` and its
    :_ key removed;
  - a map containing :token (a quoted token) is replaced by its :token value;
  - any other map has its :_ key removed;
  - a vector whose first element is a map containing :key is collapsed into
    a single map via `kv->map`.
  Any other node is returned unchanged.

  Maps and characters are recognised with `map?` and `char?` rather than by
  concrete class, so a map that has been promoted from an array map to a hash
  map (more than 8 entries) is still converted instead of being skipped."
  [tree]
  (letfn [(char-vector? [node]
            (and (vector? node) (char? (first node))))
          (kv-vector? [node]
            (and (vector? node)
                 (map? (first node))
                 (contains? (first node) :key)))
          (convert [node]
            (cond
              ;; vector of chars to string
              (char-vector? node) (apply str node)
              (map? node) (cond
                            ;; posting amount to bigdec; by the time the map is
                            ;; visited its :amount chars are already a string
                            (contains? node :amount) (-> node
                                                         (update :amount bigdec)
                                                         (dissoc :_))
                            ;; flatten quoted-tokens
                            (contains? node :token) (:token node)
                            ;; drop :_ keys
                            :else (dissoc node :_))
              ;; convert vector of :key/:value maps to a map
              (kv-vector? node) (kv->map node)
              :else node))]
    (walk/postwalk convert tree)))

(defn parse
  "Turn Beancount transaction text into an intermediate data structure.
  Intended for development use: it lets a hand-written Beancount import be
  compared side-by-side (with deep-diff2) against an automatically generated
  one. Comparing the raw text directly is impractical because ordering and
  spacing are too hard to make match exactly.

  `text` is split into characters and a newline is appended before it is
  conformed against ::transactions, so the last line is always terminated."
  [text]
  (->> (conj (vec text) \newline)
       (s/conform ::transactions)
       convert-parse-tree))

(defn sort-postings
  "Put each transaction's postings into a predictable order.
  Postings whose :amount is zero are dropped; the remaining ones are ordered
  by the :entity found under :meta, then by :account, then by :amount.
  Returns a lazy sequence of the updated transactions."
  [transactions]
  (let [sort-key (juxt (fn [posting] (get-in posting [:meta :entity]))
                       :account
                       :amount)
        tidy (fn [postings]
               (->> postings
                    (remove (comp zero? :amount))
                    (sort-by sort-key)))]
    (map (fn [transaction] (update transaction :postings tidy))
         transactions)))