Compare commits

...

3 commits

Author SHA1 Message Date
dc89a21839
Upgrade deps, note use of upstream Clojure 2026-02-17 13:11:46 +11:00
08585f7fca
Make import/read-csv more testable, warn about any unexpected categories
Previously import/read-csv opened a file on the filesystem, making it hard to
test. Now it takes a string or java.io.Reader.

The warning about non-zero records with category "PFML ER PU" has now been
generalised to flag any non-zero records with an unknown category.
2026-02-13 18:30:58 +11:00
23cc4657ee
Make CSV file an argument, handle blank amounts and add non-zero PFML warning
Was producing an error due to new PFML field with blank amounts.
2026-02-13 13:36:15 +11:00
8 changed files with 137 additions and 73 deletions

View file

@ -18,13 +18,13 @@ Run a demo with two example employees, Jack and Jill Citizen:
Provide your own payroll data with: Provide your own payroll data with:
java -jar payroll-importer-x.x.x-standalone.jar --csv resources/example-paychex-pay-item-details.csv --total-fees 206.50 java -jar payroll-importer-x.x.x-standalone.jar --total-fees 206.50 resources/example-paychex-pay-item-details.csv
In the above, various values such as the date, time period covered and In the above, various values such as the date, time period covered and
receipt/invoice values show "TODO" placeholders that you are expected to fill in receipt/invoice values show "TODO" placeholders that you are expected to fill in
later. If you prefer, you can provide any/all of these explicitly: later. If you prefer, you can provide any/all of these explicitly:
java -jar payroll-importer-x.x.x-standalone.jar --csv resources/example-paychex-pay-item-details.csv --date 2023-12-29 --period 'December 2023' --total-fees 206.50 --pay-receipt-no rt:19462/674660 --pay-invoice-no rt:19403/675431 --fees-receipt-no rt:19459/675387 --fees-invoice-no rt:19459/674887 --retirement-receipt-no rt:19403/676724 --retirement-invoice-no rt:19403/675431 java -jar payroll-importer-x.x.x-standalone.jar --date 2023-12-29 --period 'December 2023' --total-fees 206.50 --pay-receipt-no rt:19462/674660 --pay-invoice-no rt:19403/675431 --fees-receipt-no rt:19459/675387 --fees-invoice-no rt:19459/674887 --retirement-receipt-no rt:19403/676724 --retirement-invoice-no rt:19403/675431 resources/example-paychex-pay-item-details.csv
You can test the output in Beancount by adding the following header entries to define the accounts: You can test the output in Beancount by adding the following header entries to define the accounts:
@ -64,7 +64,7 @@ Run tests with:
You can run without building using: You can run without building using:
bin/dev --csv resources/example-paychex-pay-item-details.csv --total-fees 206.50 bin/dev --total-fees 206.50 resources/example-paychex-pay-item-details.csv
The project is set up for development in Emacs and CIDER-mode. Open a source The project is set up for development in Emacs and CIDER-mode. Open a source
file and run `cider-jack-in`. file and run `cider-jack-in`.

View file

@ -1,4 +1,4 @@
#!/usr/bin/env sh #!/usr/bin/env sh
# Run the program without building it # Run the program without building it
clojure -M -m core "$@" clojure -M:dev "$@"

View file

@ -14,8 +14,6 @@
(defn uber [_] (defn uber [_]
(clean nil) (clean nil)
(b/copy-dir {:src-dirs ["src" "resources"]
:target-dir class-dir})
(b/copy-dir {:src-dirs ["src" "resources"] (b/copy-dir {:src-dirs ["src" "resources"]
:target-dir class-dir}) :target-dir class-dir})
(b/compile-clj {:basis @basis (b/compile-clj {:basis @basis

View file

@ -1,12 +1,19 @@
{:paths ["src" "resources" "private"] ;; Private is not included in the build {:paths ["src" "resources" "private"] ;; Private is not included in the build
:deps { :deps {
org.clojure/clojure {:mvn/version "1.11.1"} ;; Note that the clojure version below will be installed from upstream
org.clojure/data.csv {:mvn/version "1.0.1"} ;; rather than using the system version.
org.clojure/tools.cli {:mvn/version "1.1.230"}} org.clojure/clojure {:mvn/version "1.12.4"}
org.clojure/data.csv {:mvn/version "1.1.1"}
org.clojure/tools.cli {:mvn/version "1.3.250"}}
:aliases :aliases
{:dev {:extra-deps {lambdaisland/deep-diff2 {:mvn/version "2.10.211"}}} {:dev {:extra-deps {lambdaisland/deep-diff2 {:mvn/version "2.12.219"}}
:main-opts ["-m" "core"]
;; Saves ~ 1 second of startup time - 1.5 sec on my laptop. After
;; building an uberjar and running with these options, it drops to about
;; 750ms.
:jvm-opts ["-XX:TieredStopAtLevel=1" "-XX:+TieredCompilation"]}
:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.87.1366"}} :test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.87.1366"}}
:main-opts ["-m" "kaocha.runner"]} :main-opts ["-m" "kaocha.runner"]}
;; Run with clj -T:build function-in-build ;; Run with clj -T:build function-in-build
:build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}} :build {:deps {io.github.clojure/tools.build {:mvn/version "0.10.12"}}
:ns-default build}}} :ns-default build}}}

View file

@ -4,10 +4,10 @@
;; develop/build the program. Use it with `guix shell --manifest=manifest.scm`. ;; develop/build the program. Use it with `guix shell --manifest=manifest.scm`.
(specifications->manifest (specifications->manifest
(list (list
;; No issues running this OpenJDK 21 program on Debian Stable (OpenJDK 17). ;; No issues running this OpenJDK 21 program on Debian Bookworm (OpenJDK 17).
"openjdk@21" "openjdk@21:jdk"
;; Works fine with clojure-tools from Guix.
"clojure-tools" "clojure-tools"
"rlwrap"
"clj-kondo" "clj-kondo"
"beancount" "beancount"
)) ))

View file

@ -6,17 +6,15 @@
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.set :as set] [clojure.set :as set]
[clojure.string :as str] [clojure.string :as str]
[clojure.tools.cli :refer [parse-opts]] [clojure.tools.cli :as cli]
[import :as import]) [import :as import])
(:gen-class)) (:gen-class))
(def cli-options (def cli-options
[[nil "--csv FILE" "Pay Item Details CSV report" [[nil "--date DATE" "Date used for the transactions (YYYY-MM-DD)"
:validate [#(-> % io/file .exists) "File does not exist"]]
[nil "--date DATE" "Date used for the transactions (YYYY-MM-DD)"
:validate [#(re-matches #"\d{4}-\d{2}-\d{2}" %) "Must be of format YYYY-MM-DD"] :validate [#(re-matches #"\d{4}-\d{2}-\d{2}" %) "Must be of format YYYY-MM-DD"]
:default "TODO-DATE"] :default "TODO-DATE"]
[nil "--period PERIOD" "Month/year covered by the pay run eg. \"December 2023\"" [nil "--period PERIOD" "Month/year of the pay run eg. \"December 2023\""
:default "TODO-PERIOD"] :default "TODO-PERIOD"]
[nil "--total-fees NUM" "Total fee charged by Paychex, eg. \"206.50\"" [nil "--total-fees NUM" "Total fee charged by Paychex, eg. \"206.50\""
:parse-fn bigdec :parse-fn bigdec
@ -33,13 +31,13 @@
:default "TODO-RETIREMENT-RECEIPT"] :default "TODO-RETIREMENT-RECEIPT"]
[nil "--retirement-invoice-no REFERENCE" "Retirement receipt number, eg. \"rt:111/222\"" [nil "--retirement-invoice-no REFERENCE" "Retirement receipt number, eg. \"rt:111/222\""
:default "TODO-RETIREMENT-INVOICE"] :default "TODO-RETIREMENT-INVOICE"]
[nil "--project EMPLOYEE:PROJECT" "Allocate an employee to a specific project, eg. \"Doe-Jane:Outreachy\". Use once for each employee." [nil "--project EMPLOYEE:PROJECT" "Allocate employee to project, eg. \"Doe-Jane:Outreachy\""
:multi true :multi true
:validate [#(= 2 (count %)) "Must be of the form \"name:project\""] :validate [#(= 2 (count %)) "Must be of the form \"name:project\""]
:parse-fn #(str/split % #":") :parse-fn #(str/split % #":")
:default {} :default {}
:assoc-fn (fn [m k [name proj]] (assoc-in m [k name] proj))] :assoc-fn (fn [m k [name proj]] (assoc-in m [k name] proj))]
[nil "--demo" "Produce demo output based made-up payroll data. Useful for documentation."] [nil "--demo" "Produce demo output based made-up payroll data"]
["-h" "--help"]]) ["-h" "--help"]])
(defn unmatched-employees (defn unmatched-employees
@ -57,9 +55,10 @@
"Run the import with a map of options." "Run the import with a map of options."
[options] [options]
(let [options (if (:demo options) (merge options demo-options) options) (let [options (if (:demo options) (merge options demo-options) options)
{:keys [date period pay-receipt-no pay-invoice-no total-fees project]} options {:keys [csv date period pay-receipt-no pay-invoice-no total-fees project]} options
{:keys [fees-receipt-no fees-invoice-no retirement-receipt-no retirement-invoice-no]} options {:keys [fees-receipt-no fees-invoice-no retirement-receipt-no retirement-invoice-no]} options
records (import/read-csv (:csv options)) records (with-open [reader (io/reader csv)]
(import/read-csv reader))
imported (concat (import/net-pay date period pay-invoice-no project records) imported (concat (import/net-pay date period pay-invoice-no project records)
(import/individual-taxes date period pay-invoice-no retirement-invoice-no project records) (import/individual-taxes date period pay-invoice-no retirement-invoice-no project records)
(import/employer-taxes date period pay-invoice-no project records) (import/employer-taxes date period pay-invoice-no project records)
@ -67,32 +66,49 @@
(import/taxes-ach-debit date period pay-receipt-no pay-invoice-no project records) (import/taxes-ach-debit date period pay-receipt-no pay-invoice-no project records)
(import/fees date period fees-receipt-no fees-invoice-no total-fees project records) (import/fees date period fees-receipt-no fees-invoice-no total-fees project records)
(import/retirement date period retirement-receipt-no retirement-invoice-no records)) (import/retirement date period retirement-receipt-no retirement-invoice-no records))
unmatched (unmatched-employees records project)] unmatched (unmatched-employees records project)
[imported unmatched])) warnings (import/warnings records)]
[imported unmatched warnings]))
(defn usage [summary]
(str
"Usage: java -jar payroll-importer.jar [OPTIONS] [CSV]\n\n"
"Options include:\n\n"
summary
"\n\n"
"Use --project once for each employee."))
(defn -main (defn -main
"Run the CLI interface." "Run the CLI interface."
[& args] [& args]
(let [{:keys [options errors summary]} (parse-opts args cli-options) (let [{:keys [options arguments errors summary]} (cli/parse-opts args cli-options)
no-csv-or-demo? (not (or (contains? options :csv) (contains? options :demo))) csv (first arguments)
options (assoc options :csv csv)
demo? (contains? options :demo)
neither-csv-or-demo? (and (not csv) (not demo?))
csv-doesnt-exist? (and (not demo?) csv (not (-> csv io/file .exists)))
errors (cond-> errors errors (cond-> errors
no-csv-or-demo? (conj "Please provide a CSV file with \"--csv FILE\" or try \"--demo\""))] neither-csv-or-demo? (conj "Please provide a CSV file argument or try \"--demo\"")
csv-doesnt-exist? (conj (str "CSV file \"" csv "\" does not exist")))]
(when (:help options) (when (:help options)
(println summary) (println (usage summary))
(System/exit 0)) (System/exit 0))
(when errors (when errors
(println (println
(str "The following errors occurred:\n\n" (str "The following errors occurred:\n\n"
(str/join \newline errors))) (str/join \newline errors)))
(System/exit 1)) (System/exit 1))
(let [[imported unmatched] (run options)] (let [[imported unmatched warnings] (run options)]
(when (seq unmatched) (when (seq unmatched)
(println (println
(str "Could not find these employees in the payroll:\n\n" (str "Could not find these employees in the payroll:\n\n"
(str/join ", " unmatched))) (str/join ", " unmatched)))
(System/exit 1)) (System/exit 1))
(doseq [i imported] (doseq [i imported]
(println (import/render-transaction i)))))) (println (import/render-transaction i)))
(when (seq warnings)
(.println System/err "WARNINGS:\n")
(.println System/err (str/join "\n" warnings))))))
(comment (comment
;; Examples to exercise the importer during development. ;; Examples to exercise the importer during development.
@ -103,7 +119,9 @@
;; These examples are not included with the code for privacy reasons. ;; These examples are not included with the code for privacy reasons.
(require '[examples]) (require '[examples])
(def records (import/read-csv "/home/ben/Downloads/2024-01-29_Pay-Item-Details_2024-01.csv")) (def records (with-open [reader (io/reader "/home/ben/downloads/2026-01-28_Pay-Item-Details_2026-01.csv")]
(import/read-csv reader)))
(def imported (def imported
(concat (import/net-pay "2024-01-31" "January 2024" "rt:19462/685751" {} records) (concat (import/net-pay "2024-01-31" "January 2024" "rt:19462/685751" {} records)
(import/individual-taxes "2024-01-31" "January 2024" "rt:19462/685751" "rt:19403/685602" {} records) (import/individual-taxes "2024-01-31" "January 2024" "rt:19462/685751" "rt:19403/685602" {} records)

View file

@ -19,23 +19,18 @@
(defn read-csv (defn read-csv
"Read in CSV and return a vector of maps with only the fields we want. "Read in CSV and return a vector of maps with only the fields we want.
Merges the various number fields into a single \"amount\" field." Merges the various number fields into a single \"amount\" field. Per
[filename] clojure.data.csv, `reader` can be a string or java.io.Reader."
(with-open [reader (io/reader filename)] [reader]
(doall (doall
(for [[_ name _ category type & totals] (csv/read-csv reader)] (for [[_ name _ category type & totals] (csv/read-csv reader)]
{:name (employee-name->entity-tag name) {:name (employee-name->entity-tag name)
:category category :category category
:type type :type type
:amount (apply max (map bigdec (remove str/blank? totals)))})))) :amount (reduce max 0 (map bigdec (remove str/blank? totals)))})))
(defn- cat->payroll-type (def cat->payroll
"Map the CSV withholding categories to Beancount payroll-type tags." {"Fed Income Tax" "US:Tax:Income"
[cat]
;; May need to become config, similar to names. Should prompt you if no
;; mapping exists to avoid mistakes.
(case cat
"Fed Income Tax" "US:Tax:Income"
"Medicare" "US:Tax:Medicare" "Medicare" "US:Tax:Medicare"
"IL Income Tax" "US:IL:Tax:Income" "IL Income Tax" "US:IL:Tax:Income"
"NY Income Tax" "US:NY:Tax:Income" "NY Income Tax" "US:NY:Tax:Income"
@ -53,8 +48,38 @@
"IL Unemploy" "IL:Unemployment" "IL Unemploy" "IL:Unemployment"
"NY Unemploy" "NY:Unemployment" "NY Unemploy" "NY:Unemployment"
"OR Unemploy" "OR:Unemployment" "OR Unemploy" "OR:Unemployment"
"NY Re-empl Svc" "US:NY:Reempt" "NY Re-empl Svc" "US:NY:Reempt"})
cat))
(defn- cat->payroll-type
"Map the CSV withholding categories to Beancount payroll-type tags."
[cat]
;; May need to become config, similar to names. Should prompt you if no
;; mapping exists to avoid mistakes.
(get cat->payroll cat cat))
(def known-category
"These are all CSV categories that we know about."
(into #{"403b ER match"
"403b EE Pretax"
"Exp Reimb Non Tax"
"Net Pay"
"Salary"}
(keys cat->payroll)))
(defn non-zero-unknown-field?
"Detect non-zero unknown fields.
New field categories are probably safe enough if they have no amount or a zero
amount listed. If not, we probably need to investigate and figure out how to
incorporate these into the books."
[record]
(when (and (not (known-category (:category record)))
(not (zero? (:amount record))))
record))
(defn warnings [records]
(->> records
(filter non-zero-unknown-field?)
(map #(str "Unexpected non-zero \"" (:category %) "\" category found for " (:name %)))))
(defn- assoc-project (defn- assoc-project
"Conditionally adds a specific project tag to metadata. "Conditionally adds a specific project tag to metadata.

View file

@ -3,19 +3,35 @@
(ns import-test (ns import-test
(:require [import] (:require [import]
[clojure.java.io] [clojure.java.io :as io]
[clojure.string :as str] [clojure.string :as str]
[clojure.test :as t :refer [deftest is are]])) [clojure.test :as t :refer [deftest is are]]))
;; Run all our functions over two slightly different data examples: 2024
;; uses "Liability" where 2025 uses "Liability Expense". (2026 exports
;; additionally include PFML records whose amounts are blank.)
(def paychex-csv-2024
;; clojure.java.io/resource locates a file in the classpath
(with-open [reader (io/reader (io/resource "example-paychex-pay-item-details-2024.csv"))]
(import/read-csv reader)))
(def paychex-csv-2025
(with-open [reader (io/reader (io/resource "example-paychex-pay-item-details-2025.csv"))]
(import/read-csv reader)))
;; Run all our functions over two slightly different data examples. 2024 uses (deftest handles-blank-amount
;; "Liability" where 2025 uses "Liability Expense". (let [amount (-> "Example Co Inc,\"Citizen, Jill B\",2,PFML ER PU,Fringe Benefits,,,,,,,,,,,\n"
(def paychex-csv-2024 (->> "example-paychex-pay-item-details-2024.csv" import/read-csv
clojure.java.io/resource first
import/read-csv)) :amount)]
(def paychex-csv-2025 (->> "example-paychex-pay-item-details-2025.csv" (is (= amount 0))))
clojure.java.io/resource
import/read-csv)) (deftest warns-about-non-zero-pfml-records
(let [records [{:name "Doe-Jane"
:category "PFML ER PU"
:type "Fringe Benefits"
:amount 1M}]
warnings (import/warnings records)]
(is (= (count warnings) 1) "Expected a non-zero PFML warning")))
(deftest render-transaction (deftest render-transaction
(let [transaction '{:date "DATE" (let [transaction '{:date "DATE"