Compare commits

..

3 commits

Author SHA1 Message Date
dc89a21839
Upgrade deps, note use of upstream Clojure 2026-02-17 13:11:46 +11:00
08585f7fca
Make import/read-csv more testable, warn about any unexpected categories
Previously import/read-csv opened a file on the filesystem, making it hard to
test. Now it takes a string or java.io.Reader.

The warning about non-zero records with category "PFML ER PU" has now been
generalised to flag any non-zero records with an unknown category.
2026-02-13 18:30:58 +11:00
23cc4657ee
Make CSV file an argument, handle blank amounts and add non-zero PFML warning
The importer was producing an error due to the new PFML field having blank amounts.
2026-02-13 13:36:15 +11:00
8 changed files with 137 additions and 73 deletions

View file

@ -18,13 +18,13 @@ Run a demo with two example employees, Jack and Jill Citizen:
Provide your own payroll data with:
java -jar payroll-importer-x.x.x-standalone.jar --csv resources/example-paychex-pay-item-details.csv --total-fees 206.50
java -jar payroll-importer-x.x.x-standalone.jar --total-fees 206.50 resources/example-paychex-pay-item-details.csv
In the above, various values such as the date, time period covered and
receipt/invoice values show "TODO" placeholders that you are expected to fill in
later. If you prefer, you can provide any/all of these explicitly:
java -jar payroll-importer-x.x.x-standalone.jar --csv resources/example-paychex-pay-item-details.csv --date 2023-12-29 --period 'December 2023' --total-fees 206.50 --pay-receipt-no rt:19462/674660 --pay-invoice-no rt:19403/675431 --fees-receipt-no rt:19459/675387 --fees-invoice-no rt:19459/674887 --retirement-receipt-no rt:19403/676724 --retirement-invoice-no rt:19403/675431
java -jar payroll-importer-x.x.x-standalone.jar --date 2023-12-29 --period 'December 2023' --total-fees 206.50 --pay-receipt-no rt:19462/674660 --pay-invoice-no rt:19403/675431 --fees-receipt-no rt:19459/675387 --fees-invoice-no rt:19459/674887 --retirement-receipt-no rt:19403/676724 --retirement-invoice-no rt:19403/675431 resources/example-paychex-pay-item-details.csv
You can test the output in Beancount by adding the following header entries to define the accounts:
@ -64,7 +64,7 @@ Run tests with:
You can run without building using:
bin/dev --csv resources/example-paychex-pay-item-details.csv --total-fees 206.50
bin/dev --total-fees 206.50 resources/example-paychex-pay-item-details.csv
The project is set up for development in Emacs and CIDER-mode. Open a source
file and run `cider-jack-in`.

View file

@ -1,4 +1,4 @@
#!/usr/bin/env sh
# Run the program without building it
clojure -M -m core "$@"
clojure -M:dev "$@"

View file

@ -14,8 +14,6 @@
(defn uber [_]
(clean nil)
(b/copy-dir {:src-dirs ["src" "resources"]
:target-dir class-dir})
(b/copy-dir {:src-dirs ["src" "resources"]
:target-dir class-dir})
(b/compile-clj {:basis @basis

View file

@ -1,12 +1,19 @@
{:paths ["src" "resources" "private"] ;; Private is not included in the build
:deps {
org.clojure/clojure {:mvn/version "1.11.1"}
org.clojure/data.csv {:mvn/version "1.0.1"}
org.clojure/tools.cli {:mvn/version "1.1.230"}}
;; Note that the clojure version below will be installed from upstream
;; rather than using the system version.
org.clojure/clojure {:mvn/version "1.12.4"}
org.clojure/data.csv {:mvn/version "1.1.1"}
org.clojure/tools.cli {:mvn/version "1.3.250"}}
:aliases
{:dev {:extra-deps {lambdaisland/deep-diff2 {:mvn/version "2.10.211"}}}
{:dev {:extra-deps {lambdaisland/deep-diff2 {:mvn/version "2.12.219"}}
:main-opts ["-m" "core"]
;; Saves ~ 1 second of startup time - 1.5 sec on my laptop. After
;; building an uberjar and running with these options, it drops to about
;; 750ms.
:jvm-opts ["-XX:TieredStopAtLevel=1" "-XX:+TieredCompilation"]}
:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.87.1366"}}
:main-opts ["-m" "kaocha.runner"]}
;; Run with clj -T:build function-in-build
:build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}}
:build {:deps {io.github.clojure/tools.build {:mvn/version "0.10.12"}}
:ns-default build}}}

View file

@ -4,10 +4,10 @@
;; develop/build the program. Use it with `guix shell --manifest=manifest.scm`.
(specifications->manifest
(list
;; No issues running this OpenJDK 21 program on Debian Stable (OpenJDK 17).
"openjdk@21"
;; Works fine with clojure-tools from Guix.
;; No issues running this OpenJDK 21 program on Debian Bookworm (OpenJDK 17).
"openjdk@21:jdk"
"clojure-tools"
"rlwrap"
"clj-kondo"
"beancount"
))

View file

@ -6,17 +6,15 @@
(:require [clojure.java.io :as io]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.cli :refer [parse-opts]]
[clojure.tools.cli :as cli]
[import :as import])
(:gen-class))
(def cli-options
[[nil "--csv FILE" "Pay Item Details CSV report"
:validate [#(-> % io/file .exists) "File does not exist"]]
[nil "--date DATE" "Date used for the transactions (YYYY-MM-DD)"
[[nil "--date DATE" "Date used for the transactions (YYYY-MM-DD)"
:validate [#(re-matches #"\d{4}-\d{2}-\d{2}" %) "Must be of format YYYY-MM-DD"]
:default "TODO-DATE"]
[nil "--period PERIOD" "Month/year covered by the pay run eg. \"December 2023\""
[nil "--period PERIOD" "Month/year of the pay run eg. \"December 2023\""
:default "TODO-PERIOD"]
[nil "--total-fees NUM" "Total fee charged by Paychex, eg. \"206.50\""
:parse-fn bigdec
@ -33,13 +31,13 @@
:default "TODO-RETIREMENT-RECEIPT"]
[nil "--retirement-invoice-no REFERENCE" "Retirement receipt number, eg. \"rt:111/222\""
:default "TODO-RETIREMENT-INVOICE"]
[nil "--project EMPLOYEE:PROJECT" "Allocate an employee to a specific project, eg. \"Doe-Jane:Outreachy\". Use once for each employee."
[nil "--project EMPLOYEE:PROJECT" "Allocate employee to project, eg. \"Doe-Jane:Outreachy\""
:multi true
:validate [#(= 2 (count %)) "Must be of the form \"name:project\""]
:parse-fn #(str/split % #":")
:default {}
:assoc-fn (fn [m k [name proj]] (assoc-in m [k name] proj))]
[nil "--demo" "Produce demo output based made-up payroll data. Useful for documentation."]
[nil "--demo" "Produce demo output based made-up payroll data"]
["-h" "--help"]])
(defn unmatched-employees
@ -57,9 +55,10 @@
"Run the import with a map of options."
[options]
(let [options (if (:demo options) (merge options demo-options) options)
{:keys [date period pay-receipt-no pay-invoice-no total-fees project]} options
{:keys [csv date period pay-receipt-no pay-invoice-no total-fees project]} options
{:keys [fees-receipt-no fees-invoice-no retirement-receipt-no retirement-invoice-no]} options
records (import/read-csv (:csv options))
records (with-open [reader (io/reader csv)]
(import/read-csv reader))
imported (concat (import/net-pay date period pay-invoice-no project records)
(import/individual-taxes date period pay-invoice-no retirement-invoice-no project records)
(import/employer-taxes date period pay-invoice-no project records)
@ -67,32 +66,49 @@
(import/taxes-ach-debit date period pay-receipt-no pay-invoice-no project records)
(import/fees date period fees-receipt-no fees-invoice-no total-fees project records)
(import/retirement date period retirement-receipt-no retirement-invoice-no records))
unmatched (unmatched-employees records project)]
[imported unmatched]))
unmatched (unmatched-employees records project)
warnings (import/warnings records)]
[imported unmatched warnings]))
(defn usage
  "Assemble the help text: a usage line, the given option `summary`, and a
  reminder that --project is supplied once per employee."
  [summary]
  (let [header "Usage: java -jar payroll-importer.jar [OPTIONS] [CSV]\n\n"
        intro  "Options include:\n\n"
        footer "Use --project once for each employee."]
    ;; Same pieces, same order: header, intro, summary, blank line, footer.
    (str header intro summary "\n\n" footer)))
(defn -main
"Run the CLI interface."
[& args]
(let [{:keys [options errors summary]} (parse-opts args cli-options)
no-csv-or-demo? (not (or (contains? options :csv) (contains? options :demo)))
(let [{:keys [options arguments errors summary]} (cli/parse-opts args cli-options)
csv (first arguments)
options (assoc options :csv csv)
demo? (contains? options :demo)
neither-csv-or-demo? (and (not csv) (not demo?))
csv-doesnt-exist? (and (not demo?) csv (not (-> csv io/file .exists)))
errors (cond-> errors
no-csv-or-demo? (conj "Please provide a CSV file with \"--csv FILE\" or try \"--demo\""))]
neither-csv-or-demo? (conj "Please provide a CSV file argument or try \"--demo\"")
csv-doesnt-exist? (conj (str "CSV file \"" csv "\" does not exist")))]
(when (:help options)
(println summary)
(println (usage summary))
(System/exit 0))
(when errors
(println
(str "The following errors occurred:\n\n"
(str/join \newline errors)))
(System/exit 1))
(let [[imported unmatched] (run options)]
(let [[imported unmatched warnings] (run options)]
(when (seq unmatched)
(println
(str "Could not find these employees in the payroll:\n\n"
(str/join ", " unmatched)))
(System/exit 1))
(doseq [i imported]
(println (import/render-transaction i))))))
(println (import/render-transaction i)))
(when (seq warnings)
(.println System/err "WARNINGS:\n")
(.println System/err (str/join "\n" warnings))))))
(comment
;; Examples to exercise the importer during development.
@ -103,7 +119,9 @@
;; These examples are not included with the code for privacy reasons.
(require '[examples])
(def records (import/read-csv "/home/ben/Downloads/2024-01-29_Pay-Item-Details_2024-01.csv"))
(def records (with-open [reader (io/reader "/home/ben/downloads/2026-01-28_Pay-Item-Details_2026-01.csv")]
(import/read-csv reader)))
(def imported
(concat (import/net-pay "2024-01-31" "January 2024" "rt:19462/685751" {} records)
(import/individual-taxes "2024-01-31" "January 2024" "rt:19462/685751" "rt:19403/685602" {} records)

View file

@ -19,23 +19,18 @@
(defn read-csv
"Read in CSV and return a vector of maps with only the fields we want.
Merges the various number fields into a single \"amount\" field."
[filename]
(with-open [reader (io/reader filename)]
Merges the various number fields into a single \"amount\" field. Per
clojure.data.csv, `reader` can be a string or java.io.Reader."
[reader]
(doall
(for [[_ name _ category type & totals] (csv/read-csv reader)]
{:name (employee-name->entity-tag name)
:category category
:type type
:amount (apply max (map bigdec (remove str/blank? totals)))}))))
:amount (reduce max 0 (map bigdec (remove str/blank? totals)))})))
(defn- cat->payroll-type
"Map the CSV withholding categories to Beancount payroll-type tags."
[cat]
;; May need to become config, similar to names. Should prompt you if no
;; mapping exists to avoid mistakes.
(case cat
"Fed Income Tax" "US:Tax:Income"
(def cat->payroll
{"Fed Income Tax" "US:Tax:Income"
"Medicare" "US:Tax:Medicare"
"IL Income Tax" "US:IL:Tax:Income"
"NY Income Tax" "US:NY:Tax:Income"
@ -53,8 +48,38 @@
"IL Unemploy" "IL:Unemployment"
"NY Unemploy" "NY:Unemployment"
"OR Unemploy" "OR:Unemployment"
"NY Re-empl Svc" "US:NY:Reempt"
cat))
"NY Re-empl Svc" "US:NY:Reempt"})
(defn- cat->payroll-type
  "Translate a CSV withholding category into its Beancount payroll-type tag.
  A category with no entry in `cat->payroll` is returned unchanged."
  [cat]
  ;; The mapping may need to become config, similar to names. Ideally an
  ;; unmapped category would prompt you instead of passing through, to avoid
  ;; mistakes.
  (if-let [entry (find cat->payroll cat)]
    (val entry)
    cat))
(def known-category
  "Set of every CSV category we know about: the categories listed here plus
  all keys of `cat->payroll`."
  (let [extra-categories ["403b ER match"
                          "403b EE Pretax"
                          "Exp Reimb Non Tax"
                          "Net Pay"
                          "Salary"]]
    (set (concat extra-categories (keys cat->payroll)))))
(defn non-zero-unknown-field?
  "Return `record` when its category is not in `known-category` and its
  amount is non-zero; otherwise return nil.

  A new field category is probably safe enough while it has no amount or a
  zero amount. Once it carries a non-zero amount we probably need to
  investigate and work out how to incorporate it into the books."
  [record]
  (let [category (:category record)]
    ;; The category check comes first, so the amount is only tested for
    ;; records whose category is unknown.
    (when-not (or (known-category category)
                  (zero? (:amount record)))
      record)))
(defn warnings
  "Return a lazy sequence of warning strings, one for each record that
  `non-zero-unknown-field?` flags."
  [records]
  (for [record records
        :when (non-zero-unknown-field? record)]
    (str "Unexpected non-zero \"" (:category record) "\" category found for " (:name record))))
(defn- assoc-project
"Conditionally adds a specific project tag to metadata.

View file

@ -3,19 +3,35 @@
(ns import-test
(:require [import]
[clojure.java.io]
[clojure.java.io :as io]
[clojure.string :as str]
[clojure.test :as t :refer [deftest is are]]))
;; Run all our functions over slightly different data examples. 2024 uses
;; "Liability" where 2025 uses "Liability Expense". 2026 data also includes
;; blank PFML records; that case is exercised by `handles-blank-amount`.
(def paychex-csv-2024
  ;; io/resource looks the example file up on the classpath; the reader is
  ;; closed again once the records have been read.
  (let [example (io/resource "example-paychex-pay-item-details-2024.csv")]
    (with-open [rdr (io/reader example)]
      (import/read-csv rdr))))
(def paychex-csv-2025
  ;; Records parsed from the 2025 example file, located on the classpath.
  (let [example (io/resource "example-paychex-pay-item-details-2025.csv")]
    (with-open [rdr (io/reader example)]
      (import/read-csv rdr))))
;; Run all our functions over two slightly different data examples. 2024 uses
;; "Liability" where 2025 uses "Liability Expense".
(def paychex-csv-2024 (->> "example-paychex-pay-item-details-2024.csv"
clojure.java.io/resource
import/read-csv))
(def paychex-csv-2025 (->> "example-paychex-pay-item-details-2025.csv"
clojure.java.io/resource
import/read-csv))
(deftest handles-blank-amount
  ;; A single CSV row whose amount columns are all blank should come back
  ;; with an :amount of 0. The CSV text is passed to read-csv as a string.
  (let [blank-row "Example Co Inc,\"Citizen, Jill B\",2,PFML ER PU,Fringe Benefits,,,,,,,,,,,\n"
        record (first (import/read-csv blank-row))
        amount (:amount record)]
    (is (= amount 0))))
(deftest warns-about-non-zero-pfml-records
  ;; One record with a non-zero amount in the "PFML ER PU" category is
  ;; expected to produce exactly one warning.
  (let [pfml-record {:name "Doe-Jane"
                     :category "PFML ER PU"
                     :type "Fringe Benefits"
                     :amount 1M}
        warnings (import/warnings [pfml-record])]
    (is (= (count warnings) 1) "Expected a non-zero PFML warning")))
(deftest render-transaction
(let [transaction '{:date "DATE"