From 08585f7fca0618a7744aa43cb7b2d89fcabe14e2 Mon Sep 17 00:00:00 2001 From: Ben Sturmfels Date: Fri, 13 Feb 2026 18:30:58 +1100 Subject: [PATCH] Make import/read-csv more testable, warn about any unexpected categories Previously import/read-csv opened a file on the filesystem, making it hard to test. Now it takes a string or java.io.Reader. The warning about non-zero records with category "PFML ER PU" has now been generalised to flag any non-zero records with an unknown category. --- .../example-paychex-pay-item-details-2026.csv | 29 ------ src/core.clj | 10 +- src/import.clj | 97 +++++++++++-------- test/import_test.clj | 32 +++--- 4 files changed, 79 insertions(+), 89 deletions(-) delete mode 100644 resources/example-paychex-pay-item-details-2026.csv diff --git a/resources/example-paychex-pay-item-details-2026.csv b/resources/example-paychex-pay-item-details-2026.csv deleted file mode 100644 index 2bc8bca..0000000 --- a/resources/example-paychex-pay-item-details-2026.csv +++ /dev/null @@ -1,29 +0,0 @@ -Example Co Inc,"Citizen, Jack A",1,403b EE Pretax,Retirement,,,,,,1000,,,,, -Example Co Inc,"Citizen, Jack A",1,Exp Reimb Non Tax,Reimbursement,,,,,50,,,,,, -Example Co Inc,"Citizen, Jack A",1,Fed Income Tax,Withholding,,,,,,,,470.22,,, -Example Co Inc,"Citizen, Jack A",1,Fed Unemploy,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jack A",1,Medicare,Withholding,,,,,,,,88,,, -Example Co Inc,"Citizen, Jack A",1,Medicare,Liability Expense,,,,,,,,,88,, -Example Co Inc,"Citizen, Jack A",1,Net Pay,Net Pay,,,,,,,,,,,4184.49 -Example Co Inc,"Citizen, Jack A",1,Salary,Earnings,6068.99,,0,,,,,,,, -Example Co Inc,"Citizen, Jack A",1,Social Security,Withholding,,,,,,,,376.28,,, -Example Co Inc,"Citizen, Jack A",1,Social Security,Liability Expense,,,,,,,,,376.28,, -Example Co Inc,"Citizen, Jack A",1,TN Unemploy,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jill B",2,403b EE Pretax,Retirement,,,,,,820,,,,, -Example Co Inc,"Citizen, Jill B",2,Exp Reimb Non 
Tax,Reimbursement,,,,,50,,,,,, -Example Co Inc,"Citizen, Jill B",2,Fed Income Tax,Withholding,,,,,,,,681.01,,, -Example Co Inc,"Citizen, Jill B",2,Fed Unemploy,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jill B",2,Medicare,Liability Expense,,,,,,,,,99.28,, -Example Co Inc,"Citizen, Jill B",2,Medicare,Withholding,,,,,,,,99.29,,, -Example Co Inc,"Citizen, Jill B",2,Net Pay,Net Pay,,,,,,,,,,,4397.39 -Example Co Inc,"Citizen, Jill B",2,OR Disability PFL,Withholding,,,,,,,,41.08,,, -Example Co Inc,"Citizen, Jill B",2,OR Disability PFL,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jill B",2,OR EE Work Bene,Withholding,,,,,,,,0,,, -Example Co Inc,"Citizen, Jill B",2,OR ER Work Bene,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jill B",2,OR Income Tax,Withholding,,,,,,,,427.8,,, -Example Co Inc,"Citizen, Jill B",2,OR TRANS STT,Withholding,,,,,,,,6.03,,, -Example Co Inc,"Citizen, Jill B",2,OR Unemploy,Liability Expense,,,,,,,,,0,, -Example Co Inc,"Citizen, Jill B",2,PFML ER PU,Fringe Benefits,,,,,,,,,,, -Example Co Inc,"Citizen, Jill B",2,Salary,Earnings,6847.12,,0,,,,,,,, -Example Co Inc,"Citizen, Jill B",2,Social Security,Liability Expense,,,,,,,,,424.52,, -Example Co Inc,"Citizen, Jill B",2,Social Security,Withholding,,,,,,,,424.52,,, diff --git a/src/core.clj b/src/core.clj index bbc82ec..62505a5 100644 --- a/src/core.clj +++ b/src/core.clj @@ -55,9 +55,10 @@ "Run the import with a map of options." 
[options] (let [options (if (:demo options) (merge options demo-options) options) - {:keys [date period pay-receipt-no pay-invoice-no total-fees project]} options + {:keys [csv date period pay-receipt-no pay-invoice-no total-fees project]} options {:keys [fees-receipt-no fees-invoice-no retirement-receipt-no retirement-invoice-no]} options - records (import/read-csv (:csv options)) + records (with-open [reader (io/reader csv)] + (import/read-csv reader)) imported (concat (import/net-pay date period pay-invoice-no project records) (import/individual-taxes date period pay-invoice-no retirement-invoice-no project records) (import/employer-taxes date period pay-invoice-no project records) @@ -107,7 +108,7 @@ (println (import/render-transaction i))) (when (seq warnings) (.println System/err "WARNINGS:\n") - (.println System/err (str/join "/n" warnings)))))) + (.println System/err (str/join "\n" warnings)))))) (comment ;; Examples to exercise the importer during development. @@ -118,7 +119,8 @@ ;; These examples are not included with the code for privacy reasons. (require '[examples]) - (def records (import/read-csv "/home/ben/downloads/2026-01-28_Pay-Item-Details_2026-01.csv")) + (def records (with-open [reader (io/reader "/home/ben/downloads/2026-01-28_Pay-Item-Details_2026-01.csv")] + (import/read-csv reader))) (def imported (concat (import/net-pay "2024-01-31" "January 2024" "rt:19462/685751" {} records) diff --git a/src/import.clj b/src/import.clj index 65c009b..9804260 100644 --- a/src/import.clj +++ b/src/import.clj @@ -19,54 +19,67 @@ (defn read-csv "Read in CSV and return a vector of maps with only the fields we want. - Merges the various number fields into a single \"amount\" field." - [filename] - (with-open [reader (io/reader filename)] - (doall - (for [[_ name _ category type & totals] (csv/read-csv reader)] - {:name (employee-name->entity-tag name) - :category category - :type type - :amount (reduce max 0 (map bigdec (remove str/blank? 
totals)))})))) + Merges the various number fields into a single \"amount\" field. Per + clojure.data.csv, `reader` can be a string or java.io.Reader." + [reader] + (doall + (for [[_ name _ category type & totals] (csv/read-csv reader)] + {:name (employee-name->entity-tag name) + :category category + :type type + :amount (reduce max 0 (map bigdec (remove str/blank? totals)))}))) -(defn non-zero-pfml? [record] - "PFML is a new field as of 2026, but it's only an issue if there's a non-zero - amount listed. In that case we'll need to figure out how to record this in the - books." - (and (= (:category record) "PFML ER PU") (not (zero? (:amount record))))) - -(defn warnings [records] - (->> records - (filter non-zero-pfml?) - (map :name) - (map #(str "Non-zero \"PFML ER PU\" record found for " %)))) +(def cat->payroll + {"Fed Income Tax" "US:Tax:Income" + "Medicare" "US:Tax:Medicare" + "IL Income Tax" "US:IL:Tax:Income" + "NY Income Tax" "US:NY:Tax:Income" + "NYC Income Tax" "US:NY:Tax:NYC" + "OR Income Tax" "US:OR:Tax:Income" + "OH Income Tax" "US:OH:Tax:Income" + "PNTSD Income Tax" "US:OH:Tax:PNTSD" + "COLMB Income Tax" "US:OH:Tax:COLUMB" + "Social Security" "US:Tax:SocialSecurity" + "NY Disability" "US:NY:Disability" + "OR Disability PFL" "US:OR:Disability:PFL" + "NY Disability PFL" "US:NY:Disability:PFL" + "OR TRANS STT" "US:OR:Tax:STT" + "Fed Unemploy" "US:Unemployment" + "IL Unemploy" "IL:Unemployment" + "NY Unemploy" "NY:Unemployment" + "OR Unemploy" "OR:Unemployment" + "NY Re-empl Svc" "US:NY:Reempt"}) (defn- cat->payroll-type "Map the CSV withholding categories to Beancount payroll-type tags." [cat] ;; May need to become config, similar to names. Should prompt you if no ;; mapping exists to avoid mistakes. 
- (case cat - "Fed Income Tax" "US:Tax:Income" - "Medicare" "US:Tax:Medicare" - "IL Income Tax" "US:IL:Tax:Income" - "NY Income Tax" "US:NY:Tax:Income" - "NYC Income Tax" "US:NY:Tax:NYC" - "OR Income Tax" "US:OR:Tax:Income" - "OH Income Tax" "US:OH:Tax:Income" - "PNTSD Income Tax" "US:OH:Tax:PNTSD" - "COLMB Income Tax" "US:OH:Tax:COLUMB" - "Social Security" "US:Tax:SocialSecurity" - "NY Disability" "US:NY:Disability" - "OR Disability PFL" "US:OR:Disability:PFL" - "NY Disability PFL" "US:NY:Disability:PFL" - "OR TRANS STT" "US:OR:Tax:STT" - "Fed Unemploy" "US:Unemployment" - "IL Unemploy" "IL:Unemployment" - "NY Unemploy" "NY:Unemployment" - "OR Unemploy" "OR:Unemployment" - "NY Re-empl Svc" "US:NY:Reempt" - cat)) + (get cat->payroll cat cat)) + +(def known-category + "These are all CSV categories that we know about." + (into #{"403b ER match" + "403b EE Pretax" + "Exp Reimb Non Tax" + "Net Pay" + "Salary"} + (keys cat->payroll))) + +(defn non-zero-unknown-field? + "Detect non-zero unknown fields. + New field categories are probably safe enough if they have no amount or a zero + amount listed. If not, we probably need to investigate and figure out how to + incorporate these into the books." + [record] + (when (and (not (known-category (:category record))) + (not (zero? (:amount record)))) + record)) + +(defn warnings [records] + (->> records + (filter non-zero-unknown-field?) + (map #(str "Unexpected non-zero \"" (:category %) "\" category found for " (:name %))))) (defn- assoc-project "Conditionally adds a specific project tag to metadata. 
@@ -179,7 +192,7 @@ [date period pay-invoice-no retirement-invoice-no projects records] (for [[name employee-records] (group-by :name records) :let [template {:date date :desc (format "Monthly Payroll - %s - TAXES - %s" period name) - :meta (assoc-project + :meta (assoc-project projects name {:project "Conservancy" :program "Conservancy:Payroll" diff --git a/test/import_test.clj b/test/import_test.clj index 2b951f5..024d45d 100644 --- a/test/import_test.clj +++ b/test/import_test.clj @@ -3,22 +3,27 @@ (ns import-test (:require [import] - [clojure.java.io] + [clojure.java.io :as io] [clojure.string :as str] [clojure.test :as t :refer [deftest is are]])) +;; Run all our functions over two slightly different data examples. 2024 +;; uses "Liability" where 2025 uses "Liability Expense". Blank 2026 PFML +;; records are exercised separately by the handles-blank-amount test. +(def paychex-csv-2024 + ;; clojure.java.io/resource locates a file in the classpath + (with-open [reader (io/reader (io/resource "example-paychex-pay-item-details-2024.csv"))] + (import/read-csv reader))) +(def paychex-csv-2025 + (with-open [reader (io/reader (io/resource "example-paychex-pay-item-details-2025.csv"))] + (import/read-csv reader))) -;; Run all our functions over two slightly different data examples. 2024 uses -;; "Liability" where 2025 uses "Liability Expense". 
-(def paychex-csv-2024 (->> "example-paychex-pay-item-details-2024.csv" - clojure.java.io/resource - import/read-csv)) -(def paychex-csv-2025 (->> "example-paychex-pay-item-details-2025.csv" - clojure.java.io/resource - import/read-csv)) -(def paychex-csv-2026 (->> "example-paychex-pay-item-details-2026.csv" - clojure.java.io/resource - import/read-csv)) +(deftest handles-blank-amount + (let [amount (-> "Example Co Inc,\"Citizen, Jill B\",2,PFML ER PU,Fringe Benefits,,,,,,,,,,,\n" + import/read-csv + first + :amount)] + (is (= amount 0)))) (deftest warns-about-non-zero-pfml-records (let [records [{:name "Doe-Jane" @@ -113,8 +118,7 @@ DATE txn \"Paychex\" \"Monthly Payroll - PERIOD - Fee\" :meta {:entity "Citizen-Jill" :tax-implication "Reimbursement"}})}]] (are [records] (= expected (import/net-pay "DATE" "PERIOD" "TODO-PAY-INVOICE" {} records)) paychex-csv-2024 - paychex-csv-2025 - paychex-csv-2026))) + paychex-csv-2025))) (deftest individual-taxes (let [expected '({:date "DATE"