Clean up procedure logic in paycor.go and add documentation to csvWorker.go

master
dtookey 3 years ago
parent a87213d98f
commit f3446b5c08

@ -54,6 +54,10 @@ var (
NewHeaders = []string{"Badge #", "Brv", "Hol", "OT", "Reg", "Service", "Sick", "Vac", "Total"}
)
const (
defaultReportSize = 200
)
func UpdatePaycorReports(pathlike string) error {
updateDirectory()
err := updatePaycorHours(pathlike)
@ -78,14 +82,17 @@ func updatePaycorHours(pathlike string) error {
cx := &db.ConnectorGeneric{}
tableRunner := db.NewRunner("create-mercury-hrPaycorHours-table.sql", db.MercuryDatabaseName)
cx.ExecuteSqlScript(tableRunner)
itemAccumulator := make([]*PaycorHours, 0, len(files)*defaultReportSize)
if err != nil {
return err
}
for _, v := range files {
if isTempFile(v.Name()) {
continue
}
filePath := path.Join(pathlike, v.Name())
worker := mercuryUtil.NewCsvWorker(filePath, func() *PaycorHours { return &PaycorHours{} }, true)
items := make([]*PaycorHours, 0, 200)
items := make([]*PaycorHours, 0, defaultReportSize)
err := worker.Process(&items)
if err != nil {
return err
@ -93,8 +100,9 @@ func updatePaycorHours(pathlike string) error {
for _, i := range items {
i.WeekOf = dateFromFileName(v.Name())
}
db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &items)
itemAccumulator = append(itemAccumulator, items...)
}
db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &itemAccumulator)
return nil
}
@ -170,9 +178,9 @@ func (hl *PaycorHoursLegacy) Set(header string, content string) error {
func (h *PaycorHours) Set(header string, content string) error {
switch header {
case "Badge #":
i, err := strconv.Atoi(content)
i, err := parseInt(content)
if err != nil {
panic(err)
return err
}
h.EEid = i
case "Brv":
@ -308,6 +316,18 @@ func parseFloat(content string) (float64, error) {
}
}
// parseInt converts a CSV cell to an int. An empty cell is treated as
// "no value" and yields the sentinel -1 with a nil error, mirroring
// parseFloat's handling of blanks; any other unparsable content returns
// -1 alongside the strconv error.
func parseInt(content string) (int, error) {
	// Blank cells are common in the Paycor exports; they are not errors.
	if len(content) == 0 {
		return -1, nil
	}
	i, err := strconv.Atoi(content)
	if err != nil {
		return -1, err
	}
	return i, nil
}
func dateFromFileName(filename string) string {
parts := strings.Split(filename, "_")
block := parts[0]
@ -316,3 +336,7 @@ func dateFromFileName(filename string) string {
date := block[6:]
return fmt.Sprintf("%s-%s-%s", year, month, date)
}
// isTempFile reports whether the given file name looks like an editor
// temp/lock file, i.e. it ends with "#". Uses strings.HasSuffix instead
// of slicing so an empty name returns false rather than panicking on
// filepath[len(filepath)-1:].
func isTempFile(filepath string) bool {
	return strings.HasSuffix(filepath, "#")
}

@ -3,6 +3,7 @@ package mercuryUtil
import (
"bytes"
"errors"
"fmt"
"os"
"strconv"
"strings"
@ -13,6 +14,7 @@ type (
Set(header string, content string) error
}
//CsvWorker DO NOT INITIALIZE THIS ON YOUR OWN UNLESS YOU KNOW EXACTLY WHAT YOU ARE DOING. Use NewCsvWorker instead
CsvWorker[K CsvConvertable] struct {
Filepath string
cachedData *[][]string
@ -20,6 +22,8 @@ type (
MakeNew func() K
}
// Deprecated: SanitationRule this is used in one spot and one spot only. Don't use it unless you know exactly what
// you're doing
SanitationRule struct {
ColumnName string
MappingFunction func(string) string
@ -27,17 +31,27 @@ type (
)
var (
//UfeffRule left over from a prior attempt. however, it encapsulates logic that is useful for sanitizing output from
//Paycor.
UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string {
var blank []byte
bbuff := []byte(s)
bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank)
//I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up
//I have no idea what 0xef, 0xbb, 0xbf is, but it's f---ing things up
bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank)
return string(bbuff)
}}
)
// NewCsvWorker This absolute mad-lad of a struct will save your bacon. It takes in a path to a csv report, a constructor
// function to allocate individual structs (handling allocation is a potential area for improvement), and a boolean
//determining whether the first row is headers
// pathlike: string regular filepath to a specific csv file "/home/user/work/report.csv"
// factory: func() K a function to use for initializing new structs
// headerInFirstRow: boolean value for whether to use the first row as header values. If this is set to false, the
// Set(header, content) interface will provide 0..len(headers) as a string value to determine which column is being
// used
func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] {
worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow}
worker.MakeNew = factory
@ -45,6 +59,8 @@ func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInF
return &worker
}
// ensureData: This will memoize the structured string data from the csv library workers. This is not a pure function --
// it will mutate the CsvWorker struct when called
func (w *CsvWorker[K]) ensureData() error {
if w.cachedData == nil {
data, err := LoadCsv(w.Filepath)
@ -56,6 +72,9 @@ func (w *CsvWorker[K]) ensureData() error {
return nil
}
// GetHeaders will read the *entire* csv into memory and memoize the results in the CsvWorker struct. The default behavior
// will use [UfeffRule] to sanitize headers, so watch out if you're using zero-width non-blocking webfont spaces in your
// sourcecode
func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
err := w.ensureData()
if err != nil {
@ -68,6 +87,11 @@ func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
return (*w.cachedData)[0], nil
}
// Process will take
// artifacts: *[]K this container is used to store the results. Part of the reason for this is that resource allocation
// in generic functions is a bit more complex than I can handle with my current grasp of generic functions. This procedure
// will return an error if and only if ensureData() fails. Otherwise, it will panic to preserve the stack trace to aid
// in debugging.
func (w *CsvWorker[K]) Process(artifacts *[]K) error {
err := w.ensureData()
if err != nil {
@ -76,7 +100,7 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
var headers []string
var startingIdx int
//intialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
//initialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
data := *w.cachedData
if w.headerInFirstRow {
headers = data[0]
@ -102,7 +126,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
header := headers[i]
err := obj.Set(header, cell)
if err != nil {
return err
fmt.Println("[Error] in " + w.Filepath)
panic(err)
}
}
*artifacts = append(*artifacts, obj)
@ -110,6 +135,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
return nil
}
// isEmptyRow: This procedure is a bit piggy. It naively scans the provided array and returns true iff every value returns
// len(0)
func isEmptyRow(arr []string) bool {
for _, v := range arr {
if len(v) > 0 {
@ -119,6 +146,9 @@ func isEmptyRow(arr []string) bool {
return true
}
// Deprecated: GetHeadersFromCsvFilepath this reads exactly 1024 bytes into the file to attempt to find the first newline
// character. once it does, it calls a hard strings.Split(row, ",") to get the headers into an array. It was a quick
// hack that should not be used. OR USED AT YOUR OWN RISK DON'T SAY I DIDN'T WARN YOU
func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) {
//this is unsafe, as it won't handle escape characters for commas in headers, but we'll burn that bridge later
f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755)

@ -14,11 +14,11 @@ import (
// main runs the Paycor report update against the hard-coded report
// directory and prints the total wall-clock runtime in microseconds.
func main() {
	csvLocation := "/home/dtookey/work/clarity-reporting/paycor/"
	start := time.Now()
	//migrateOldSheets()
	err := hr.UpdatePaycorReports(csvLocation)
	if err != nil {
		panic(err)
	}
	// time.Since(start) is the idiomatic form of time.Now().Sub(start).
	fmt.Printf("Runtime: %dus\n", time.Since(start).Microseconds())
}
@ -51,7 +51,6 @@ func processHoursReports() error {
if mercuryUtil.StringArrEquals(headers, hr.NewHeaders) {
//just copy the spreadsheet. we don't need any extra processing
b, err := os.ReadFile(filePath)
if err != nil {
return err

Loading…
Cancel
Save