diff --git a/src/hr/paycor.go b/src/hr/paycor.go index 335cf6c..1726dca 100644 --- a/src/hr/paycor.go +++ b/src/hr/paycor.go @@ -54,6 +54,10 @@ var ( NewHeaders = []string{"Badge #", "Brv", "Hol", "OT", "Reg", "Service", "Sick", "Vac", "Total"} ) +const ( + defaultReportSize = 200 +) + func UpdatePaycorReports(pathlike string) error { updateDirectory() err := updatePaycorHours(pathlike) @@ -78,14 +82,17 @@ func updatePaycorHours(pathlike string) error { cx := &db.ConnectorGeneric{} tableRunner := db.NewRunner("create-mercury-hrPaycorHours-table.sql", db.MercuryDatabaseName) cx.ExecuteSqlScript(tableRunner) - + itemAccumulator := make([]*PaycorHours, 0, len(files)*defaultReportSize) if err != nil { return err } for _, v := range files { + if isTempFile(v.Name()) { + continue + } filePath := path.Join(pathlike, v.Name()) worker := mercuryUtil.NewCsvWorker(filePath, func() *PaycorHours { return &PaycorHours{} }, true) - items := make([]*PaycorHours, 0, 200) + items := make([]*PaycorHours, 0, defaultReportSize) err := worker.Process(&items) if err != nil { return err @@ -93,8 +100,9 @@ func updatePaycorHours(pathlike string) error { for _, i := range items { i.WeekOf = dateFromFileName(v.Name()) } - db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &items) + itemAccumulator = append(itemAccumulator, items...) } + db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &itemAccumulator) return nil } @@ -170,9 +178,9 @@ func (hl *PaycorHoursLegacy) Set(header string, content string) error { func (h *PaycorHours) Set(header string, content string) error { switch header { case "Badge #": - i, err := strconv.Atoi(content) + i, err := parseInt(content) if err != nil { - panic(err) + return err } h.EEid = i case "Brv": @@ -308,6 +316,18 @@ func parseFloat(content string) (float64, error) { } } +func parseInt(content string) (int, error) { + if len(content) == 0 { + return -1, nil + } else { + i, err := strconv.Atoi(content) + if err != nil { + return -1, err + } + return i, nil + } +} + func dateFromFileName(filename string) string { parts := strings.Split(filename, "_") block := parts[0] @@ -316,3 +336,7 @@ func dateFromFileName(filename string) string { date := block[6:] return fmt.Sprintf("%s-%s-%s", year, month, date) } + +func isTempFile(filepath string) bool { + return filepath[len(filepath)-1:] == "#" +} diff --git a/src/mercuryUtil/csvWorker.go b/src/mercuryUtil/csvWorker.go index e7c3cfb..23bcd87 100644 --- a/src/mercuryUtil/csvWorker.go +++ b/src/mercuryUtil/csvWorker.go @@ -3,6 +3,7 @@ package mercuryUtil import ( "bytes" "errors" + "fmt" "os" "strconv" "strings" @@ -13,6 +14,7 @@ type ( Set(header string, content string) error } + //CsvWorker DO NOT INITIALIZE THIS ON YOUR OWN UNLESS YOU KNOW EXACTLY WHAT YOU ARE DOING. Use NewCsvWorker instead CsvWorker[K CsvConvertable] struct { Filepath string cachedData *[][]string @@ -20,6 +22,8 @@ type ( MakeNew func() K } + // Deprecated: SanitationRule this is used in one spot and one spot only. Don't use it unless you know exactly what + // you're doing SanitationRule struct { ColumnName string MappingFunction func(string) string @@ -27,17 +31,27 @@ type ( ) var ( + //UfeffRule left over from a prior attempt. however, it encapsulates logic that is useful for sanitizing output from + //Paycor. UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string { var blank []byte bbuff := []byte(s) bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank) - //I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up + //I have no idea what 0xef, 0xbb, 0xbf is, but it's f---ing things up bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank) return string(bbuff) }} ) +// NewCsvWorker This absolute mad-lad of a struct will save your bacon. It takes in a path to a csv report, a constructor +// function to allocate individual structs (handling allocation is a potential area for improvement), and a boolean +//determining whether the first row is headers +// pathlike: string regular filepath to a specific csv file "/home/user/work/report.csv" +// factory: func() K a function to use for initializing new structs +// headerInFirstRow: boolean value for whether to use the first row as header values. If this is set to false, the +// Set(header, content) interface will provide 0..len(headers) as a string value to determine which column is being +// used func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] { worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow} worker.MakeNew = factory @@ -45,6 +59,8 @@ func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInF return &worker } +// ensureData: This will memoize the structured string data from the csv library workers. This is not a pure function -- +// it will mutate the CsvWorker struct when called func (w *CsvWorker[K]) ensureData() error { if w.cachedData == nil { data, err := LoadCsv(w.Filepath) @@ -56,6 +72,9 @@ func (w *CsvWorker[K]) ensureData() error { return nil } +// GetHeaders will read the *entire* csv into memory and memoize the results in the CsvWorker struct. The default behavior +// will use [UfeffRule] to sanitize headers, so watch out if you're using zero-width non-blocking webfont spaces in your +// sourcecode func (w *CsvWorker[K]) GetHeaders() ([]string, error) { err := w.ensureData() if err != nil { @@ -68,6 +87,11 @@ func (w *CsvWorker[K]) GetHeaders() ([]string, error) { return (*w.cachedData)[0], nil } +// Process will take +// artifacts: *[]K this container is used to store the results. Part of the reason for this is that resource allocation +// in generic functions is a bit more complex than I can handle with my current grasp of generic functions. This procedure +// will return an error if and only if ensureData() fails. Otherwise, it will panic to preserve the stack trace to aid +// in debugging. func (w *CsvWorker[K]) Process(artifacts *[]K) error { err := w.ensureData() if err != nil { @@ -76,7 +100,7 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error { var headers []string var startingIdx int - //intialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0]) + //initialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0]) data := *w.cachedData if w.headerInFirstRow { headers = data[0] @@ -102,7 +126,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error { header := headers[i] err := obj.Set(header, cell) if err != nil { - return err + fmt.Println("[Error] in " + w.Filepath) + panic(err) } } *artifacts = append(*artifacts, obj) @@ -110,6 +135,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error { return nil } +// isEmptyRow: This procedure is a bit piggy. It naively scans the provided array and returns true iff every value returns +// len(0) func isEmptyRow(arr []string) bool { for _, v := range arr { if len(v) > 0 { @@ -119,6 +146,9 @@ func isEmptyRow(arr []string) bool { return true } +// Deprecated: GetHeadersFromCsvFilepath this reads exactly 1024 bytes into the file to attempt to find the first newline +// character. once it does, it calls a hard strings.Split(row, ",") to get the headers into an array. It was a quick +// hack that should not be used. OR USED AT YOUR OWN RISK DON'T SAY I DIDN'T WARN YOU func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) { //this is unsafe, as it won't handle escape characters for commas in headers, but we'll burn that bridge later f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755) diff --git a/src/test.go b/src/test.go index 1ae3dc5..4060dd9 100644 --- a/src/test.go +++ b/src/test.go @@ -14,11 +14,11 @@ import ( func main() { csvLocation := "/home/dtookey/work/clarity-reporting/paycor/" start := time.Now() - //migrateOldSheets() err := hr.UpdatePaycorReports(csvLocation) if err != nil { panic(err) } + finish := time.Now() fmt.Printf("Runtime: %dus\n", finish.Sub(start).Microseconds()) } @@ -51,7 +51,6 @@ func processHoursReports() error { if mercuryUtil.StringArrEquals(headers, hr.NewHeaders) { //just copy the spreadsheet. we don't need any extra processing - b, err := os.ReadFile(filePath) if err != nil { return err