Clean up procedure logic in paycor.go and add documentation to csvWorker.go

master
dtookey 3 years ago
parent a87213d98f
commit f3446b5c08

@ -54,6 +54,10 @@ var (
NewHeaders = []string{"Badge #", "Brv", "Hol", "OT", "Reg", "Service", "Sick", "Vac", "Total"}
)
const (
defaultReportSize = 200
)
func UpdatePaycorReports(pathlike string) error {
updateDirectory()
err := updatePaycorHours(pathlike)
@ -78,14 +82,17 @@ func updatePaycorHours(pathlike string) error {
cx := &db.ConnectorGeneric{}
tableRunner := db.NewRunner("create-mercury-hrPaycorHours-table.sql", db.MercuryDatabaseName)
cx.ExecuteSqlScript(tableRunner)
itemAccumulator := make([]*PaycorHours, 0, len(files)*defaultReportSize)
if err != nil {
return err
}
for _, v := range files {
if isTempFile(v.Name()) {
continue
}
filePath := path.Join(pathlike, v.Name())
worker := mercuryUtil.NewCsvWorker(filePath, func() *PaycorHours { return &PaycorHours{} }, true)
items := make([]*PaycorHours, 0, 200)
items := make([]*PaycorHours, 0, defaultReportSize)
err := worker.Process(&items)
if err != nil {
return err
@ -93,8 +100,9 @@ func updatePaycorHours(pathlike string) error {
for _, i := range items {
i.WeekOf = dateFromFileName(v.Name())
}
db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &items)
itemAccumulator = append(itemAccumulator, items...)
}
db.BlockUpdate[PaycorHours](cx, db.MercuryDatabaseName, "update-mercury-hrPaycorHours.sql", &itemAccumulator)
return nil
}
@ -170,9 +178,9 @@ func (hl *PaycorHoursLegacy) Set(header string, content string) error {
func (h *PaycorHours) Set(header string, content string) error {
switch header {
case "Badge #":
i, err := strconv.Atoi(content)
i, err := parseInt(content)
if err != nil {
panic(err)
return err
}
h.EEid = i
case "Brv":
@ -308,6 +316,18 @@ func parseFloat(content string) (float64, error) {
}
}
// parseInt converts a CSV cell to an int. An empty cell is treated as
// "no value" and yields the sentinel -1 with a nil error, mirroring
// parseFloat's handling of blanks; any other unparsable content returns
// -1 alongside the strconv error.
func parseInt(content string) (int, error) {
	// Blank cells are common in the Paycor exports; they are not errors.
	if len(content) == 0 {
		return -1, nil
	}
	i, err := strconv.Atoi(content)
	if err != nil {
		return -1, err
	}
	return i, nil
}
func dateFromFileName(filename string) string {
parts := strings.Split(filename, "_")
block := parts[0]
@ -316,3 +336,7 @@ func dateFromFileName(filename string) string {
date := block[6:]
return fmt.Sprintf("%s-%s-%s", year, month, date)
}
// isTempFile reports whether the given file name looks like an editor
// temp/lock file, i.e. it ends with "#". Uses strings.HasSuffix instead
// of slicing so an empty name returns false rather than panicking on
// filepath[len(filepath)-1:].
func isTempFile(filepath string) bool {
	return strings.HasSuffix(filepath, "#")
}

@ -3,6 +3,7 @@ package mercuryUtil
import (
"bytes"
"errors"
"fmt"
"os"
"strconv"
"strings"
@ -13,6 +14,7 @@ type (
Set(header string, content string) error
}
//CsvWorker DO NOT INITIALIZE THIS ON YOUR OWN UNLESS YOU KNOW EXACTLY WHAT YOU ARE DOING. Use NewCsvWorker instead
CsvWorker[K CsvConvertable] struct {
Filepath string
cachedData *[][]string
@ -20,6 +22,8 @@ type (
MakeNew func() K
}
// Deprecated: SanitationRule this is used in one spot and one spot only. Don't use it unless you know exactly what
// you're doing
SanitationRule struct {
ColumnName string
MappingFunction func(string) string
@ -27,17 +31,27 @@ type (
)
var (
//UfeffRule left over from a prior attempt. however, it encapsulates logic that is useful for sanitizing output from
//Paycor.
UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string {
var blank []byte
bbuff := []byte(s)
bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank)
//I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up
//I have no idea what 0xef, 0xbb, 0xbf is, but it's f---ing things up
bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank)
return string(bbuff)
}}
)
// NewCsvWorker This absolute mad-lad of a struct will save your bacon. It takes in a path to a csv report, a constructor
// function to allocate individual structs (handling allocation is a potential area for improvement), and a boolean
//determining whether the first row is headers
// pathlike: string regular filepath to a specific csv file "/home/user/work/report.csv"
// factory: func() K a function to use for initializing new structs
// headerInFirstRow: boolean value for whether to use the first row as header values. If this is set to false, the
// Set(header, content) interface will provide 0..len(headers) as a string value to determine which column is being
// used
func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] {
worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow}
worker.MakeNew = factory
@ -45,6 +59,8 @@ func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInF
return &worker
}
// ensureData: This will memoize the structured string data from the csv library workers. This is not a pure function --
// it will mutate the CsvWorker struct when called
func (w *CsvWorker[K]) ensureData() error {
if w.cachedData == nil {
data, err := LoadCsv(w.Filepath)
@ -56,6 +72,9 @@ func (w *CsvWorker[K]) ensureData() error {
return nil
}
// GetHeaders will read the *entire* csv into memory and memoize the results in the CsvWorker struct. The default behavior
// will use [UfeffRule] to sanitize headers, so watch out if you're using zero-width non-blocking webfont spaces in your
// sourcecode
func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
err := w.ensureData()
if err != nil {
@ -68,6 +87,11 @@ func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
return (*w.cachedData)[0], nil
}
// Process will take
// artifacts: *[]K this container is used to store the results. Part of the reason for this is that resource allocation
// in generic functions is a bit more complex than I can handle with my current grasp of generic functions. This procedure
// will return an error if and only if ensureData() fails. Otherwise, it will panic to preserve the stack trace to aid
// in debugging.
func (w *CsvWorker[K]) Process(artifacts *[]K) error {
err := w.ensureData()
if err != nil {
@ -76,7 +100,7 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
var headers []string
var startingIdx int
//intialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
//initialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
data := *w.cachedData
if w.headerInFirstRow {
headers = data[0]
@ -102,7 +126,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
header := headers[i]
err := obj.Set(header, cell)
if err != nil {
return err
fmt.Println("[Error] in " + w.Filepath)
panic(err)
}
}
*artifacts = append(*artifacts, obj)
@ -110,6 +135,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
return nil
}
// isEmptyRow: This procedure is a bit piggy. It naively scans the provided array and returns true iff every value returns
// len(0)
func isEmptyRow(arr []string) bool {
for _, v := range arr {
if len(v) > 0 {
@ -119,6 +146,9 @@ func isEmptyRow(arr []string) bool {
return true
}
// Deprecated: GetHeadersFromCsvFilepath this reads exactly 1024 bytes into the file to attempt to find the first newline
// character. once it does, it calls a hard strings.Split(row, ",") to get the headers into an array. It was a quick
// hack that should not be used. OR USED AT YOUR OWN RISK DON'T SAY I DIDN'T WARN YOU
func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) {
//this is unsafe, as it won't handle escape characters for commas in headers, but we'll burn that bridge later
f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755)

@ -14,11 +14,11 @@ import (
// main runs the Paycor report update against the hard-coded report
// directory and prints the total wall-clock runtime in microseconds.
func main() {
	csvLocation := "/home/dtookey/work/clarity-reporting/paycor/"
	start := time.Now()
	//migrateOldSheets()
	err := hr.UpdatePaycorReports(csvLocation)
	if err != nil {
		panic(err)
	}
	// time.Since(start) is the idiomatic form of time.Now().Sub(start).
	fmt.Printf("Runtime: %dus\n", time.Since(start).Microseconds())
}
@ -51,7 +51,6 @@ func processHoursReports() error {
if mercuryUtil.StringArrEquals(headers, hr.NewHeaders) {
//just copy the spreadsheet. we don't need any extra processing
b, err := os.ReadFile(filePath)
if err != nil {
return err

Loading…
Cancel
Save