From 5c63b01fb6759c676237fd02249368e4dca904f2 Mon Sep 17 00:00:00 2001 From: dtookey Date: Wed, 2 Nov 2022 16:10:38 -0400 Subject: [PATCH] migration from legacy report to new report hours are complete. --- .gitignore | 4 +- src/hr/paycor.go | 235 +++++++++++++++++++++++++++++++++++ src/mercuryUtil/csvWorker.go | 110 ++++++++++++++-- src/mercuryUtil/hrFixer.go | 117 ----------------- src/mercuryUtil/util.go | 24 +++- src/test.go | 163 ++++++++++++++++-------- 6 files changed, 470 insertions(+), 183 deletions(-) create mode 100644 src/hr/paycor.go delete mode 100644 src/mercuryUtil/hrFixer.go diff --git a/.gitignore b/.gitignore index 9eaadc5..4d6f3d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /tokens/ /build/ -/.idea/ \ No newline at end of file +/.idea/ +/go.sum +/mercury.iml diff --git a/src/hr/paycor.go b/src/hr/paycor.go new file mode 100644 index 0000000..ddd07c8 --- /dev/null +++ b/src/hr/paycor.go @@ -0,0 +1,235 @@ +package hr + +import ( + "errors" + "strconv" + "strings" +) + +type ( + PaycorHoursLegacy struct { + Paygroup string + LastName string + FirstName string + HomeDept string + ManagerName string + WorkedDept string + OT float64 + Reg float64 + Sick float64 + Vacation float64 + Total float64 + } + + PaycorHours struct { + EEid int + Bereavement float64 + Holiday float64 + OT float64 + Regular float64 + Service float64 + Sick float64 + Vacation float64 + Total float64 + } + + PaycorDirectoryEntry struct { + Paygroup string + LastName string + FirstName string + EEid int + DepartmentName string + Manager string + } +) + +var ( + OldHeaders = []string{"Paygroup", "Last Name", "First Name", "Home Department", "Manager Name", "Worked DeptName", "OT", "Reg", "Sick", "Vac", "Total"} + NewHeaders = []string{"Badge #", "Brv", "Hol", "OT", "Reg", "Service", "Sick", "Vac", "Total"} +) + +func (hl *PaycorHoursLegacy) Set(header string, content string) error { + content = strings.ReplaceAll(content, "\u00a0", "") + + switch header { + case "Paygroup": + hl.Paygroup = content + case "Last Name": + hl.LastName = content + case "First Name": + hl.FirstName = content + case "Home Department": + hl.HomeDept = content + case "Manager Name": + hl.ManagerName = content + case "Worked DeptName": + hl.WorkedDept = content + case "OT": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + hl.OT = f + case "Reg": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + hl.Reg = f + case "Sick": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + hl.Sick = f + case "Vac": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + hl.Vacation = f + case "Total": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + hl.Total = f + default: + return errors.New("Could not find header for '" + header + "'") + } + + return nil +} + +func (h *PaycorHours) Set(header string, content string) error { + switch header { + case "Badge #": + i, err := strconv.Atoi(content) + if err != nil { + panic(err) + } + h.EEid = i + case "Brv": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Bereavement = f + case "Hol": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Holiday = f + case "OT": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.OT = f + case "Reg": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Regular = f + case "Service": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Service = f + case "Sick": + f, err := 
parseFloat(content) + if err != nil { + panic(err) + } + h.Sick = f + case "Vac": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Vacation = f + case "Total": + f, err := parseFloat(content) + if err != nil { + panic(err) + } + h.Total = f + default: + return errors.New("Could not find mapping for '" + header + "'") + } + return nil +} + +func (d *PaycorDirectoryEntry) Set(header string, content string) error { + switch header { + case "Paygroup": + d.Paygroup = content + case "Last Name": + d.LastName = content + case "First Name": + d.FirstName = content + case "Badge #": + v, err := strconv.Atoi(content) + if err != nil { + return err + } + d.EEid = v + case "Department Name": + d.DepartmentName = content + case "Manager": + d.Manager = content + default: + return errors.New("Could not find mapping for '" + header + "'") + } + return nil +} + +func (hl *PaycorHoursLegacy) ToPaycorHours(directory *map[string]*PaycorDirectoryEntry) (*PaycorHours, error) { + h := PaycorHours{ + Bereavement: 0.0, + Holiday: 0.0, + OT: hl.OT, + Regular: hl.Reg, + Service: 0.0, + Sick: hl.Sick, + Vacation: hl.Vacation, + Total: hl.Total, + } + + ee, okay := (*directory)[hl.FirstName+hl.LastName] + if !okay { + return nil, errors.New("could not find map entry for [" + hl.FirstName + hl.LastName + "]") + } + h.EEid = ee.EEid + return &h, nil +} + +func (h *PaycorHours) ToRow() []string { + + return []string{ + strconv.Itoa(h.EEid), + strconv.FormatFloat(h.Bereavement, 'f', 2, 64), + strconv.FormatFloat(h.Holiday, 'f', 2, 64), + strconv.FormatFloat(h.OT, 'f', 2, 64), + strconv.FormatFloat(h.Regular, 'f', 2, 64), + strconv.FormatFloat(h.Service, 'f', 2, 64), + strconv.FormatFloat(h.Sick, 'f', 2, 64), + strconv.FormatFloat(h.Vacation, 'f', 2, 64), + strconv.FormatFloat(h.Total, 'f', 2, 64), + } +} + +func parseFloat(content string) (float64, error) { + if len(content) == 0 { + return 0.0, nil + } else { + f, err := strconv.ParseFloat(content, 64) + if err != nil { + return 0.0, err + } + return f, nil + } +} diff --git a/src/mercuryUtil/csvWorker.go b/src/mercuryUtil/csvWorker.go index b112df9..e7c3cfb 100644 --- a/src/mercuryUtil/csvWorker.go +++ b/src/mercuryUtil/csvWorker.go @@ -1,6 +1,12 @@ package mercuryUtil -import "strconv" +import ( + "bytes" + "errors" + "os" + "strconv" + "strings" +) type ( CsvConvertable interface { @@ -9,34 +15,77 @@ type ( CsvWorker[K CsvConvertable] struct { Filepath string - Artifacts *[]K + cachedData *[][]string headerInFirstRow bool MakeNew func() K } + + SanitationRule struct { + ColumnName string + MappingFunction func(string) string + } ) -func NewCsvWorker[K CsvConvertable](pathlike string, container []K, factory func() K, headerInFirstRow bool) (*CsvWorker[K], error) { +var ( + UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string { + var blank []byte + bbuff := []byte(s) + bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank) + //I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up + bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank) + + return string(bbuff) + }} +) + +func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] { worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow} worker.MakeNew = factory - worker.Artifacts = &container - err := worker.process() + + return &worker +} + +func (w *CsvWorker[K]) ensureData() error { + if w.cachedData == nil { + data, err := LoadCsv(w.Filepath) + if err != nil { + 
return err + } + w.cachedData = data + } + return nil +} + +func (w *CsvWorker[K]) GetHeaders() ([]string, error) { + err := w.ensureData() if err != nil { return nil, err } - return &worker, nil + headersIntermed := (*w.cachedData)[0] + for i, v := range headersIntermed { + headersIntermed[i] = UfeffRule.MappingFunction(v) + } + return (*w.cachedData)[0], nil } -func (w *CsvWorker[K]) process() error { - data := LoadCsv(w.Filepath) +func (w *CsvWorker[K]) Process(artifacts *[]K) error { + err := w.ensureData() + if err != nil { + return err + } var headers []string var startingIdx int //intialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0]) + data := *w.cachedData if w.headerInFirstRow { - headers = (*data)[0] + headers = data[0] + for i, v := range headers { + headers[i] = UfeffRule.MappingFunction(v) + } startingIdx = 1 } else { - l := len((*data)[0]) + l := len(data[0]) headers = make([]string, l, l) for i := 0; i < l; i++ { headers[i] = strconv.Itoa(i) @@ -44,8 +93,8 @@ func (w *CsvWorker[K]) process() error { startingIdx = 0 } - for rowIdx, row := range *data { - if rowIdx < startingIdx { + for rowIdx, row := range data { + if rowIdx < startingIdx || isEmptyRow(row) { continue } obj := w.MakeNew() @@ -56,7 +105,42 @@ func (w *CsvWorker[K]) process() error { return err } } - *w.Artifacts = append(*w.Artifacts, obj) + *artifacts = append(*artifacts, obj) } return nil } + +func isEmptyRow(arr []string) bool { + for _, v := range arr { + if len(v) > 0 { + return false + } + } + return true +} + +func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) { + //this is unsafe, as it won't handle escape characters for commas in headers, but we'll burn that bridge later + f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755) + if err != nil { + return nil, err + } + defer f.Close() + + buff := make([]byte, 1024, 1024) + + l, err := f.Read(buff) + if err != nil { + return nil, err + } + for i := 0; i < l; i++ { + c := buff[i] + if c == '\n' { + headerRow := string(buff[:i]) + headerRow = UfeffRule.MappingFunction(headerRow) + headerRow = strings.ReplaceAll(headerRow, "\r", "") + return strings.Split(headerRow, ","), nil + } + } + return nil, errors.New("could not find a newline character in the first 1024 bytes of csv: " + pathlike) +} diff --git a/src/mercuryUtil/hrFixer.go b/src/mercuryUtil/hrFixer.go deleted file mode 100644 index c5d8f07..0000000 --- a/src/mercuryUtil/hrFixer.go +++ /dev/null @@ -1,117 +0,0 @@ -package mercuryUtil - -import ( - "bytes" - "fmt" -) - -type ( - MigrationRule struct { - ColumnName string - MappingFunction func(string) string - } - - MigrationWorker struct { - FirstRowIsHeader bool - ubiRules []MigrationRule //ubiquitous rules - headerRules []MigrationRule - rules []MigrationRule - } -) - -func NewMigrationWorker() *MigrationWorker { - rules := make([]MigrationRule, 0, 100) - headerRules := make([]MigrationRule, 0, 100) - worker := MigrationWorker{FirstRowIsHeader: false, headerRules: headerRules, rules: rules} - - //this default rule will remove the web-based zero-width nonblocking space character from everything - ufeffRule := MigrationRule{ColumnName: "*", MappingFunction: func(s string) string { - var blank []byte - bbuff := []byte(s) - bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank) - //I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up - bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank) - - return string(bbuff) - }} - - worker.ubiRules = append(worker.ubiRules, 
ufeffRule) - worker.AddHeaderRule(&ufeffRule) - - return &worker -} - -func MigrateCSVs(sourceDir string, targetDir string) error { - worker := NewMigrationWorker() - - worker.AddHeaderRule( - &MigrationRule{ - "Badge #", - func(s string) string { return "EEID" }, - }, - ) - - files, err := GetAllFilesInDir(sourceDir) - - if err != nil { - panic(err) - } - - for _, file := range *files { - table := LoadCsv(file) - headerRow := (*table)[0] - headers := make([]string, len(headerRow), len(headerRow)) - for i, header := range headerRow { - prepassRules := worker.ubiRules - v := header - for _, rule := range prepassRules { - v = rule.MappingFunction(v) - } - - formattingRules := worker.getHeaderRulesByHeader(v) - for _, rule := range *formattingRules { - v = rule.MappingFunction(v) - } - headers[i] = v - } - fmt.Printf("%s\t%#v\n", file, headers) - } - - return nil -} - -func (w *MigrationWorker) AddRule(m *MigrationRule) { - w.rules = append(w.rules, *m) -} -func (w *MigrationWorker) AddHeaderRule(m *MigrationRule) { - w.headerRules = append(w.headerRules, *m) -} - -func (w *MigrationWorker) getRulesByHeader(header string) *[]*MigrationRule { - ret := make([]*MigrationRule, 0, 10) - - for _, rule := range w.rules { - if rule.ColumnName == "*" || rule.ColumnName == header { - ret = append(ret, &rule) - } - } - - return &ret -} - -func (w *MigrationWorker) getHeaderRulesByHeader(header string) *[]*MigrationRule { - ret := make([]*MigrationRule, 0, 10) - - for _, rule := range w.headerRules { - if rule.ColumnName == "*" || rule.ColumnName == header { - lRule := rule - ret = append(ret, &lRule) - } - } - - return &ret -} - -func (w *MigrationWorker) ProcessRecords(records *[][]string) { - -} diff --git a/src/mercuryUtil/util.go b/src/mercuryUtil/util.go index f6f9028..13c9af3 100644 --- a/src/mercuryUtil/util.go +++ b/src/mercuryUtil/util.go @@ -36,7 +36,7 @@ func CopyFile(inPath string, outpath string) error { return nil } -func LoadCsv(pathlike string) *[][]string { +func LoadCsv(pathlike string) (*[][]string, error) { f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755) if err != nil { panic(err) @@ -48,7 +48,25 @@ func LoadCsv(pathlike string) *[][]string { records, err := reader.ReadAll() if err != nil { - panic(err) + return nil, err + } + return &records, nil +} + +func StringArrEquals(a []string, b []string) bool { + if len(a) != len(b) { + return false + } + //should we do this? 
it screws up order independence + //sort.Strings(a) + //sort.Strings(b) + + for i := 0; i < len(a); i++ { + vA := a[i] + vB := b[i] + if vA != vB { + return false + } } - return &records + return true } diff --git a/src/test.go b/src/test.go index d7b9f2f..d454941 100644 --- a/src/test.go +++ b/src/test.go @@ -1,76 +1,141 @@ package main import ( + "encoding/csv" "errors" "fmt" + "mercury/src/hr" "mercury/src/mercuryUtil" - "strconv" + "os" + "path" + "time" ) func main() { - test() + start := time.Now() + migrateOldSheets() + finish := time.Now() + fmt.Printf("Runtime: %dus\n", finish.Sub(start).Microseconds()) } -type CCStatement struct { - TxnDate string - PostedDate string - CardNo int - Description string - Category string - Debit float64 - Account string - Department string - Notes string +func migrateOldSheets() { + err := processHoursReports() + if err != nil { + panic(err) + } } -func (c *CCStatement) Set(header string, content string) error { - switch header { - case "Transaction Date": - c.TxnDate = content - case "Posted Date": - c.PostedDate = content - case "Card No.": - i, err := strconv.Atoi(content) +func processHoursReports() error { + eeMap, err := getEmployeeMap() + if err != nil { + return err + } + dirBase := "/home/dtookey/work/clarity-reporting/paycor_leg/" + targetBase := "/home/dtookey/work/clarity-reporting/paycor/" + files, err := os.ReadDir(dirBase) + if err != nil { + return err + } + for _, v := range files { + filePath := path.Join(dirBase, v.Name()) + outName := path.Join(targetBase, v.Name()) + headers, err := mercuryUtil.GetHeadersFromCsvFilepath(filePath) if err != nil { - c.CardNo = -1 return err } - c.CardNo = i - case "Description": - c.Description = content - case "Category": - c.Category = content - case "Debit": - f, err := strconv.ParseFloat(content, 64) + + if mercuryUtil.StringArrEquals(headers, hr.NewHeaders) { + //just copy the spreadsheet. 
we don't need any extra processing + + b, err := os.ReadFile(filePath) + if err != nil { + return err + } + err = os.WriteFile(outName, b, 0755) + } else if mercuryUtil.StringArrEquals(headers, hr.OldHeaders) { + //parse as a legacy version + fmt.Printf("Processing: [%s]\n", filePath) + worker := mercuryUtil.NewCsvWorker(filePath, func() *hr.PaycorHoursLegacy { return &hr.PaycorHoursLegacy{} }, true) + items := make([]*hr.PaycorHoursLegacy, 0, 200) + err := worker.Process(&items) + if err != nil { + panic(err) + } + pHours, err := migrateLegacyItems(&items, eeMap) + if err != nil { + panic(err) + } + err = writeItemsToNewFile(outName, pHours) + if err != nil { + panic(err) + } + } else { + return errors.New("could not find matching headers for file: " + filePath) + } + } + + return nil +} + +func writeItemsToNewFile(pathlike string, items []*hr.PaycorHours) error { + f, err := os.Create(pathlike) + if err != nil { + if errors.Is(err, os.ErrExist) { + err = os.Remove(pathlike) + if err != nil { + return err + } + f, err = os.Create(pathlike) + if err != nil { + return err + } + } else { + return err + } + } + + writer := csv.NewWriter(f) + err = writer.Write(hr.NewHeaders) + if err != nil { + return err + } + for _, v := range items { + err = writer.Write(v.ToRow()) if err != nil { return err } - c.Debit = f - case "Account": - c.Account = content - case "Department": - c.Department = content - case "Notes": - c.Notes = content - default: - return errors.New("could not find header: '" + header + "'") + } + writer.Flush() + if writer.Error() != nil { + return writer.Error() } return nil } -func test() { - path := "/home/dtookey/work/dde-expense/Credit Card coding 0122 Tookey.csv" - artifacts := make([]*CCStatement, 0, 5000) - worker, err := mercuryUtil.NewCsvWorker[*CCStatement]( - path, - artifacts, - func() *CCStatement { return &CCStatement{} }, - true, - ) +func migrateLegacyItems(items *[]*hr.PaycorHoursLegacy, eeMap *map[string]*hr.PaycorDirectoryEntry) ([]*hr.PaycorHours, error) { + ret := make([]*hr.PaycorHours, 0, len(*items)) + for _, v := range *items { + pHours, err := v.ToPaycorHours(eeMap) + if err != nil { + //return nil, err + continue + } + ret = append(ret, pHours) + } + return ret, nil +} + +func getEmployeeMap() (*map[string]*hr.PaycorDirectoryEntry, error) { + directoryPath := "/home/dtookey/work/clarity-reporting/paycor_dir/20220914_Paycor_Employee Roster.csv" + worker := mercuryUtil.NewCsvWorker(directoryPath, func() *hr.PaycorDirectoryEntry { return &hr.PaycorDirectoryEntry{} }, true) + entries := make([]*hr.PaycorDirectoryEntry, 0, 200) + err := worker.Process(&entries) if err != nil { - panic(err) + return nil, err } - for _, val := range *worker.Artifacts { - fmt.Printf("%#v\n", val) + eeMap := make(map[string]*hr.PaycorDirectoryEntry) + for _, v := range entries { + eeMap[v.FirstName+v.LastName] = v } + return &eeMap, nil }
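
For reference, below is a minimal, self-contained sketch of how the CsvWorker and hr types introduced in this patch fit together for a single legacy hours export. The two testdata/*.csv paths are placeholders for illustration only; the calls themselves mirror the ones made in src/test.go.

package main

// Sketch: convert one legacy-format Paycor hours report into the new report
// shape using the generic CsvWorker and the hr types added in this patch.
// The CSV paths below are placeholders, not the real report locations.

import (
	"fmt"
	"strings"

	"mercury/src/hr"
	"mercury/src/mercuryUtil"
)

func main() {
	// Build the EEID lookup keyed by FirstName+LastName from a roster export.
	rosterWorker := mercuryUtil.NewCsvWorker(
		"testdata/roster.csv", // placeholder path
		func() *hr.PaycorDirectoryEntry { return &hr.PaycorDirectoryEntry{} },
		true,
	)
	roster := make([]*hr.PaycorDirectoryEntry, 0, 64)
	if err := rosterWorker.Process(&roster); err != nil {
		panic(err)
	}
	eeMap := make(map[string]*hr.PaycorDirectoryEntry, len(roster))
	for _, e := range roster {
		eeMap[e.FirstName+e.LastName] = e
	}

	// Parse one legacy-format hours report into PaycorHoursLegacy rows.
	legacyWorker := mercuryUtil.NewCsvWorker(
		"testdata/hours_legacy.csv", // placeholder path
		func() *hr.PaycorHoursLegacy { return &hr.PaycorHoursLegacy{} },
		true,
	)
	legacy := make([]*hr.PaycorHoursLegacy, 0, 64)
	if err := legacyWorker.Process(&legacy); err != nil {
		panic(err)
	}

	// Convert each legacy row to the new report shape and emit it as a CSV line.
	fmt.Println(strings.Join(hr.NewHeaders, ","))
	for _, row := range legacy {
		converted, err := row.ToPaycorHours(&eeMap)
		if err != nil {
			continue // no roster match for this name; skipped, as in migrateLegacyItems
		}
		fmt.Println(strings.Join(converted.ToRow(), ","))
	}
}

The factory closure passed to NewCsvWorker is what lets the generic worker allocate a fresh row object per CSV record without reflection; Process then routes each cell through that object's Set(header, value) method.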