Migration from the legacy hours report to the new report format is complete.
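The migration is built on the reworked CsvWorker API in this commit: the constructor no longer takes the output slice or returns an error, and parsing now happens through an explicit Process call that appends into a caller-owned slice. A minimal sketch of the intended call pattern, assuming a hypothetical input path:

package main

import (
	"fmt"

	"mercury/src/hr"
	"mercury/src/mercuryUtil"
)

func main() {
	// The factory builds one empty row value per CSV record; the worker fills it via Set().
	worker := mercuryUtil.NewCsvWorker(
		"/tmp/legacy_hours.csv", // hypothetical input file
		func() *hr.PaycorHoursLegacy { return &hr.PaycorHoursLegacy{} },
		true, // first row is the header row
	)
	rows := make([]*hr.PaycorHoursLegacy, 0, 200)
	if err := worker.Process(&rows); err != nil { // the caller owns the output slice
		panic(err)
	}
	fmt.Printf("parsed %d legacy rows\n", len(rows))
}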

master
dtookey 3 years ago
parent 11d3ca3e6d
commit 5c63b01fb6

4
.gitignore vendored

@@ -1,3 +1,5 @@
/tokens/
/build/
/.idea/
/go.sum
/mercury.iml

@@ -0,0 +1,235 @@
package hr
import (
"errors"
"strconv"
"strings"
)
type (
PaycorHoursLegacy struct {
Paygroup string
LastName string
FirstName string
HomeDept string
ManagerName string
WorkedDept string
OT float64
Reg float64
Sick float64
Vacation float64
Total float64
}
PaycorHours struct {
EEid int
Bereavement float64
Holiday float64
OT float64
Regular float64
Service float64
Sick float64
Vacation float64
Total float64
}
PaycorDirectoryEntry struct {
Paygroup string
LastName string
FirstName string
EEid int
DepartmentName string
Manager string
}
)
var (
OldHeaders = []string{"Paygroup", "Last Name", "First Name", "Home Department", "Manager Name", "Worked DeptName", "OT", "Reg", "Sick", "Vac", "Total"}
NewHeaders = []string{"Badge #", "Brv", "Hol", "OT", "Reg", "Service", "Sick", "Vac", "Total"}
)
func (hl *PaycorHoursLegacy) Set(header string, content string) error {
content = strings.ReplaceAll(content, "\u00a0", "")
switch header {
case "Paygroup":
hl.Paygroup = content
case "Last Name":
hl.LastName = content
case "First Name":
hl.FirstName = content
case "Home Department":
hl.HomeDept = content
case "Manager Name":
hl.ManagerName = content
case "Worked DeptName":
hl.WorkedDept = content
case "OT":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
hl.OT = f
case "Reg":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
hl.Reg = f
case "Sick":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
hl.Sick = f
case "Vac":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
hl.Vacation = f
case "Total":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
hl.Total = f
default:
return errors.New("Could not find header for '" + header + "'")
}
return nil
}
func (h *PaycorHours) Set(header string, content string) error {
switch header {
case "Badge #":
i, err := strconv.Atoi(content)
if err != nil {
panic(err)
}
h.EEid = i
case "Brv":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Bereavement = f
case "Hol":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Holiday = f
case "OT":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.OT = f
case "Reg":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Regular = f
case "Service":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Service = f
case "Sick":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Sick = f
case "Vac":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Vacation = f
case "Total":
f, err := parseFloat(content)
if err != nil {
panic(err)
}
h.Total = f
default:
return errors.New("Could not find mapping for '" + header + "'")
}
return nil
}
func (d *PaycorDirectoryEntry) Set(header string, content string) error {
switch header {
case "Paygroup":
d.Paygroup = content
case "Last Name":
d.LastName = content
case "First Name":
d.FirstName = content
case "Badge #":
v, err := strconv.Atoi(content)
if err != nil {
return err
}
d.EEid = v
case "Department Name":
d.DepartmentName = content
case "Manager":
d.Manager = content
default:
return errors.New("Could not find mapping for '" + header + "'")
}
return nil
}
func (hl *PaycorHoursLegacy) ToPaycorHours(directory *map[string]*PaycorDirectoryEntry) (*PaycorHours, error) {
h := PaycorHours{
Bereavement: 0.0,
Holiday: 0.0,
OT: hl.OT,
Regular: hl.Reg,
Service: 0.0,
Sick: hl.Sick,
Vacation: hl.Vacation,
Total: hl.Total,
}
ee, okay := (*directory)[hl.FirstName+hl.LastName]
if !okay {
return nil, errors.New("could not find map entry for [" + hl.FirstName + hl.LastName + "]")
}
h.EEid = ee.EEid
return &h, nil
}
func (h *PaycorHours) ToRow() []string {
return []string{
strconv.Itoa(h.EEid),
strconv.FormatFloat(h.Bereavement, 'f', 2, 64),
strconv.FormatFloat(h.Holiday, 'f', 2, 64),
strconv.FormatFloat(h.OT, 'f', 2, 64),
strconv.FormatFloat(h.Regular, 'f', 2, 64),
strconv.FormatFloat(h.Service, 'f', 2, 64),
strconv.FormatFloat(h.Sick, 'f', 2, 64),
strconv.FormatFloat(h.Vacation, 'f', 2, 64),
strconv.FormatFloat(h.Total, 'f', 2, 64),
}
}
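//parseFloat treats an empty cell as 0.0 rather than returning a parse error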
func parseFloat(content string) (float64, error) {
if len(content) == 0 {
return 0.0, nil
} else {
f, err := strconv.ParseFloat(content, 64)
if err != nil {
return 0.0, err
}
return f, nil
}
}
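A short sketch of how one legacy row carries over to the new format, assuming an employee directory keyed by FirstName+LastName as main.go builds it (the name, badge number, and hour values below are illustrative only):

package main

import (
	"fmt"

	"mercury/src/hr"
)

func main() {
	// Illustrative directory entry; real entries come from the Paycor employee roster CSV.
	directory := map[string]*hr.PaycorDirectoryEntry{
		"JaneDoe": {FirstName: "Jane", LastName: "Doe", EEid: 1234},
	}
	legacy := &hr.PaycorHoursLegacy{FirstName: "Jane", LastName: "Doe", Reg: 40, OT: 2.5, Total: 42.5}
	newRow, err := legacy.ToPaycorHours(&directory)
	if err != nil {
		panic(err) // the employee was not found in the directory
	}
	// Columns line up with hr.NewHeaders.
	fmt.Println(newRow.ToRow()) // [1234 0.00 0.00 2.50 40.00 0.00 0.00 0.00 42.50]
}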

@@ -1,6 +1,12 @@
package mercuryUtil
import "strconv"
import (
"bytes"
"errors"
"os"
"strconv"
"strings"
)
type (
CsvConvertable interface {
@@ -9,34 +15,77 @@ type (
CsvWorker[K CsvConvertable] struct {
Filepath string
Artifacts *[]K
cachedData *[][]string
headerInFirstRow bool
MakeNew func() K
}
SanitationRule struct {
ColumnName string
MappingFunction func(string) string
}
)
func NewCsvWorker[K CsvConvertable](pathlike string, container []K, factory func() K, headerInFirstRow bool) (*CsvWorker[K], error) {
var (
UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string {
var blank []byte
bbuff := []byte(s)
bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank)
//0xef, 0xbb, 0xbf is the UTF-8 byte order mark (BOM); strip it so header matching works
bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank)
return string(bbuff)
}}
)
func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] {
worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow}
worker.MakeNew = factory
worker.Artifacts = &container
err := worker.process()
return &worker
}
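//ensureData lazily loads the CSV once and caches it so GetHeaders and Process share a single read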
func (w *CsvWorker[K]) ensureData() error {
if w.cachedData == nil {
data, err := LoadCsv(w.Filepath)
if err != nil {
return err
}
w.cachedData = data
}
return nil
}
func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
err := w.ensureData()
if err != nil {
return nil, err
}
return &worker, nil
headersIntermed := (*w.cachedData)[0]
for i, v := range headersIntermed {
headersIntermed[i] = UfeffRule.MappingFunction(v)
}
return (*w.cachedData)[0], nil
}
func (w *CsvWorker[K]) process() error {
data := LoadCsv(w.Filepath)
func (w *CsvWorker[K]) Process(artifacts *[]K) error {
err := w.ensureData()
if err != nil {
return err
}
var headers []string
var startingIdx int
//initialize headers. if the first row is not a header, generate column names numbered 0..len(data[0])-1
data := *w.cachedData
if w.headerInFirstRow {
headers = (*data)[0]
headers = data[0]
for i, v := range headers {
headers[i] = UfeffRule.MappingFunction(v)
}
startingIdx = 1
} else {
l := len((*data)[0])
l := len(data[0])
headers = make([]string, l, l)
for i := 0; i < l; i++ {
headers[i] = strconv.Itoa(i)
@@ -44,8 +93,8 @@ func (w *CsvWorker[K]) process() error {
startingIdx = 0
}
for rowIdx, row := range *data {
if rowIdx < startingIdx {
for rowIdx, row := range data {
if rowIdx < startingIdx || isEmptyRow(row) {
continue
}
obj := w.MakeNew()
@@ -56,7 +105,42 @@ func (w *CsvWorker[K]) process() error {
return err
}
}
*w.Artifacts = append(*w.Artifacts, obj)
*artifacts = append(*artifacts, obj)
}
return nil
}
func isEmptyRow(arr []string) bool {
for _, v := range arr {
if len(v) > 0 {
return false
}
}
return true
}
func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) {
//this is unsafe, as it won't handle quoted headers that contain commas, but we'll burn that bridge later
f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755)
if err != nil {
return nil, err
}
defer f.Close()
buff := make([]byte, 1024, 1024)
l, err := f.Read(buff)
if err != nil {
return nil, err
}
for i := 0; i < l; i++ {
c := buff[i]
if c == '\n' {
headerRow := string(buff[:i])
headerRow = UfeffRule.MappingFunction(headerRow)
headerRow = strings.ReplaceAll(headerRow, "\r", "")
return strings.Split(headerRow, ","), nil
}
}
return nil, errors.New("could not find a newline character in the first 1024 bytes of csv: " + pathlike)
}
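GetHeadersFromCsvFilepath only peeks at the first line of the file, which is enough for the caller to route each report by comparing against the known header sets; a minimal sketch of that routing, with a hypothetical path:

package main

import (
	"fmt"

	"mercury/src/hr"
	"mercury/src/mercuryUtil"
)

func main() {
	headers, err := mercuryUtil.GetHeadersFromCsvFilepath("/tmp/hours_report.csv") // hypothetical path
	if err != nil {
		panic(err)
	}
	switch {
	case mercuryUtil.StringArrEquals(headers, hr.NewHeaders):
		fmt.Println("already in the new format; copy the file as-is")
	case mercuryUtil.StringArrEquals(headers, hr.OldHeaders):
		fmt.Println("legacy format; parse and migrate")
	default:
		fmt.Println("unrecognized header row")
	}
}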

@@ -1,117 +0,0 @@
package mercuryUtil
import (
"bytes"
"fmt"
)
type (
MigrationRule struct {
ColumnName string
MappingFunction func(string) string
}
MigrationWorker struct {
FirstRowIsHeader bool
ubiRules []MigrationRule //ubiquitous rules
headerRules []MigrationRule
rules []MigrationRule
}
)
func NewMigrationWorker() *MigrationWorker {
rules := make([]MigrationRule, 0, 100)
headerRules := make([]MigrationRule, 0, 100)
worker := MigrationWorker{FirstRowIsHeader: false, headerRules: headerRules, rules: rules}
//this default rule removes the zero-width no-break space (U+FEFF, the BOM) from every cell
ufeffRule := MigrationRule{ColumnName: "*", MappingFunction: func(s string) string {
var blank []byte
bbuff := []byte(s)
bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank)
//0xef, 0xbb, 0xbf is the UTF-8 byte order mark (BOM); strip it so header matching works
bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank)
return string(bbuff)
}}
worker.ubiRules = append(worker.ubiRules, ufeffRule)
worker.AddHeaderRule(&ufeffRule)
return &worker
}
func MigrateCSVs(sourceDir string, targetDir string) error {
worker := NewMigrationWorker()
worker.AddHeaderRule(
&MigrationRule{
"Badge #",
func(s string) string { return "EEID" },
},
)
files, err := GetAllFilesInDir(sourceDir)
if err != nil {
panic(err)
}
for _, file := range *files {
table := LoadCsv(file)
headerRow := (*table)[0]
headers := make([]string, len(headerRow), len(headerRow))
for i, header := range headerRow {
prepassRules := worker.ubiRules
v := header
for _, rule := range prepassRules {
v = rule.MappingFunction(v)
}
formattingRules := worker.getHeaderRulesByHeader(v)
for _, rule := range *formattingRules {
v = rule.MappingFunction(v)
}
headers[i] = v
}
fmt.Printf("%s\t%#v\n", file, headers)
}
return nil
}
func (w *MigrationWorker) AddRule(m *MigrationRule) {
w.rules = append(w.rules, *m)
}
func (w *MigrationWorker) AddHeaderRule(m *MigrationRule) {
w.headerRules = append(w.headerRules, *m)
}
func (w *MigrationWorker) getRulesByHeader(header string) *[]*MigrationRule {
ret := make([]*MigrationRule, 0, 10)
for _, rule := range w.rules {
if rule.ColumnName == "*" || rule.ColumnName == header {
ret = append(ret, &rule)
}
}
return &ret
}
func (w *MigrationWorker) getHeaderRulesByHeader(header string) *[]*MigrationRule {
ret := make([]*MigrationRule, 0, 10)
for _, rule := range w.headerRules {
if rule.ColumnName == "*" || rule.ColumnName == header {
lRule := rule
ret = append(ret, &lRule)
}
}
return &ret
}
func (w *MigrationWorker) ProcessRecords(records *[][]string) {
}

@@ -36,7 +36,7 @@ func CopyFile(inPath string, outpath string) error {
return nil
}
func LoadCsv(pathlike string) *[][]string {
func LoadCsv(pathlike string) (*[][]string, error) {
f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755)
if err != nil {
panic(err)
@@ -48,7 +48,25 @@ func LoadCsv(pathlike string) *[][]string {
records, err := reader.ReadAll()
if err != nil {
panic(err)
return nil, err
}
return &records, nil
}
func StringArrEquals(a []string, b []string) bool {
if len(a) != len(b) {
return false
}
//sorting here would make the comparison order-insensitive; we want an exact, order-sensitive header match
//sort.Strings(a)
//sort.Strings(b)
for i := 0; i < len(a); i++ {
vA := a[i]
vB := b[i]
if vA != vB {
return false
}
}
return &records
return true
}

@@ -1,76 +1,141 @@
package main
import (
"encoding/csv"
"errors"
"fmt"
"mercury/src/hr"
"mercury/src/mercuryUtil"
"strconv"
"os"
"path"
"time"
)
func main() {
test()
start := time.Now()
migrateOldSheets()
finish := time.Now()
fmt.Printf("Runtime: %dus\n", finish.Sub(start).Microseconds())
}
type CCStatement struct {
TxnDate string
PostedDate string
CardNo int
Description string
Category string
Debit float64
Account string
Department string
Notes string
func migrateOldSheets() {
err := processHoursReports()
if err != nil {
panic(err)
}
}
func (c *CCStatement) Set(header string, content string) error {
switch header {
case "Transaction Date":
c.TxnDate = content
case "Posted Date":
c.PostedDate = content
case "Card No.":
i, err := strconv.Atoi(content)
func processHoursReports() error {
eeMap, err := getEmployeeMap()
if err != nil {
return err
}
dirBase := "/home/dtookey/work/clarity-reporting/paycor_leg/"
targetBase := "/home/dtookey/work/clarity-reporting/paycor/"
files, err := os.ReadDir(dirBase)
if err != nil {
return err
}
for _, v := range files {
filePath := path.Join(dirBase, v.Name())
outName := path.Join(targetBase, v.Name())
headers, err := mercuryUtil.GetHeadersFromCsvFilepath(filePath)
if err != nil {
c.CardNo = -1
return err
}
c.CardNo = i
case "Description":
c.Description = content
case "Category":
c.Category = content
case "Debit":
f, err := strconv.ParseFloat(content, 64)
if mercuryUtil.StringArrEquals(headers, hr.NewHeaders) {
//just copy the spreadsheet. we don't need any extra processing
b, err := os.ReadFile(filePath)
if err != nil {
return err
}
err = os.WriteFile(outName, b, 0755)
if err != nil {
return err
}
} else if mercuryUtil.StringArrEquals(headers, hr.OldHeaders) {
//parse as a legacy version
fmt.Printf("Processing: [%s]\n", filePath)
worker := mercuryUtil.NewCsvWorker(filePath, func() *hr.PaycorHoursLegacy { return &hr.PaycorHoursLegacy{} }, true)
items := make([]*hr.PaycorHoursLegacy, 0, 200)
err := worker.Process(&items)
if err != nil {
panic(err)
}
pHours, err := migrateLegacyItems(&items, eeMap)
if err != nil {
panic(err)
}
err = writeItemsToNewFile(outName, pHours)
if err != nil {
panic(err)
}
} else {
return errors.New("could not find matching headers for file: " + filePath)
}
}
return nil
}
func writeItemsToNewFile(pathlike string, items []*hr.PaycorHours) error {
f, err := os.Create(pathlike)
if err != nil {
if errors.Is(err, os.ErrExist) {
err = os.Remove(pathlike)
if err != nil {
return err
}
f, err = os.Create(pathlike)
if err != nil {
return err
}
} else {
return err
}
}
writer := csv.NewWriter(f)
err = writer.Write(hr.NewHeaders)
if err != nil {
return err
}
for _, v := range items {
err = writer.Write(v.ToRow())
if err != nil {
return err
}
c.Debit = f
case "Account":
c.Account = content
case "Department":
c.Department = content
case "Notes":
c.Notes = content
default:
return errors.New("could not find header: '" + header + "'")
}
writer.Flush()
if writer.Error() != nil {
return writer.Error()
}
return nil
}
func test() {
path := "/home/dtookey/work/dde-expense/Credit Card coding 0122 Tookey.csv"
artifacts := make([]*CCStatement, 0, 5000)
worker, err := mercuryUtil.NewCsvWorker[*CCStatement](
path,
artifacts,
func() *CCStatement { return &CCStatement{} },
true,
)
func migrateLegacyItems(items *[]*hr.PaycorHoursLegacy, eeMap *map[string]*hr.PaycorDirectoryEntry) ([]*hr.PaycorHours, error) {
ret := make([]*hr.PaycorHours, 0, len(*items))
for _, v := range *items {
pHours, err := v.ToPaycorHours(eeMap)
if err != nil {
//return nil, err
continue
}
ret = append(ret, pHours)
}
return ret, nil
}
func getEmployeeMap() (*map[string]*hr.PaycorDirectoryEntry, error) {
directoryPath := "/home/dtookey/work/clarity-reporting/paycor_dir/20220914_Paycor_Employee Roster.csv"
worker := mercuryUtil.NewCsvWorker(directoryPath, func() *hr.PaycorDirectoryEntry { return &hr.PaycorDirectoryEntry{} }, true)
entries := make([]*hr.PaycorDirectoryEntry, 0, 200)
err := worker.Process(&entries)
if err != nil {
panic(err)
return nil, err
}
for _, val := range *worker.Artifacts {
fmt.Printf("%#v\n", val)
eeMap := make(map[string]*hr.PaycorDirectoryEntry)
for _, v := range entries {
eeMap[v.FirstName+v.LastName] = v
}
return &eeMap, nil
}
