new interface/worker to accelerate csv parsing. define the struct, write a mapping function, and baby -- you got a stew goin'.
parent b6d9f3e9fd
commit 11d3ca3e6d
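For context, a minimal sketch of how the new CsvWorker is meant to be consumed, mirroring the test() function added at the bottom of this diff. The Employee type, its columns, and the employees.csv path are made up for illustration; only CsvConvertable, NewCsvWorker, and Artifacts come from this commit.

package main

import (
	"fmt"
	"mercury/src/mercuryUtil"
)

// Employee is a hypothetical CsvConvertable: Set maps one header/cell pair
// onto a field, and the worker calls it for every cell of every data row.
type Employee struct {
	Name string
	Dept string
}

func (e *Employee) Set(header string, content string) error {
	switch header {
	case "Name":
		e.Name = content
	case "Dept":
		e.Dept = content
	}
	return nil
}

func main() {
	rows := make([]*Employee, 0, 100)
	worker, err := mercuryUtil.NewCsvWorker[*Employee](
		"employees.csv", // hypothetical input file
		rows,
		func() *Employee { return &Employee{} }, // factory for new records
		true, // first row holds the headers
	)
	if err != nil {
		panic(err)
	}
	for _, e := range *worker.Artifacts {
		fmt.Printf("%#v\n", e)
	}
}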
@@ -1,5 +0,0 @@
package drive

func Test() {

}
@@ -0,0 +1,62 @@
package mercuryUtil

import "strconv"

type (
	CsvConvertable interface {
		Set(header string, content string) error
	}

	CsvWorker[K CsvConvertable] struct {
		Filepath         string
		Artifacts        *[]K
		headerInFirstRow bool
		MakeNew          func() K
	}
)

func NewCsvWorker[K CsvConvertable](pathlike string, container []K, factory func() K, headerInFirstRow bool) (*CsvWorker[K], error) {
	worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow}
	worker.MakeNew = factory
	worker.Artifacts = &container
	err := worker.process()
	if err != nil {
		return nil, err
	}
	return &worker, nil
}

func (w *CsvWorker[K]) process() error {
	data := LoadCsv(w.Filepath)
	if len(*data) == 0 {
		// nothing to parse; avoid indexing an empty table below
		return nil
	}

	var headers []string
	var startingIdx int

	// initialize headers: if the first row is not a header row, generate
	// placeholder header names "0".."len(data[0])-1" instead
	if w.headerInFirstRow {
		headers = (*data)[0]
		startingIdx = 1
	} else {
		l := len((*data)[0])
		headers = make([]string, l)
		for i := 0; i < l; i++ {
			headers[i] = strconv.Itoa(i)
		}
		startingIdx = 0
	}

	for rowIdx, row := range *data {
		if rowIdx < startingIdx {
			continue
		}
		obj := w.MakeNew()
		for i, cell := range row {
			header := headers[i]
			err := obj.Set(header, cell)
			if err != nil {
				return err
			}
		}
		*w.Artifacts = append(*w.Artifacts, obj)
	}
	return nil
}
@@ -0,0 +1,117 @@
package mercuryUtil

import (
	"bytes"
	"fmt"
)

type (
	MigrationRule struct {
		ColumnName      string
		MappingFunction func(string) string
	}

	MigrationWorker struct {
		FirstRowIsHeader bool
		ubiRules         []MigrationRule // ubiquitous rules, applied to every column
		headerRules      []MigrationRule
		rules            []MigrationRule
	}
)

func NewMigrationWorker() *MigrationWorker {
	rules := make([]MigrationRule, 0, 100)
	headerRules := make([]MigrationRule, 0, 100)
	worker := MigrationWorker{FirstRowIsHeader: false, headerRules: headerRules, rules: rules}

	// This default rule strips the UTF-8 byte order mark (U+FEFF, the
	// zero-width no-break space) that web-exported CSVs often prepend;
	// []byte("\ufeff") is the byte sequence 0xEF 0xBB 0xBF.
	ufeffRule := MigrationRule{ColumnName: "*", MappingFunction: func(s string) string {
		return string(bytes.ReplaceAll([]byte(s), []byte("\ufeff"), nil))
	}}

	worker.ubiRules = append(worker.ubiRules, ufeffRule)
	worker.AddHeaderRule(&ufeffRule)

	return &worker
}

func MigrateCSVs(sourceDir string, targetDir string) error {
	worker := NewMigrationWorker()

	worker.AddHeaderRule(
		&MigrationRule{
			ColumnName:      "Badge #",
			MappingFunction: func(s string) string { return "EEID" },
		},
	)

	files, err := GetAllFilesInDir(sourceDir)
	if err != nil {
		return err
	}

	for _, file := range *files {
		table := LoadCsv(file)
		headerRow := (*table)[0]
		headers := make([]string, len(headerRow))
		for i, header := range headerRow {
			// prepass: apply the ubiquitous rules (e.g. BOM stripping) first
			v := header
			for _, rule := range worker.ubiRules {
				v = rule.MappingFunction(v)
			}

			// then apply the header-specific formatting rules
			formattingRules := worker.getHeaderRulesByHeader(v)
			for _, rule := range *formattingRules {
				v = rule.MappingFunction(v)
			}
			headers[i] = v
		}
		fmt.Printf("%s\t%#v\n", file, headers)
	}

	return nil
}

func (w *MigrationWorker) AddRule(m *MigrationRule) {
	w.rules = append(w.rules, *m)
}

func (w *MigrationWorker) AddHeaderRule(m *MigrationRule) {
	w.headerRules = append(w.headerRules, *m)
}

func (w *MigrationWorker) getRulesByHeader(header string) *[]*MigrationRule {
	ret := make([]*MigrationRule, 0, 10)

	for _, rule := range w.rules {
		if rule.ColumnName == "*" || rule.ColumnName == header {
			lRule := rule // copy so the pointer does not alias the loop variable
			ret = append(ret, &lRule)
		}
	}

	return &ret
}

func (w *MigrationWorker) getHeaderRulesByHeader(header string) *[]*MigrationRule {
	ret := make([]*MigrationRule, 0, 10)

	for _, rule := range w.headerRules {
		if rule.ColumnName == "*" || rule.ColumnName == header {
			lRule := rule // copy so the pointer does not alias the loop variable
			ret = append(ret, &lRule)
		}
	}

	return &ret
}

// ProcessRecords is a stub; not yet implemented.
func (w *MigrationWorker) ProcessRecords(records *[][]string) {
}
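As a quick illustration of the rule pipeline above, here is a hedged sketch (not part of this commit) of a header-rename rule like the "Badge #" → "EEID" one being registered and applied the same way MigrateCSVs does; the helper name and sample header value are invented.

package mercuryUtil

// exampleHeaderRename is a hypothetical helper, shown only to illustrate the
// ubiquitous-rule prepass followed by the header-specific rules.
func exampleHeaderRename() string {
	w := NewMigrationWorker()
	w.AddHeaderRule(&MigrationRule{
		ColumnName:      "Badge #",
		MappingFunction: func(s string) string { return "EEID" },
	})

	header := "\ufeffBadge #" // as it might arrive from a web export, BOM included
	for _, rule := range w.ubiRules {
		header = rule.MappingFunction(header) // strips the BOM
	}
	rules := w.getHeaderRulesByHeader(header)
	for _, rule := range *rules {
		header = rule.MappingFunction(header) // "Badge #" -> "EEID"
	}
	return header // "EEID"
}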
@@ -0,0 +1,54 @@
package mercuryUtil

import (
	"encoding/csv"
	"fmt"
	"os"
	"path"
)

// GetAllFilesInDir returns the paths of every .csv file directly inside pathlikeBase.
func GetAllFilesInDir(pathlikeBase string) (*[]string, error) {
	listing, err := os.ReadDir(pathlikeBase)
	if err != nil {
		return nil, err
	}
	res := make([]string, 0, 300)
	for _, list := range listing {
		if list.IsDir() || path.Ext(list.Name()) != ".csv" {
			fmt.Printf("Skipping: %s\n", list.Name())
			continue
		}
		res = append(res, path.Join(pathlikeBase, list.Name()))
	}
	return &res, nil
}

func CopyFile(inPath string, outpath string) error {
	b, err := os.ReadFile(inPath)
	if err != nil {
		return err
	}
	err = os.WriteFile(outpath, b, 0755)
	if err != nil {
		return err
	}
	return nil
}

// LoadCsv reads an entire CSV file into memory, panicking on any error.
func LoadCsv(pathlike string) *[][]string {
	f, err := os.Open(pathlike)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	reader := csv.NewReader(f)

	records, err := reader.ReadAll()
	if err != nil {
		panic(err)
	}
	return &records
}
@@ -0,0 +1,13 @@
DROP TABLE IF EXISTS mercury.hr_timesheet_directory;

CREATE TABLE mercury.hr_timesheet_directory
(
    PayGroup    VARCHAR(150),
    LName       VARCHAR(150),
    FName       VARCHAR(150),
    EEId        INT,
    HomeDept    VARCHAR(150),
    ManagerName VARCHAR(150)
);
@@ -0,0 +1,2 @@
INSERT INTO mercury.hr_timesheet_directory (PayGroup, LName, FName, EEId, HomeDept, ManagerName)
VALUES %s;
@@ -1,9 +1,76 @@
package main

import "mercury/src/hr"
import (
	"errors"
	"fmt"
	"mercury/src/mercuryUtil"
	"strconv"
)

func main() {
	reportBase := "/home/dtookey/work/clarity-reporting/paycor/"
	hr.LoadReports(reportBase)
	test()
}

type CCStatement struct {
	TxnDate     string
	PostedDate  string
	CardNo      int
	Description string
	Category    string
	Debit       float64
	Account     string
	Department  string
	Notes       string
}

// Set maps a single CSV header/cell pair onto the matching CCStatement field,
// satisfying mercuryUtil.CsvConvertable.
func (c *CCStatement) Set(header string, content string) error {
	switch header {
	case "Transaction Date":
		c.TxnDate = content
	case "Posted Date":
		c.PostedDate = content
	case "Card No.":
		i, err := strconv.Atoi(content)
		if err != nil {
			c.CardNo = -1
			return err
		}
		c.CardNo = i
	case "Description":
		c.Description = content
	case "Category":
		c.Category = content
	case "Debit":
		f, err := strconv.ParseFloat(content, 64)
		if err != nil {
			return err
		}
		c.Debit = f
	case "Account":
		c.Account = content
	case "Department":
		c.Department = content
	case "Notes":
		c.Notes = content
	default:
		return errors.New("could not find header: '" + header + "'")
	}
	return nil
}

func test() {
	path := "/home/dtookey/work/dde-expense/Credit Card coding 0122 Tookey.csv"
	artifacts := make([]*CCStatement, 0, 5000)
	worker, err := mercuryUtil.NewCsvWorker[*CCStatement](
		path,
		artifacts,
		func() *CCStatement { return &CCStatement{} },
		true,
	)
	if err != nil {
		panic(err)
	}
	for _, val := range *worker.Artifacts {
		fmt.Printf("%#v\n", val)
	}
}