|
|
|
|
@ -3,6 +3,7 @@ package mercuryUtil
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"errors"
|
|
|
|
|
"fmt"
|
|
|
|
|
"os"
|
|
|
|
|
"strconv"
|
|
|
|
|
"strings"
|
|
|
|
|
@ -13,6 +14,7 @@ type (
|
|
|
|
|
Set(header string, content string) error
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//CsvWorker DO NOT INITIALIZE THIS ON YOUR OWN UNLESS YOU KNOW EXACTLY WHAT YOU ARE DOING. Use NewCsvWorker instead
|
|
|
|
|
CsvWorker[K CsvConvertable] struct {
|
|
|
|
|
Filepath string
|
|
|
|
|
cachedData *[][]string
|
|
|
|
|
@ -20,6 +22,8 @@ type (
|
|
|
|
|
MakeNew func() K
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Deprecated: SanitationRule this is used in one spot and one spot only. Don't use it unless you know exactly what
|
|
|
|
|
// you're doing
|
|
|
|
|
SanitationRule struct {
|
|
|
|
|
ColumnName string
|
|
|
|
|
MappingFunction func(string) string
|
|
|
|
|
@ -27,17 +31,27 @@ type (
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var (
|
|
|
|
|
//UfeffRule left over from a prior attempt. however, it encapsulates logic that is useful for sanitizing output from
|
|
|
|
|
//Paycor.
|
|
|
|
|
UfeffRule = SanitationRule{ColumnName: "*", MappingFunction: func(s string) string {
|
|
|
|
|
var blank []byte
|
|
|
|
|
bbuff := []byte(s)
|
|
|
|
|
bbuff = bytes.ReplaceAll(bbuff, []byte("\ufeff"), blank)
|
|
|
|
|
//I have no idea what 0xef, 0xbb, 0xbf is, but it's fucking things up
|
|
|
|
|
//I have no idea what 0xef, 0xbb, 0xbf is, but it's f---ing things up
|
|
|
|
|
bbuff = bytes.ReplaceAll(bbuff, []byte{0xef, 0xbb, 0xbf}, blank)
|
|
|
|
|
|
|
|
|
|
return string(bbuff)
|
|
|
|
|
}}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// NewCsvWorker This absolute mad-lad of a struct will save your bacon. It takes in a path to a csv report, a constructor
|
|
|
|
|
// function to allocate individual structs (handling allocation is a potential area for improvement), and a boolean
|
|
|
|
|
//determining whether the first row is headers
|
|
|
|
|
// pathlike: string regular filepath to a specific csv file "/home/user/work/report.csv"
|
|
|
|
|
// factory: func() K a function to use for initializing new structs
|
|
|
|
|
// headerInFirstRow: boolean value for whether to use the first row as header values. If this is set to false, the
|
|
|
|
|
// Set(header, content) interface will provide 0..len(headers) as a string value to determine which column is being
|
|
|
|
|
// used
|
|
|
|
|
func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInFirstRow bool) *CsvWorker[K] {
|
|
|
|
|
worker := CsvWorker[K]{Filepath: pathlike, headerInFirstRow: headerInFirstRow}
|
|
|
|
|
worker.MakeNew = factory
|
|
|
|
|
@ -45,6 +59,8 @@ func NewCsvWorker[K CsvConvertable](pathlike string, factory func() K, headerInF
|
|
|
|
|
return &worker
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ensureData: This will memoize the structured string data from the csv library workers. This is not a pure function --
|
|
|
|
|
// it will mutate the CsvWorker struct when called
|
|
|
|
|
func (w *CsvWorker[K]) ensureData() error {
|
|
|
|
|
if w.cachedData == nil {
|
|
|
|
|
data, err := LoadCsv(w.Filepath)
|
|
|
|
|
@ -56,6 +72,9 @@ func (w *CsvWorker[K]) ensureData() error {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// GetHeaders will read the *entire* csv into memory and memoize the results in the CsvWorker struct. The default behavior
|
|
|
|
|
// will use [UfeffRule] to sanitize headers, so watch out if you're using zero-width no-break spaces (U+FEFF) in your
|
|
|
|
|
// sourcecode
|
|
|
|
|
func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
|
|
|
|
|
err := w.ensureData()
|
|
|
|
|
if err != nil {
|
|
|
|
|
@ -68,6 +87,11 @@ func (w *CsvWorker[K]) GetHeaders() ([]string, error) {
|
|
|
|
|
return (*w.cachedData)[0], nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Process will take the cached csv data, call Set(header, cell) on an object for each cell, and append the results to
|
|
|
|
|
// artifacts: *[]K this container is used to store the results. Part of the reason for this is that resource allocation
|
|
|
|
|
// in generic functions is a bit more complex than I can handle with my current grasp of generic functions. This procedure
|
|
|
|
|
// will return an error if and only if ensureData() fails. Otherwise, it will panic to preserve the stack trace to aid
|
|
|
|
|
// in debugging.
|
|
|
|
|
func (w *CsvWorker[K]) Process(artifacts *[]K) error {
|
|
|
|
|
err := w.ensureData()
|
|
|
|
|
if err != nil {
|
|
|
|
|
@ -76,7 +100,7 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
|
|
|
|
|
var headers []string
|
|
|
|
|
var startingIdx int
|
|
|
|
|
|
|
|
|
|
//intialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
|
|
|
|
|
//initialize headers. if we don't get strings, we'll generate a set of strings numbered 0..len(data[0])
|
|
|
|
|
data := *w.cachedData
|
|
|
|
|
if w.headerInFirstRow {
|
|
|
|
|
headers = data[0]
|
|
|
|
|
@ -102,7 +126,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
|
|
|
|
|
header := headers[i]
|
|
|
|
|
err := obj.Set(header, cell)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
fmt.Println("[Error] in " + w.Filepath)
|
|
|
|
|
panic(err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*artifacts = append(*artifacts, obj)
|
|
|
|
|
@ -110,6 +135,8 @@ func (w *CsvWorker[K]) Process(artifacts *[]K) error {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// isEmptyRow: This procedure is a bit piggy. It naively scans the provided array and returns true iff every value has
|
|
|
|
|
// a length of 0
|
|
|
|
|
func isEmptyRow(arr []string) bool {
|
|
|
|
|
for _, v := range arr {
|
|
|
|
|
if len(v) > 0 {
|
|
|
|
|
@ -119,6 +146,9 @@ func isEmptyRow(arr []string) bool {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Deprecated: GetHeadersFromCsvFilepath this reads exactly 1024 bytes into the file to attempt to find the first newline
|
|
|
|
|
// character. once it does, it calls a hard strings.Split(row, ",") to get the headers into an array. It was a quick
|
|
|
|
|
// hack that should not be used. OR USED AT YOUR OWN RISK DON'T SAY I DIDN'T WARN YOU
|
|
|
|
|
func GetHeadersFromCsvFilepath(pathlike string) ([]string, error) {
|
|
|
|
|
//this is unsafe, as it won't handle escape characters for commas in headers, but we'll burn that bridge later
|
|
|
|
|
f, err := os.OpenFile(pathlike, os.O_RDONLY, 0755)
|
|
|
|
|
|