package utils

import (
	"fmt"
	"io"
	"mime/multipart"
	"regexp"
	"strconv"
	"strings"

	"github.com/samber/lo"
)

// Patterns used by ParseXLSContent. They are compiled once at package scope
// rather than on every call.
//
// NOTE(review): the pattern literals previously in this file had lost their
// HTML tag portions, so the name pattern matched every text run and never
// captured an inner table. The expressions below were rebuilt from the layout
// documented on ParseXLSContent and are deliberately tolerant of extra
// attributes, attribute quoting, letter case and line breaks. Verify them
// against a real export before relying on them.
var (
	// xlsEmployeeRowPattern captures (1) the text of a name cell carrying
	// class "smallbold" and (2) the contents of the inner table held by the
	// cell that immediately follows it. Requiring that following table cell
	// is what distinguishes a name cell from any other "smallbold" cell.
	xlsEmployeeRowPattern = regexp.MustCompile(
		`(?is)<td[^>]*class=["']?smallbold["']?[^>]*>([^<]+)</td>\s*<td[^>]*>\s*<table[^>]*>(.*?)</table>`,
	)

	// xlsShiftRowPattern captures (1) the "CODE - Description" text of a cell
	// carrying class "smalltext" and (2) the text of the cell that directly
	// follows it (the hours). A header cell that is not followed by another
	// cell does not match.
	xlsShiftRowPattern = regexp.MustCompile(
		`(?is)<td[^>]*class=["']?smalltext["']?[^>]*>([^<]+)</td>\s*<td[^>]*>([^<]+)</td>`,
	)
)

// Shift is one parsed shift row: an earning code, its description and the
// number of hours recorded against it.
type Shift struct {
	EarningCode string  `json:"earning_code"`
	Description string  `json:"description"`
	Hours       float64 `json:"hours"`
}

// CountsAsWorked reports whether the shift's earning code is "reg", compared
// case-insensitively.
func (s Shift) CountsAsWorked() bool {
	return strings.EqualFold(s.EarningCode, "reg")
}

// Employee is a named employee together with the shifts parsed for them.
type Employee struct {
	Name   string  `json:"name"`
	Shifts []Shift `json:"shifts"`
}

// Worked reports whether at least one of the employee's shifts counts as
// worked (see Shift.CountsAsWorked).
func (e Employee) Worked() bool {
	return lo.ContainsBy(e.Shifts, func(s Shift) bool {
		return s.CountsAsWorked()
	})
}

// ParseUploadedXLSFile opens an uploaded multipart file, reads it fully into
// memory and parses it with ParseXLSContent.
//
// It returns an error if file is nil or if the upload cannot be opened or
// read; open and read errors wrap the underlying cause.
func ParseUploadedXLSFile(file *multipart.FileHeader) ([]Employee, error) {
	// Guard against a nil header, which would otherwise panic inside Open.
	if file == nil {
		return nil, fmt.Errorf("failed to open XLS file: nil file header")
	}

	f, err := file.Open()
	if err != nil {
		return nil, fmt.Errorf("failed to open XLS file: %w", err)
	}
	defer f.Close()

	content, err := io.ReadAll(f)
	if err != nil {
		return nil, fmt.Errorf("failed to read XLS file: %w", err)
	}

	return ParseXLSContent(content)
}

// ParseXLSContent parses the HTML content of an XLS file and returns a slice
// of employees with their shifts. The XLS files are HTML tables where:
//   - Column A: employee name (td with class="smallbold" and valign=top)
//   - Column B: inner table containing shift rows
//   - Within the inner table, rows with colspan are category headers (skipped)
//   - Shift rows have: spacer td, earning code/description td
//     (class="smalltext"), and hours td
//
// Employees are returned in document order with surrounding whitespace
// trimmed from their names. Shift rows that cannot be parsed are skipped
// rather than reported, so the returned error is currently always nil.
func ParseXLSContent(content []byte) ([]Employee, error) {
	html := string(content)

	// Each match is one employee row of the outer table: the name cell
	// (column A) followed by the cell holding the inner table (column B).
	employeeMatches := xlsEmployeeRowPattern.FindAllStringSubmatch(html, -1)

	employees := lo.Map(employeeMatches, func(empMatch []string, _ int) Employee {
		return Employee{
			Name:   strings.TrimSpace(empMatch[1]),
			Shifts: parseXLSShifts(empMatch[2]),
		}
	})

	return employees, nil
}

// parseXLSShifts extracts the shift rows from one employee's inner table.
// A row is kept only if its first captured cell has the form
// "CODE - Description" and its second parses as a float; anything else
// (for example a category header) is dropped.
func parseXLSShifts(innerTable string) []Shift {
	shiftMatches := xlsShiftRowPattern.FindAllStringSubmatch(innerTable, -1)

	return lo.FilterMap(shiftMatches, func(sm []string, _ int) (Shift, bool) {
		// Split once on the first " - " so descriptions may contain it too.
		code, desc, found := strings.Cut(strings.TrimSpace(sm[1]), " - ")
		if !found {
			return Shift{}, false
		}

		hours, err := strconv.ParseFloat(strings.TrimSpace(sm[2]), 64)
		if err != nil {
			return Shift{}, false
		}

		return Shift{
			EarningCode: strings.TrimSpace(code),
			Description: strings.TrimSpace(desc),
			Hours:       hours,
		}, true
	})
}