Files
calculate_negative_points/internal/utils/parse_xls_file.go
Eugene Howe b0957bfa49
Some checks failed
Docker Build and Publish / publish (push) Failing after 1m33s
webapp
2026-02-17 09:47:30 -05:00

106 lines
2.8 KiB
Go

package utils
import (
"fmt"
"io"
"mime/multipart"
"regexp"
"strconv"
"strings"
"github.com/samber/lo"
)
// Shift is one earning line belonging to an employee: an earning code, its
// free-text description and the number of hours recorded against it.
type Shift struct {
	// EarningCode is the short code identifying the kind of earning.
	EarningCode string `json:"earning_code"`
	// Description is the human-readable text that accompanies the code.
	Description string `json:"description"`
	// Hours is the number of hours recorded for this line.
	Hours float64 `json:"hours"`
}

// CountsAsWorked reports whether the shift's earning code is "reg", compared
// case-insensitively. The code is compared as-is: surrounding whitespace is
// not trimmed here.
func (s Shift) CountsAsWorked() bool {
	// EqualFold compares without building a lower-cased copy of the code.
	return strings.EqualFold(s.EarningCode, "reg")
}
// Employee groups a person's name with the shifts recorded for them.
type Employee struct {
	// Name is the employee's name.
	Name string `json:"name"`
	// Shifts holds the earning lines attributed to this employee.
	Shifts []Shift `json:"shifts"`
}

// Worked reports whether at least one of the employee's shifts counts as
// worked, as decided by Shift.CountsAsWorked. An employee with no shifts has
// not worked.
func (e Employee) Worked() bool {
	for _, shift := range e.Shifts {
		if shift.CountsAsWorked() {
			return true
		}
	}
	return false
}
// ParseUploadedXLSFile opens an uploaded multipart file, reads it fully into
// memory and hands the bytes to ParseXLSContent.
//
// It returns a wrapped error when the upload cannot be opened or read;
// otherwise it returns whatever ParseXLSContent returns.
func ParseUploadedXLSFile(file *multipart.FileHeader) ([]Employee, error) {
	upload, openErr := file.Open()
	if openErr != nil {
		return nil, fmt.Errorf("failed to open XLS file: %w", openErr)
	}
	// Release the upload handle once its bytes have been consumed.
	defer upload.Close()

	raw, readErr := io.ReadAll(upload)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read XLS file: %w", readErr)
	}

	return ParseXLSContent(raw)
}
// xlsEmployeeRowPattern matches one employee row of the outer table: the name
// cell (column A) followed by the cell wrapping the inner shift table
// (column B). Capture 1 is the raw name text; capture 2 is the inner table
// markup up to, but not including, its closing </table> tag.
//
// NOTE(review): "." does not match a newline in this pattern (no (?s) flag),
// so an inner table spread over several lines is not matched. Confirm the
// exports always keep each inner table on a single line.
var xlsEmployeeRowPattern = regexp.MustCompile(
	`<td\s+valign=top\s+class="smallbold">([^<]+)</td>\s*<td>(.*?)</table></td>`,
)

// xlsShiftRowPattern matches one shift data row of an inner table: a spacer
// td, a class=smalltext td holding "CODE - description" (capture 1) and a td
// holding the hours (capture 2). Category header rows use colspan and do not
// have this shape, so they are not matched.
var xlsShiftRowPattern = regexp.MustCompile(
	`<tr><td width=25></td><td class=smalltext>([^<]+)</td><td[^>]*>([^<]+)</td></tr>`,
)

// ParseXLSContent parses the HTML content of an XLS file and returns a slice
// of employees with their shifts. The XLS files are HTML tables where:
//   - Column A: employee name (td with class="smallbold" and valign=top)
//   - Column B: inner table containing shift rows
//   - Within the inner table, rows with colspan are category headers (skipped)
//   - Shift rows have: spacer td, earning code/description td
//     (class="smalltext"), and hours td
//
// Employees are returned in document order with their names trimmed of
// surrounding whitespace. The returned slice and every Shifts slice are
// non-nil even when empty. The error result is currently always nil.
func ParseXLSContent(content []byte) ([]Employee, error) {
	employeeMatches := xlsEmployeeRowPattern.FindAllStringSubmatch(string(content), -1)

	employees := lo.Map(employeeMatches, func(empMatch []string, _ int) Employee {
		return Employee{
			Name:   strings.TrimSpace(empMatch[1]),
			Shifts: parseXLSShiftRows(empMatch[2]),
		}
	})

	return employees, nil
}

// parseXLSShiftRows extracts the shifts from one employee's inner-table
// markup. Rows whose first cell lacks the " - " separator, or whose hours
// cell is not a valid float, are skipped rather than reported.
func parseXLSShiftRows(innerTable string) []Shift {
	shiftMatches := xlsShiftRowPattern.FindAllStringSubmatch(innerTable, -1)

	return lo.FilterMap(shiftMatches, func(sm []string, _ int) (Shift, bool) {
		// Split only on the first " - " so descriptions may contain it too.
		code, description, found := strings.Cut(strings.TrimSpace(sm[1]), " - ")
		if !found {
			return Shift{}, false
		}

		hours, err := strconv.ParseFloat(strings.TrimSpace(sm[2]), 64)
		if err != nil {
			return Shift{}, false
		}

		return Shift{
			EarningCode: strings.TrimSpace(code),
			Description: strings.TrimSpace(description),
			Hours:       hours,
		}, true
	})
}