This commit is contained in:
105
internal/utils/parse_xls_file.go
Normal file
105
internal/utils/parse_xls_file.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/samber/lo"
|
||||
)
|
||||
|
||||
// Shift is a single shift row parsed from an employee's inner table.
type Shift struct {
	// EarningCode is the text before the first " - " in the row's label cell.
	EarningCode string `json:"earning_code"`
	// Description is the text after the first " - " in the row's label cell.
	Description string `json:"description"`
	// Hours is the numeric value parsed from the row's hours cell.
	Hours float64 `json:"hours"`
}

// CountsAsWorked reports whether the shift's earning code is "reg",
// compared case-insensitively.
func (s Shift) CountsAsWorked() bool {
	// EqualFold compares without allocating a lower-cased copy of the code.
	return strings.EqualFold(s.EarningCode, "reg")
}
|
||||
|
||||
type Employee struct {
|
||||
Name string `json:"name"`
|
||||
Shifts []Shift `json:"shifts"`
|
||||
}
|
||||
|
||||
func (e Employee) Worked() bool {
|
||||
return lo.ContainsBy(e.Shifts, func(s Shift) bool { return s.CountsAsWorked() })
|
||||
}
|
||||
|
||||
func ParseUploadedXLSFile(file *multipart.FileHeader) ([]Employee, error) {
|
||||
f, err := file.Open()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open XLS file: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
content, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read XLS file: %w", err)
|
||||
}
|
||||
|
||||
return ParseXLSContent(content)
|
||||
}
|
||||
|
||||
// ParseXLSContent parses the HTML content of an XLS file and returns a slice
|
||||
// of employees with their shifts. The XLS files are HTML tables where:
|
||||
// - Column A: employee name (td with class="smallbold" and valign=top)
|
||||
// - Column B: inner table containing shift rows
|
||||
// - Within the inner table, rows with colspan are category headers (skipped)
|
||||
// - Shift rows have: spacer td, earning code/description td (class="smalltext"),
|
||||
// and hours td
|
||||
func ParseXLSContent(content []byte) ([]Employee, error) {
|
||||
html := string(content)
|
||||
|
||||
// Split by employee rows in the outer table. Each employee row contains
|
||||
// a name cell (column A) followed by a cell with an inner table (column B).
|
||||
employeePattern := regexp.MustCompile(
|
||||
`<td\s+valign=top\s+class="smallbold">([^<]+)</td>\s*<td>(.*?)</table></td>`,
|
||||
)
|
||||
employeeMatches := employeePattern.FindAllStringSubmatch(html, -1)
|
||||
|
||||
// Pattern for shift data rows: spacer td + earning code td + hours td
|
||||
// These are rows where column C has class="smalltext" (not a colspan header)
|
||||
shiftPattern := regexp.MustCompile(
|
||||
`<tr><td width=25></td><td class=smalltext>([^<]+)</td><td[^>]*>([^<]+)</td></tr>`,
|
||||
)
|
||||
|
||||
employees := lo.Map(employeeMatches, func(empMatch []string, _ int) Employee {
|
||||
name := strings.TrimSpace(empMatch[1])
|
||||
innerTable := empMatch[2]
|
||||
|
||||
shiftMatches := shiftPattern.FindAllStringSubmatch(innerTable, -1)
|
||||
|
||||
shifts := lo.FilterMap(shiftMatches, func(sm []string, _ int) (Shift, bool) {
|
||||
codeAndDesc := strings.TrimSpace(sm[1])
|
||||
hoursStr := strings.TrimSpace(sm[2])
|
||||
|
||||
parts := strings.SplitN(codeAndDesc, " - ", 2)
|
||||
if len(parts) != 2 {
|
||||
return Shift{}, false
|
||||
}
|
||||
|
||||
hours, err := strconv.ParseFloat(hoursStr, 64)
|
||||
if err != nil {
|
||||
return Shift{}, false
|
||||
}
|
||||
|
||||
return Shift{
|
||||
EarningCode: strings.TrimSpace(parts[0]),
|
||||
Description: strings.TrimSpace(parts[1]),
|
||||
Hours: hours,
|
||||
}, true
|
||||
})
|
||||
|
||||
return Employee{
|
||||
Name: name,
|
||||
Shifts: shifts,
|
||||
}
|
||||
})
|
||||
|
||||
return employees, nil
|
||||
}
|
||||
Reference in New Issue
Block a user