diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go index b79a28a..d2d5d2f 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -131,11 +131,15 @@ func ScrapeCometCalendar(outDir string) { log.Printf("Finished scraping %d events successfully!\n\n", len(calendarEvents)) } -// callAndUnmarshal fetches a calendar page and decodes it into data. +// callAndUnmarshal fetches a calendar page from the production API and decodes it into data. func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { - // Call API to get the byte data - calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", COMET_CALENDAR_URL, page) - request, err := http.NewRequest("GET", calendarUrl, nil) + return callAndUnmarshalFromURL(client, COMET_CALENDAR_URL, page, data) +} + +// callAndUnmarshalFromURL fetches a calendar page from baseURL and decodes it into data. +func callAndUnmarshalFromURL(client *http.Client, baseURL string, page int, data *APICalendarResponse) error { + calendarURL := fmt.Sprintf("%s?days=365&pp=100&page=%d", baseURL, page) + request, err := http.NewRequest("GET", calendarURL, nil) if err != nil { return err } @@ -153,7 +157,6 @@ func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) } defer response.Body.Close() - // Unmarshal bytes to the response data buffer := bytes.Buffer{} if _, err = buffer.ReadFrom(response.Body); err != nil { return err diff --git a/scrapers/cometCalendar_test.go b/scrapers/cometCalendar_test.go new file mode 100644 index 0000000..f67de25 --- /dev/null +++ b/scrapers/cometCalendar_test.go @@ -0,0 +1,269 @@ +package scrapers + +import ( + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + "time" +) + +func TestCallAndUnmarshalFromURL_Success(t *testing.T) { + t.Parallel() + + payload, err := os.ReadFile(filepath.Join("testdata", "cometCalendar", "page0.json")) + if err != nil { + t.Fatalf("failed to load fixture: %v", err) + } + + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if got := r.URL.Query().Get("days"); got != "365" { + t.Errorf("expected query days=365, got %q", got) + } + if got := r.URL.Query().Get("pp"); got != "100" { + t.Errorf("expected query pp=100, got %q", got) + } + if got := r.URL.Query().Get("page"); got != "2" { + t.Errorf("expected query page=2, got %q", got) + } + if got := r.Header.Get("Accept"); got != "application/json" { + t.Errorf("expected Accept header application/json, got %q", got) + } + if got := r.Header.Get("Content-type"); got != "application/json" { + t.Errorf("expected Content-type header application/json, got %q", got) + } + + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write(payload); err != nil { + t.Errorf("failed to write fixture response: %v", err) + } + })) + defer server.Close() + + client := http.Client{Timeout: 2 * time.Second} + var calendarData APICalendarResponse + + if err := callAndUnmarshalFromURL(&client, server.URL, 2, &calendarData); err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if got := calendarData.Page["total"]; got != 3 { + t.Fatalf("expected page.total=3, got %d", got) + } + + if len(calendarData.Events) != 1 { + t.Fatalf("expected 1 event, got %d", len(calendarData.Events)) + } + + event := calendarData.Events[0].Event + if event.Title != "Nebula Testing Workshop" { + t.Errorf("expected title %q, got %q", "Nebula Testing Workshop", event.Title) + } + if event.Custom_fields.Contact_information_email != "team@utdnebula.com" { + t.Errorf("expected contact email %q, got %q", "team@utdnebula.com", event.Custom_fields.Contact_information_email) + } +} + +func TestCallAndUnmarshalFromURL_EmptyPayload(t *testing.T) { + t.Parallel() + + payload, err := os.ReadFile(filepath.Join("testdata", "cometCalendar", "page-empty.json")) + if err != nil { + t.Fatalf("failed to load empty fixture: %v", err) + } + + server := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write(payload); err != nil { + t.Errorf("failed to write fixture response: %v", err) + } + })) + defer server.Close() + + client := http.Client{Timeout: 2 * time.Second} + var calendarData APICalendarResponse + + if err := callAndUnmarshalFromURL(&client, server.URL, 1, &calendarData); err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got := len(calendarData.Events); got != 0 { + t.Fatalf("expected 0 events, got %d", got) + } + if got := calendarData.Page["total"]; got != 0 { + t.Fatalf("expected page.total=0, got %d", got) + } +} + +func TestCallAndUnmarshalFromURL_Non200(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + })) + defer server.Close() + + client := http.Client{Timeout: 2 * time.Second} + var calendarData APICalendarResponse + + if err := callAndUnmarshalFromURL(&client, server.URL, 1, &calendarData); err == nil { + t.Fatal("expected an error for non-200 response, got nil") + } +} + +func TestCallAndUnmarshalFromURL_InvalidJSON(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"events":[`)); err != nil { + t.Errorf("failed to write invalid response: %v", err) + } + })) + defer server.Close() + + client := http.Client{Timeout: 2 * time.Second} + var calendarData APICalendarResponse + + if err := callAndUnmarshalFromURL(&client, server.URL, 1, &calendarData); err == nil { + t.Fatal("expected json unmarshal error, got nil") + } +} + +func TestGetTime(t *testing.T) { + t.Parallel() + + testCases := map[string]struct { + Start string + End string + Err bool + }{ + "start_and_end": { + Start: "2026-03-01T12:00:00-06:00", + End: 
"2026-03-01T13:30:00-06:00", + }, + "missing_end_uses_start": { + Start: "2026-03-01T12:00:00-06:00", + End: "", + }, + "invalid_start": { + Start: "not-a-time", + End: "2026-03-01T13:30:00-06:00", + Err: true, + }, + "invalid_end": { + Start: "2026-03-01T12:00:00-06:00", + End: "still-not-a-time", + Err: true, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + t.Parallel() + + event := eventWithTimes(testCase.Start, testCase.End) + start, end, err := getTime(event) + + if testCase.Err { + if err == nil { + t.Fatal("expected error, got nil") + } + return + } + + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + expectedStart, _ := time.Parse(time.RFC3339, testCase.Start) + if !start.Equal(expectedStart) { + t.Errorf("unexpected start time: got %v, expected %v", start, expectedStart) + } + + expectedEnd := expectedStart + if testCase.End != "" { + expectedEnd, _ = time.Parse(time.RFC3339, testCase.End) + } + if !end.Equal(expectedEnd) { + t.Errorf("unexpected end time: got %v, expected %v", end, expectedEnd) + } + }) + } +} + +func TestGetEventLocation(t *testing.T) { + t.Parallel() + + testCases := map[string]struct { + Event Event + Expected string + }{ + "building_and_room": { + Event: Event{Location_name: "SSA", Room_number: "2.406"}, + Expected: "SSA, 2.406", + }, + "building_only": { + Event: Event{Location_name: "SSA", Room_number: ""}, + Expected: "SSA", + }, + "room_only": { + Event: Event{Location_name: "", Room_number: "2.406"}, + Expected: "2.406", + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + t.Parallel() + + if got := getEventLocation(testCase.Event); got != testCase.Expected { + t.Errorf("expected %q, got %q", testCase.Expected, got) + } + }) + } +} + +func TestGetFiltersAndDepartments(t *testing.T) { + t.Parallel() + + event := Event{ + Filters: Filters{ + Event_types: []FilterMap{{Name: "Workshop"}, {Name: "Networking"}}, + Event_target_audience: 
[]FilterMap{{Name: "Students"}}, + Event_topic: []FilterMap{{Name: "Technology"}, {Name: "Career"}}, + }, + Departments: []FilterMap{{Name: "Engineering"}, {Name: "Career Center"}}, + } + + types, audiences, topics := getFilters(event) + if len(types) != 2 || types[0] != "Workshop" || types[1] != "Networking" { + t.Errorf("unexpected event types: %v", types) + } + if len(audiences) != 1 || audiences[0] != "Students" { + t.Errorf("unexpected audiences: %v", audiences) + } + if len(topics) != 2 || topics[0] != "Technology" || topics[1] != "Career" { + t.Errorf("unexpected topics: %v", topics) + } + + departments := getDepartments(event) + if len(departments) != 2 || departments[0] != "Engineering" || departments[1] != "Career Center" { + t.Errorf("unexpected departments: %v", departments) + } +} + +func eventWithTimes(start string, end string) Event { + return Event{ + Event_instances: []struct { + Event_instance EventInstance `json:"event_instance"` + }{ + { + Event_instance: EventInstance{ + Start: start, + End: end, + }, + }, + }, + } +} diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index 4f6119c..1f0c0c2 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -268,17 +268,25 @@ func (s *coursebookScraper) getSectionIdsForPrefix(prefix string) ([]string, err if err != nil { return nil, fmt.Errorf("failed to fetch sections: %s", err) } - sectionRegexp := utils.Regexpf(`View details for section (%s%s\.\w+\.%s)`, prefix[3:], utils.R_COURSE_CODE, utils.R_TERM_CODE) - matches := sectionRegexp.FindAllStringSubmatch(content, -1) - for _, match := range matches { - sections = append(sections, match[1]) - } + sections = append(sections, extractSectionIDs(prefix, content)...) } s.prefixIdsCache[prefix] = sections return sections, nil } +// extractSectionIDs parses search response content and returns all matched section IDs. 
+func extractSectionIDs(prefix string, content string) []string { + sectionRegexp := utils.Regexpf(`View details for section (%s%s\.\w+\.%s)`, prefix[3:], utils.R_COURSE_CODE, utils.R_TERM_CODE) + matches := sectionRegexp.FindAllStringSubmatch(content, -1) + + sections := make([]string, 0, len(matches)) + for _, match := range matches { + sections = append(sections, match[1]) + } + return sections +} + // req utility function for making calling the coursebook api func (s *coursebookScraper) req(queryStr string, retries int, reqName string) (string, error) { var res *http.Response diff --git a/scrapers/coursebook_test.go b/scrapers/coursebook_test.go new file mode 100644 index 0000000..8de371a --- /dev/null +++ b/scrapers/coursebook_test.go @@ -0,0 +1,92 @@ +package scrapers + +import ( + "os" + "path/filepath" + "reflect" + "testing" +) + +func TestExtractSectionIDs_FromFixture(t *testing.T) { + t.Parallel() + + content, err := os.ReadFile(filepath.Join("testdata", "coursebook", "search-results-sample.html")) + if err != nil { + t.Fatalf("failed to load fixture: %v", err) + } + + ids := extractSectionIDs("cp_acct", string(content)) + expected := []string{ + "acct2301.001.25S", + "acct2301.002.25S", + "acct6v01.0W1.25S", + } + + if !reflect.DeepEqual(expected, ids) { + t.Errorf("unexpected ids. expected %v, got %v", expected, ids) + } +} + +func TestExtractSectionIDs_NoMatches(t *testing.T) { + t.Parallel() + + content := `