diff --git a/main.go b/main.go index 2af752d..29f6603 100644 --- a/main.go +++ b/main.go @@ -49,7 +49,7 @@ func main() { // Flag for academic calendar scraping academicCalendars := flag.Bool("academicCalendars", false, "Alongside -scrape, -parse, or -upload, signifies that the academic calendars should be scraped/parsed/uploaded.") // Flag for degree scraping and parsing - degrees := flag.Bool("degrees", false, "Alongside -scrape or -parse, signifies that the degrees should be scraped/parsed.") + degrees := flag.Bool("degrees", false, "Alongside -scrape, -parse, or -upload. Signifies that the degrees should be scraped/parsed/uploaded.") // Flags for parsing parse := flag.Bool("parse", false, "Puts the tool into parsing mode.") @@ -154,6 +154,8 @@ func main() { uploader.UploadAcademicCalendars(*inDir) case *scrapeDiscounts: uploader.UploadDiscounts(*inDir) + case *degrees: + uploader.UploadDegrees(*inDir) default: uploader.Upload(*inDir, *replace, *staticOnly) } diff --git a/parser/degreeParser.go b/parser/degreeParser.go index 2ecb83a..82a769b 100644 --- a/parser/degreeParser.go +++ b/parser/degreeParser.go @@ -49,7 +49,10 @@ func ParseDegrees(inDir string, outDir string) { utils.VPrintf("Extracted %d programs", len(allPrograms)) // Write to output file - utils.WriteJSON(filepath.Join(outDir, "degrees.json"), allPrograms) + err = utils.WriteJSON(filepath.Join(outDir, "degrees.json"), allPrograms) + if err != nil { + log.Fatal("Failed to upload json") + } utils.VPrintf("Successfully wrote degrees to %s/degrees.json", outDir) } @@ -103,11 +106,10 @@ func extractProgram(selection *goquery.Selection, programs *[]schema.AcademicPro areasOfInterest := selection.Find("div.areas_of_interest.d-none").First() newProgram := schema.AcademicProgram{ - Title: strings.TrimSpace(title.Text()), - School: strings.TrimSpace(school.Text()), - DegreeOptions: degrees, - // Normalize to lowercase and split comma-separated values - AreasOfInterest: strings.Split(strings.TrimSpace(strings.ToLower(areasOfInterest.Text())), ", "), + Title: strings.TrimSpace(title.Text()), + School: strings.TrimSpace(school.Text()), + DegreeOptions: degrees, + AreasOfInterest: parseAreasOfInterest(areasOfInterest.Text()), } utils.VPrintf(" Areas of interest: %d topics", len(newProgram.AreasOfInterest)) @@ -132,3 +134,11 @@ func generateAllCombinations() []string { return combinations } + +func parseAreasOfInterest(areasOfInterest string) []string { + trimmed := strings.TrimSpace(areasOfInterest) + if trimmed == "" { + return []string{} + } + return strings.Split(trimmed, ", ") +} diff --git a/uploader/degreesUploader.go b/uploader/degreesUploader.go new file mode 100644 index 0000000..019b8ea --- /dev/null +++ b/uploader/degreesUploader.go @@ -0,0 +1,33 @@ +package uploader + +import ( + "context" + "fmt" + "log" + "os" + "time" + + "github.com/UTDNebula/nebula-api/api/schema" +) + +const DEGREES_FILE string = "degrees.json" + +func UploadDegrees(inDir string) { + client := connectDBFunc() + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + // Open data file for reading + fptr, err := os.Open(fmt.Sprintf("%s/"+DEGREES_FILE, inDir)) + if err != nil { + if os.IsNotExist(err) { + log.Printf("File not found. Skipping %s", DEGREES_FILE) + return + } + log.Panic(err) + } + defer fptr.Close() + + UploadData[schema.AcademicProgram](client, ctx, fptr, true) +}