package main import ( "fmt" "path" "regexp" "strconv" "strings" "time" "github.com/PuerkitoBio/goquery" ) // An AlbumDescriptionGetter can be used to GET the description of an Album from www.bedetheque.com type AlbumDescriptionGetter struct { getter HTTPGetter } var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`) func linkFromSelection(s *goquery.Selection) Link { target, _ := s.Attr("href") return Link{ Title: strings.TrimSpace(s.Text()), Target: target, } } // Get fetches data from www.bedetheque.com and parses it to a func (g *AlbumDescriptionGetter) Get(a *Album) error { URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", a.ID)) resp, err := g.getter.Get("http://" + URL) if err != nil { return err } defer closeOrPanic(resp.Body, "GET:http://"+URL) doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return err } principal := doc.Find("div.bandeau-principal") errors := make(chan error) waitSize := 0 waitSize++ go func() { cover := principal.Find(".bandeau-image img.image_album") if cover.Size() != 1 { errors <- fmt.Errorf("could not find a single cover, found (%d)", cover.Size()) return } src, ok := cover.Attr("src") if ok == false { errors <- fmt.Errorf("Could not find src attribute for cover") return } a.CoverURL = src errors <- nil }() waitSize++ go func() { description := principal.Find(".bandeau-info p.auto-height span") if description.Size() > 1 { errors <- fmt.Errorf("Could not find a single description, found %d", description.Size()) return } if description.Size() == 0 { errors <- nil return } a.Description = description.Text() errors <- nil }() waitSize++ go func() { note := principal.Find(".bandeau-info .etoiles p.message") if note.Size() != 1 { errors <- fmt.Errorf("Could not find a single note, found %d", note.Size()) return } m := noteRx.FindStringSubmatch(note.Text()) if m == nil { a.Note = -1.0 // we simply ignore because there is most likely no note errors <- nil //errors <- fmt.Errorf("Could not parse note with regexp %s", noteRx) return } noteTmp, err := strconv.ParseFloat(m[1], 64) if err != nil { errors <- fmt.Errorf("Could not parse note in `%s`", m[0]) return } a.Note = noteTmp errors <- nil }() details := map[string][]*goquery.Selection{} previous := "" a.Links = map[string]string{} doc.Find("div.detail-album ul.infos-albums li").Each(func(i int, s *goquery.Selection) { labelSelection := s.Find("label") if labelSelection.Size() != 1 { return } label := strings.TrimSpace(labelSelection.Text()) if len(label) == 0 { details[previous] = append(details[previous], s) } details[label] = []*goquery.Selection{s} previous = label }) waitSize++ go func() { sList, ok := details["Scénario :"] if ok == false { errors <- fmt.Errorf("Could not find Scenarist") return } for _, s := range sList { l := linkFromSelection(s.Find("a")) if len(l.Title) > 0 { a.Scenarists = append(a.Scenarists, l.Title) a.Links[l.Title] = l.Target } } errors <- nil }() waitSize++ go func() { sList, ok := details["Dessin :"] if ok == false { errors <- fmt.Errorf("Could not find Designe") return } for _, s := range sList { l := linkFromSelection(s.Find("a")) if len(l.Title) > 0 { a.Designers = append(a.Designers, l.Title) a.Links[l.Title] = l.Target } } errors <- nil }() waitSize++ go func() { sList, ok := details["Couleurs :"] if ok == false { errors <- fmt.Errorf("Could not find Colorist") return } for _, s := range sList { l := linkFromSelection(s.Find("a")) if len(l.Title) > 0 { a.Colorists = append(a.Colorists, l.Title) a.Links[l.Title] = l.Target } } errors <- nil }() errorList := make([]string, 0, waitSize) for i := 0; i < waitSize; i++ { err := <-errors if err != nil { errorList = append(errorList, err.Error()) } } if len(errorList) != 0 { return fmt.Errorf("Could not parse description from http://%s:%s", URL, strings.Join(append([]string{""}, errorList...), "\n * ")) } a.FetchDate = time.Now() return nil }