Files
satbd-explorer/album_description_getter.go

184 lines
4.1 KiB
Go

package main
import (
"fmt"
"path"
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// An AlbumDescriptionGetter can be used to GET the description of an Album from www.bedetheque.com
type AlbumDescriptionGetter struct {
// getter is used by Get to fetch the album page over HTTP.
getter HTTPGetter
}
// noteRx matches text of the form "Note: <number> / <number>" and captures
// the first number (the note itself) as submatch 1.
var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`)
// linkFromSelection converts a selection into a Link: the Title is the
// selection's text with surrounding whitespace removed, and the Target is
// its href attribute.
func linkFromSelection(s *goquery.Selection) Link {
	// A missing href is deliberately tolerated: Target is then left empty.
	href, _ := s.Attr("href")

	var link Link
	link.Title = strings.TrimSpace(s.Text())
	link.Target = href
	return link
}
// Get fetches the page BD--<a.ID>.html from www.bedetheque.com and parses it
// to fill in a: CoverURL, Description, Note, Scenarists, Designers, Colorists
// and Links (author name -> link target). a.Links is replaced by a fresh map;
// the author slices are appended to.
//
// Note is set to -1.0 when the page has a note element whose text does not
// match noteRx (most likely the album has no note yet). A missing description
// is not an error.
//
// It returns the error of the HTTP request or of the HTML parsing as is. If
// any field cannot be extracted, all extraction steps still run and a single
// error listing every failure is returned; a may then be partially filled and
// FetchDate is left untouched. On success FetchDate is set to time.Now().
func (g *AlbumDescriptionGetter) Get(a *Album) error {
	URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", a.ID))
	resp, err := g.getter.Get("http://" + URL)
	if err != nil {
		return err
	}
	defer closeOrPanic(resp.Body, "GET:http://"+URL)

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return err
	}

	principal := doc.Find("div.bandeau-principal")

	// Group the rows of the detail list by their label. A row whose label is
	// blank continues the previous labelled row (e.g. an additional author).
	details := map[string][]*goquery.Selection{}
	previous := ""
	doc.Find("div.detail-album ul.infos-albums li").Each(func(i int, s *goquery.Selection) {
		labelSelection := s.Find("label")
		if labelSelection.Size() != 1 {
			return
		}
		label := strings.TrimSpace(labelSelection.Text())
		if len(label) == 0 {
			details[previous] = append(details[previous], s)
			// Keep previous unchanged so that further blank rows still attach
			// to the same label instead of being filed under "".
			return
		}
		details[label] = []*goquery.Selection{s}
		previous = label
	})

	a.Links = map[string]string{}

	// parseCredits registers every named author found under label through
	// add, and records the author's link in a.Links.
	parseCredits := func(label, role string, add func(name string)) error {
		sList, ok := details[label]
		if !ok {
			return fmt.Errorf("Could not find %s", role)
		}
		for _, s := range sList {
			l := linkFromSelection(s.Find("a"))
			if len(l.Title) > 0 {
				add(l.Title)
				a.Links[l.Title] = l.Target
			}
		}
		return nil
	}

	// The steps only inspect the already parsed document, so they are run one
	// after the other: running them concurrently made several goroutines
	// write to a.Links at the same time, which is a data race on the map.
	steps := []func() error{
		func() error {
			cover := principal.Find(".bandeau-image img.image_album")
			if cover.Size() != 1 {
				return fmt.Errorf("could not find a single cover, found (%d)", cover.Size())
			}
			src, ok := cover.Attr("src")
			if !ok {
				return fmt.Errorf("Could not find src attribute for cover")
			}
			a.CoverURL = src
			return nil
		},
		func() error {
			description := principal.Find(".bandeau-info p.auto-height span")
			if description.Size() > 1 {
				return fmt.Errorf("Could not find a single description, found %d", description.Size())
			}
			if description.Size() == 0 {
				// No description on the page: leave a.Description as it is.
				return nil
			}
			a.Description = description.Text()
			return nil
		},
		func() error {
			note := principal.Find(".bandeau-info .etoiles p.message")
			if note.Size() != 1 {
				return fmt.Errorf("Could not find a single note, found %d", note.Size())
			}
			m := noteRx.FindStringSubmatch(note.Text())
			if m == nil {
				// We simply ignore this because there is most likely no note.
				a.Note = -1.0
				return nil
			}
			noteTmp, err := strconv.ParseFloat(m[1], 64)
			if err != nil {
				return fmt.Errorf("Could not parse note in `%s`", m[0])
			}
			a.Note = noteTmp
			return nil
		},
		func() error {
			return parseCredits("Scénario :", "Scenarist", func(name string) {
				a.Scenarists = append(a.Scenarists, name)
			})
		},
		func() error {
			return parseCredits("Dessin :", "Designer", func(name string) {
				a.Designers = append(a.Designers, name)
			})
		},
		func() error {
			return parseCredits("Couleurs :", "Colorist", func(name string) {
				a.Colorists = append(a.Colorists, name)
			})
		},
	}

	// Run every step even after a failure so that the returned error lists
	// everything that is wrong with the page.
	errorList := make([]string, 0, len(steps))
	for _, step := range steps {
		if err := step(); err != nil {
			errorList = append(errorList, err.Error())
		}
	}

	if len(errorList) != 0 {
		return fmt.Errorf("Could not parse description from http://%s:%s",
			URL,
			strings.Join(append([]string{""}, errorList...), "\n * "))
	}

	a.FetchDate = time.Now()
	return nil
}