package main

import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/nightlyone/lockfile"
)

// An AlbumDescriptionCache is used to fetch and cache AlbumDescription from
// www.bedetheque.com. Descriptions are stored as JSON files under
// <basepath>/albums and cover images under <basepath>/covers.
type AlbumDescriptionCache struct {
	basepath string
	lock     lockfile.Lockfile
	getter   HTTPGetter

	// TTL is the time to live of a cached description. An entry whose file
	// modification time is older than this is fetched again by
	// GetDescription.
	TTL time.Duration
}

// NewAlbumDescriptionCache creates a new album description cache rooted at
// path. It creates the "albums" and "covers" sub-directories if needed.
// maxRequest and window are handed over to NewRateLimitedGetter (presumably
// "at most maxRequest requests per window" -- confirm against that
// constructor). The TTL defaults to 3 * 31 days.
func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) {
	res := &AlbumDescriptionCache{
		basepath: path,
		getter:   NewRateLimitedGetter(maxRequest, window),
		TTL:      3 * 31 * 24 * time.Hour, // 3 Months
	}

	var err error
	res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
	if err != nil {
		return nil, fmt.Errorf("DescriptionCache: could not create lock: %s", err)
	}

	for _, dir := range []string{"albums", "covers"} {
		if err := os.MkdirAll(filepath.Join(res.basepath, dir), 0755); err != nil {
			return nil, err
		}
	}

	return res, nil
}

// unlockOrPanic releases the cache lock and panics if that fails. It is meant
// to be deferred right after a successful TryLock.
func (c *AlbumDescriptionCache) unlockOrPanic() {
	if err := c.lock.Unlock(); err != nil {
		panic(fmt.Sprintf("Could not unlock '%s': %s", c.lock, err))
	}
}

// albumPath gets the path of the JSON description of an album in the cache.
func (c *AlbumDescriptionCache) albumPath(ID AlbumID) string {
	return filepath.Join(c.basepath, "albums", fmt.Sprintf("%d.json", ID))
}

// CoverPath gets the path of the cover in the cache. ext is appended verbatim
// to the album ID, so it is expected to carry its leading dot (e.g. ".jpg").
func (c *AlbumDescriptionCache) CoverPath(ID AlbumID, ext string) string {
	return filepath.Join(c.basepath, "covers", fmt.Sprintf("%d%s", ID, ext))
}

// noteRx matches text of the form "Note: <value> / <max>" and captures
// <value>.
var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`)

// linkFromSelection builds a Link from a selection: the title is the trimmed
// text of the selection and the target its "href" attribute (empty when the
// attribute is absent).
func linkFromSelection(s *goquery.Selection) Link {
	// A missing href deliberately yields an empty Target.
	target, _ := s.Attr("href")
	return Link{
		Title:  strings.TrimSpace(s.Text()),
		Target: target,
	}
}

// saveCover locates the cover image inside principal, downloads it and stores
// it at CoverPath. It returns the file extension of the cover, taken from the
// image URL.
func (c *AlbumDescriptionCache) saveCover(ID AlbumID, principal *goquery.Selection) (string, error) {
	cover := principal.Find(".bandeau-image img.image_album")
	if cover.Size() != 1 {
		return "", fmt.Errorf("could not find a single cover, found (%d)", cover.Size())
	}
	src, ok := cover.Attr("src")
	if !ok {
		return "", fmt.Errorf("Could not find src attribute for cover")
	}

	resp, err := c.getter.Get(src)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	ext := path.Ext(src)
	f, err := os.Create(c.CoverPath(ID, ext))
	if err != nil {
		return "", err
	}
	if _, err := io.Copy(f, resp.Body); err != nil {
		f.Close()
		return "", err
	}
	// Close is checked explicitly: a failure here means the image may not
	// have been completely written.
	if err := f.Close(); err != nil {
		return "", err
	}
	return ext, nil
}

// parseAlbumSummary extracts the textual description of the album from
// principal. A missing description is not an error and yields an empty
// string; more than one match is an error.
func parseAlbumSummary(principal *goquery.Selection) (string, error) {
	description := principal.Find(".bandeau-info p.auto-height span")
	switch n := description.Size(); {
	case n > 1:
		return "", fmt.Errorf("Could not find a single description, found %d", n)
	case n == 0:
		return "", nil
	}
	return description.Text(), nil
}

// parseAlbumNote extracts the note of the album from principal. When the
// note element is present but its text does not match noteRx, -1 is returned
// without error, since the album most likely has no note yet.
func parseAlbumNote(principal *goquery.Selection) (float64, error) {
	note := principal.Find(".bandeau-info .etoiles p.message")
	if note.Size() != 1 {
		return 0, fmt.Errorf("Could not find a single note, found %d", note.Size())
	}
	m := noteRx.FindStringSubmatch(note.Text())
	if m == nil {
		return -1.0, nil
	}
	value, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, fmt.Errorf("Could not parse note in `%s`", m[0])
	}
	return value, nil
}

// albumDetailsByLabel groups the <li> entries of the album detail list by the
// trimmed text of their <label>. An entry with an empty label is treated as a
// continuation of the closest preceding labelled entry. Entries that do not
// contain exactly one <label> are ignored.
func albumDetailsByLabel(doc *goquery.Document) map[string][]*goquery.Selection {
	details := map[string][]*goquery.Selection{}
	previous := ""
	doc.Find("div.detail-album ul.infos-albums li").Each(func(_ int, s *goquery.Selection) {
		labelSelection := s.Find("label")
		if labelSelection.Size() != 1 {
			return
		}
		label := strings.TrimSpace(labelSelection.Text())
		if len(label) == 0 {
			// Continuation row: attach it to the previous label and keep
			// `previous` untouched so that further continuation rows are
			// attached to the same label as well.
			if previous != "" {
				details[previous] = append(details[previous], s)
			}
			return
		}
		details[label] = []*goquery.Selection{s}
		previous = label
	})
	return details
}

// detailLinks returns one Link per entry stored under label in details. role
// is only used to build the error returned when label is absent.
func detailLinks(details map[string][]*goquery.Selection, label, role string) ([]Link, error) {
	entries, ok := details[label]
	if !ok {
		return nil, fmt.Errorf("Could not find %s", role)
	}
	links := make([]Link, 0, len(entries))
	for _, s := range entries {
		links = append(links, linkFromSelection(s.Find("a")))
	}
	return links, nil
}

// storeDescription writes desc as JSON to the cache file of the album. If
// writing fails, the incomplete file is removed so that it is not mistaken
// for a valid cache entry later on.
func (c *AlbumDescriptionCache) storeDescription(ID AlbumID, desc *AlbumDescription) error {
	target := c.albumPath(ID)
	f, err := os.Create(target)
	if err != nil {
		return err
	}
	err = json.NewEncoder(f).Encode(desc)
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best effort clean-up, the original error is the one to report.
		_ = os.Remove(target)
	}
	return err
}

// fetchAndCache downloads the page of the album from www.bedetheque.com,
// extracts its description, saves the cover image and stores the description
// in the cache.
//
// All extraction errors are collected and reported together; in that case a
// nil description is returned. If only the final write to the cache fails,
// the parsed description is returned along with the error.
func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
	URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
	resp, err := c.getter.Get("http://" + URL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	res := &AlbumDescription{}
	principal := doc.Find("div.bandeau-principal")
	// Built before the tasks start: they only read from it.
	details := albumDetailsByLabel(doc)

	// Each task fills a distinct field of res, so they can run concurrently.
	tasks := []func() error{
		func() error {
			ext, err := c.saveCover(ID, principal)
			if err != nil {
				return err
			}
			res.CoverExt = ext
			return nil
		},
		func() error {
			summary, err := parseAlbumSummary(principal)
			if err != nil {
				return err
			}
			res.Description = summary
			return nil
		},
		func() error {
			note, err := parseAlbumNote(principal)
			if err != nil {
				return err
			}
			res.Note = note
			return nil
		},
		func() error {
			links, err := detailLinks(details, "Scénario :", "Scenarist")
			if err != nil {
				return err
			}
			res.Scenarist = links
			return nil
		},
		func() error {
			links, err := detailLinks(details, "Dessin :", "Designer")
			if err != nil {
				return err
			}
			res.Designer = links
			return nil
		},
		func() error {
			links, err := detailLinks(details, "Couleurs :", "Colorist")
			if err != nil {
				return err
			}
			res.Colorist = links
			return nil
		},
	}

	// Buffered with one slot per task so that no task ever blocks on send.
	errCh := make(chan error, len(tasks))
	for _, task := range tasks {
		go func(run func() error) { errCh <- run() }(task)
	}

	errorList := make([]string, 0, len(tasks))
	for range tasks {
		if err := <-errCh; err != nil {
			errorList = append(errorList, err.Error())
		}
	}
	if len(errorList) != 0 {
		return nil, fmt.Errorf("Could not parse description from http://%s:%s",
			URL,
			strings.Join(append([]string{""}, errorList...), "\n * "))
	}

	return res, c.storeDescription(ID, res)
}

// GetDescription retrieves the AlbumDescription of an album from the cache,
// or from www.bedetheque.com when it is not cached yet or when the cached
// entry is older than the TTL.
//
// The cache lock is held for the whole call; an error is returned if it
// cannot be acquired.
func (c *AlbumDescriptionCache) GetDescription(ID AlbumID) (*AlbumDescription, error) {
	// we should lock the cache while we are using it
	if err := c.lock.TryLock(); err != nil {
		return nil, fmt.Errorf("Could not lock %s: %s", c.lock, err)
	}
	defer c.unlockOrPanic()

	cachePath := c.albumPath(ID)
	info, err := os.Stat(cachePath)
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		// Not cached yet.
		return c.fetchAndCache(ID)
	}

	// check TTL
	if info.ModTime().Before(time.Now().Add(-c.TTL)) {
		return c.fetchAndCache(ID)
	}

	f, err := os.Open(cachePath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	res := &AlbumDescription{}
	if err := json.NewDecoder(f).Decode(res); err != nil {
		return nil, err
	}
	return res, nil
}