Implements caching of album description and cover

This commit is contained in:
2016-01-17 18:45:36 +01:00
parent 105961a466
commit b58b7e7e6a
3 changed files with 304 additions and 16 deletions

View File

@@ -115,22 +115,23 @@ func (a *Album) GetBedethequeComURI() string {
a.ID)
}
// A Link represents a link to a resource: a title to display and the target
// it points to.
type Link struct {
// Title of the link
Title string
// Target of the link
Target string
}
// An AlbumDescription is a more complete BD description
//
// It holds data that can be fetched from bedetheque.com
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff, so Scenarist, Designer and Colorist each show up twice
// (once as string, once as []Link) — confirm which declarations belong to the
// current struct.
type AlbumDescription struct {
// Album is presumably the album this description belongs to — confirm.
Album *Album
// NOTE(review): HasCover is presumably a flag telling whether a cover is
// available — confirm against the code that sets it.
HasCover bool
// CoverExt is the file extension of the cover image, as returned by
// path.Ext on the address of the cover (so with its leading dot).
CoverExt string
// Description is the text describing the album, taken from its page.
Description string
// Note is the rating of the album, read from a "Note: x / y" text.
Note float64
Scenarist string
Designer string
Colorist string
// NOTE(review): the meaning of Cycle, Format, Pages and Created is only
// suggested by their names — confirm against the code that fills them.
Cycle string
Format string
Pages int32
Created time.Time
// Scenarist lists the links found under the "Scénario :" label of the page.
Scenarist []Link
// Designer lists the links found under the "Dessin :" label of the page.
Designer []Link
// Colorist lists the links found under the "Couleurs :" label of the page.
Colorist []Link
}

View File

@@ -1,17 +1,271 @@
package main
import "fmt"
import (
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/nightlyone/lockfile"
)
// An AlbumDescriptionCache is used to fetch and cache AlbumDescription from www.bedetheque.com
//
// Use NewAlbumDescriptionCache to create one: it sets up the lock and the
// directories used by the cache.
type AlbumDescriptionCache struct {
// basepath is the root directory of the cache; descriptions are looked up in
// its "albums" sub-directory and covers are saved in its "covers" one
basepath string
// lock is the lock file ("global.lock" under basepath) taken while the cache
// is being used
lock lockfile.Lockfile
// time to live of the cache: an entry whose file is older than this TTL is
// fetched again instead of being served from the cache
TTL time.Duration
}
// NewAlbumDescriptionCache is creating a new album description at specified location
func NewAlbumDescriptionCache(filepath string) (*AlbumDescriptionCache, error) {
return nil, fmt.Errorf("Not yet implemented")
// NewAlbumDescriptionCache creates an album description cache rooted at path.
//
// path may be relative: it is resolved to an absolute path first, because
// lockfile.New only accepts absolute file names. The "albums" and "covers"
// sub-directories are created when missing, and the TTL of the returned cache
// is set to three times 31 days.
//
// An error is returned when path cannot be resolved, when the lock cannot be
// created or when one of the directories cannot be created.
func NewAlbumDescriptionCache(path string) (*AlbumDescriptionCache, error) {
	basepath, err := filepath.Abs(path)
	if err != nil {
		return nil, fmt.Errorf("DescriptionCache: could not resolve '%s': %s", path, err)
	}

	res := &AlbumDescriptionCache{
		basepath: basepath,
		TTL:      3 * 31 * 24 * time.Hour, // 3 Months
	}

	res.lock, err = lockfile.New(filepath.Join(basepath, "global.lock"))
	if err != nil {
		return nil, fmt.Errorf("DescriptionCache: could not create lock: %s", err)
	}

	// MkdirAll also creates basepath itself when it does not exist yet.
	for _, dir := range []string{"albums", "covers"} {
		if err = os.MkdirAll(filepath.Join(basepath, dir), 0755); err != nil {
			return nil, err
		}
	}
	return res, nil
}
// unlockOrPanic releases the lock of the cache and panics when the lock
// cannot be released.
func (c *AlbumDescriptionCache) unlockOrPanic() {
	err := c.lock.Unlock()
	if err == nil {
		return
	}
	panic(fmt.Sprintf("Could not unlock '%s': %s", c.lock, err))
}
// albumPath returns the path, inside the "albums" directory of the cache, of
// the JSON file holding the description of the album ID.
func (c *AlbumDescriptionCache) albumPath(ID AlbumID) string {
	name := fmt.Sprintf("%d.json", ID)
	return filepath.Join(c.basepath, "albums", name)
}
// CoverPath returns the path, inside the "covers" directory of the cache, of
// the cover of the album ID. ext is appended verbatim to the ID, so it is
// expected to carry its leading dot (".jpg").
func (c *AlbumDescriptionCache) CoverPath(ID AlbumID, ext string) string {
	name := fmt.Sprintf("%d%s", ID, ext)
	return filepath.Join(c.basepath, "covers", name)
}
// noteRx matches a rating written as "Note: <value> / <maximum>", both being
// runs of digits and dots; the first capture group is the value before the
// slash.
var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`)
// linkFromSelection builds a Link out of a selection: the title is the text of
// the selection with surrounding white space removed, the target is its href
// attribute. Whether the attribute actually exists is not checked.
func linkFromSelection(s *goquery.Selection) Link {
	var link Link
	link.Target, _ = s.Attr("href")
	link.Title = strings.TrimSpace(s.Text())
	return link
}
// fetchAndCache downloads the page of the album ID from www.bedetheque.com,
// extracts an AlbumDescription from it and stores the result in the cache:
// the cover image under CoverPath and the description, as JSON, under
// albumPath.
//
// Every extraction problem is collected and they are reported together in a
// single error; in that case no description is returned and none is written
// to the cache. The Album field of the returned description is left unset.
func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
	URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
	resp, err := http.Get("http://" + URL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// Do not try to parse an error page as if it were an album page.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Could not fetch http://%s: %s", URL, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	res := &AlbumDescription{}
	principal := doc.Find("div.bandeau-principal")

	// Group the entries of the detail list by label. An entry whose label is
	// empty continues the previous labelled entry (e.g. a second colorist).
	details := map[string][]*goquery.Selection{}
	previous := ""
	doc.Find("div.detail-album ul.infos-albums li").Each(func(i int, s *goquery.Selection) {
		labelSelection := s.Find("label")
		if labelSelection.Size() != 1 {
			return
		}
		label := strings.TrimSpace(labelSelection.Text())
		if len(label) == 0 {
			details[previous] = append(details[previous], s)
			// Keep previous untouched so that further continuation entries
			// are still attached to the same label.
			return
		}
		details[label] = []*goquery.Selection{s}
		previous = label
	})

	// links returns a step collecting into dst the links of all the entries
	// recorded under label; role only names the field in the error message.
	links := func(label, role string, dst *[]Link) func() error {
		return func() error {
			sList, ok := details[label]
			if !ok {
				return fmt.Errorf("Could not find %s", role)
			}
			for _, s := range sList {
				*dst = append(*dst, linkFromSelection(s.Find("a")))
			}
			return nil
		}
	}

	// Each step writes to its own field of res and only reads doc and details,
	// which are complete at this point.
	steps := []func() error{
		func() error {
			ext, err := c.downloadCover(ID, principal)
			if err != nil {
				return err
			}
			res.CoverExt = ext
			return nil
		},
		func() error {
			description := principal.Find(".bandeau-info p.auto-height span")
			if description.Size() != 1 {
				return fmt.Errorf("Could not find a single description, found %d", description.Size())
			}
			res.Description = description.Text()
			return nil
		},
		func() error {
			note := principal.Find(".bandeau-info .etoiles p.message")
			if note.Size() != 1 {
				return fmt.Errorf("Could not find a single note, found %d", note.Size())
			}
			m := noteRx.FindStringSubmatch(note.Text())
			if m == nil {
				return fmt.Errorf("Could not parse note with regexp %s", noteRx)
			}
			value, err := strconv.ParseFloat(m[1], 64)
			if err != nil {
				return fmt.Errorf("Could not parse note in `%s`", m[0])
			}
			res.Note = value
			return nil
		},
		links("Scénario :", "Scenarist", &res.Scenarist),
		links("Dessin :", "Designer", &res.Designer),
		links("Couleurs :", "Colorist", &res.Colorist),
	}

	// The channel is buffered so that no step can ever block on its report.
	results := make(chan error, len(steps))
	for _, step := range steps {
		go func(step func() error) { results <- step() }(step)
	}

	errorList := make([]string, 0, len(steps))
	for range steps {
		if err := <-results; err != nil {
			errorList = append(errorList, err.Error())
		}
	}
	if len(errorList) != 0 {
		return nil, fmt.Errorf("Could not parse description from http://%s:%s",
			URL,
			strings.Join(append([]string{""}, errorList...), "\n * "))
	}

	// Without this the next lookup would find nothing under albumPath and
	// download the page again.
	if err := c.saveDescription(ID, res); err != nil {
		return nil, fmt.Errorf("Could not cache description of album %d: %s", ID, err)
	}
	return res, nil
}

// downloadCover saves under CoverPath the cover image referenced by the
// principal banner of an album page and returns the extension of the image.
func (c *AlbumDescriptionCache) downloadCover(ID AlbumID, principal *goquery.Selection) (string, error) {
	cover := principal.Find(".bandeau-image img.image_album")
	if cover.Size() != 1 {
		return "", fmt.Errorf("could not find a single cover, found (%d)", cover.Size())
	}
	src, ok := cover.Attr("src")
	if !ok {
		return "", fmt.Errorf("Could not find src attribute for cover")
	}

	resp, err := http.Get(src)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	// Do not store the body of an error page as if it were the image.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("Could not fetch cover %s: %s", src, resp.Status)
	}

	ext := path.Ext(src)
	f, err := os.Create(c.CoverPath(ID, ext))
	if err != nil {
		return "", err
	}
	if _, err = io.Copy(f, resp.Body); err != nil {
		f.Close()
		return "", err
	}
	// A failing Close can mean that the image was not completely written.
	if err = f.Close(); err != nil {
		return "", err
	}
	return ext, nil
}

// saveDescription writes desc as JSON under albumPath(ID). The data goes to a
// temporary file which is renamed once complete, so that a partially written
// file never sits at the final path.
func (c *AlbumDescriptionCache) saveDescription(ID AlbumID, desc *AlbumDescription) error {
	target := c.albumPath(ID)
	tmp := target + ".tmp"

	f, err := os.Create(tmp)
	if err != nil {
		return err
	}
	err = json.NewEncoder(f).Encode(desc)
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// best effort clean-up, the encoding error is the one worth reporting
		os.Remove(tmp)
		return err
	}
	return os.Rename(tmp, target)
}
// GetDescription returns the AlbumDescription of the album ID, read from the
// cache when a recent enough entry exists and fetched from www.bedetheque.com
// otherwise.
//
// The lock of the cache is held for the duration of the call; an error is
// returned when it cannot be taken. An entry is served from the cache only
// when its file was modified less than TTL ago; a missing or an older entry
// is handed to fetchAndCache.
func (c *AlbumDescriptionCache) GetDescription(ID AlbumID) (*AlbumDescription, error) {
	// we should lock the cache while we are using it
	if err := c.lock.TryLock(); err != nil {
		return nil, fmt.Errorf("Could not lock %s: %s", c.lock, err)
	}
	defer c.unlockOrPanic()

	cachePath := c.albumPath(ID)
	info, err := os.Stat(cachePath)
	if os.IsNotExist(err) {
		return c.fetchAndCache(ID)
	}
	if err != nil {
		return nil, err
	}

	// check TTL
	if time.Since(info.ModTime()) > c.TTL {
		return c.fetchAndCache(ID)
	}

	f, err := os.Open(cachePath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Decode into the struct itself rather than into a pointer to the
	// pointer: a JSON null then cannot turn the result into a nil description
	// returned without error.
	res := &AlbumDescription{}
	if err = json.NewDecoder(f).Decode(res); err != nil {
		return nil, err
	}
	return res, nil
}

View File

@@ -1,10 +1,43 @@
package main
import . "gopkg.in/check.v1"
import (
"io/ioutil"
"os"
. "gopkg.in/check.v1"
)
// AlbumDescriptionCacheSuite is the test suite of AlbumDescriptionCache.
type AlbumDescriptionCacheSuite struct{}
// Hand an instance of the suite to Suite (from gopkg.in/check.v1).
var _ = Suite(&AlbumDescriptionCacheSuite{})
// TestCanFetchCache creates a cache in a fresh temporary directory, asks it
// for the description of known albums and compares every answer with the
// expected description.
func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) {
	hub := Link{"Hub", "http://www.bedetheque.com/auteur-9851-BD-Hub.html"}
	pelayo := Link{"Pelayo, Stephan", "http://www.bedetheque.com/auteur-9852-BD-Pelayo-Stephan.html"}

	expectations := map[AlbumID]AlbumDescription{
		41693: {
			CoverExt:    ".jpg",
			Description: `Un couple Pennagolans - une race de vampire - s'est substitué depuis longtemps à une famille d'aristocrates japonais. Ils se font régulièrement livrer des proies humaines pour changer de corps, et ainsi survivre. Cependant leur dernier enlèvement n'est pas aussi discret que les précédents... Voilà les puissants vampires traqués par le redoutable Okko et ses deux compagnons !`,
			Note:        4.2,
			Scenarist:   []Link{hub},
			Designer:    []Link{hub},
			Colorist:    []Link{hub, pelayo},
		},
	}

	// the cache lives in a directory which is removed at the end of the test
	dir, err := ioutil.TempDir("", "satbdexplorer-tests-cache")
	c.Assert(err, IsNil)
	defer os.RemoveAll(dir)

	cache, err := NewAlbumDescriptionCache(dir)
	c.Assert(err, IsNil)

	for albumID, want := range expectations {
		got, err := cache.GetDescription(albumID)
		if !c.Check(err, IsNil) {
			continue
		}
		// dereferencing a nil description would panic
		if !c.Check(got, NotNil) {
			continue
		}
		c.Check(*got, DeepEquals, want)
	}
}