Merges Album and AlbumDescription
141 album.go
@@ -1,14 +1,6 @@
package main

import (
    "fmt"
    "regexp"
    "strings"
    "time"

    "golang.org/x/text/transform"
    "golang.org/x/text/unicode/norm"
)
import "time"

// An AlbumState describes the state of an Album
type AlbumState int
@@ -29,109 +21,44 @@ const (
    BAD // 4
)

// A Link represents a link to a resource
type Link struct {
    // Title of the link
    Title string `bl_name:"nom" bl_analyzer:"simple"`
    // Target of the link
    Target string `bl_name:"target" bl_index:"false" bl_include_all:"false"`
}

// An Album is the core object in our system
//
// This is basically the data we store on bdgest.com, and that we want
// to retrieve into our system
type Album struct {
    ID AlbumID
    ISBN string
    Series string
    Title string
    Num int
    NumA string
    State AlbumState
    ID AlbumID `bl_name:"id" bl_index:"false" bl_include_all:"false"`
    ISBN string `bl_name:"isbn" bl_analyzer:"keyword" bl_include_all:"false"`
    Series string `bl_name:"série" bl_analyzer:"fr"`
    Title string `bl_name:"titre" bl_analyzer:"fr"`
    Num int `bl_name:"num" bl_index:"false" bl_include_all:"false"`
    NumA string `bl_name:"num_a" bl_index:"false" bl_include_all:"false"`
    State AlbumState `bl_name:"state" bl_index:"false" bl_include_all:"false"`

    Author string
    Editor string
    Collection string
    SatID string
    Author string `bl_name:"auteur" bl_analyzer:"simple"`
    Editor string `bl_name:"editor" bl_analyzer:"simple"`
    Collection string `bl_name:"collection" bl_analyzer:"fr"`

    LegalDeposit time.Time
    PrintDate time.Time
    PurchaseDate time.Time
}

var endDelim = regexp.MustCompile(` \(.*\)\z`)
var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
var punctuation = regexp.MustCompile(`[!?\.:;,]`)

func sanitizeTitleString(title string) string {
    // first sanitize accented characters.
    isOk := func(r rune) bool {
        return r < 32 || r >= 127
    }
    // The isOk filter is such that there is no need to chain to norm.NFC
    t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
    // This Transformer could also trivially be applied as an io.Reader
    // or io.Writer filter to automatically do such filtering when reading
    // or writing data anywhere.
    title, _, _ = transform.String(t, title)
    // Now we remove all punctuation
    return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
}

// GetBedethequeComURI tries to guess the URI used by bedetheque.com to reference an album, using reverse-engineered heuristics
func (a *Album) GetBedethequeComURI() string {
    // we check for a trailing determiner
    matches := endDelim.FindString(a.Series)
    series := a.Series
    titleMatch := false
    if len(matches) != 0 {
        series = strings.TrimSuffix(series, matches)
        det := strings.Trim(matches, " ()")
        if det[len(det)-1] != '\'' {
            det = det + " "
        }
        titleCompare := det + strings.ToLower(series[:1]) + series[1:]
        titleMatch = (titleCompare == a.Title)
    } else {
        titleMatch = (a.Series == a.Title)
    }

    series = sanitizeTitleString(series)
    title := sanitizeTitleString(a.Title)
    // first we test if we have a tome identifier
    tomeIdent := ""
    if a.Num < 0 {
        tomeIdent = a.NumA
    } else {
        tomeIdent = fmt.Sprintf("Tome-%d%s", a.Num, a.NumA)
    }

    if titleMatch {
        if len(tomeIdent) == 0 {
            return fmt.Sprintf("BD-%s-%d.html", series, a.ID)
        }
        return fmt.Sprintf("BD-%s-%s-%d.html", series, tomeIdent, a.ID)
    }
    if len(tomeIdent) == 0 {
        return fmt.Sprintf("BD-%s-%s-%d.html", series, title, a.ID)
    }
    return fmt.Sprintf("BD-%s-%s-%s-%d.html",
        series,
        tomeIdent,
        title,
        a.ID)
}

// A Link represents a link to a resource
type Link struct {
    // Title of the link
    Title string
    // Target of the link
    Target string
}

// An AlbumDescription is a more complete BD description
//
// It holds data that can be fetched from bedetheque.com
type AlbumDescription struct {
    CoverExt string
    Description string
    Note float64

    Scenarist []Link
    Designer []Link
    Colorist []Link
    SatID string `bl_name:"cote" bl_analyzer:"keyword"`

    LegalDeposit time.Time `bl_name:"dl" bl_index:"false" bl_include_all:"false"`
    PrintDate time.Time `bl_name:"ai" bl_index:"false" bl_include_all:"false"`
    PurchaseDate time.Time `bl_name:"achat" bl_index:"false" bl_include_all:"false"`

    CoverURL string `bl_name:"cover" bl_index:"false" bl_include_all:"false"`
    Description string `bl_name:"description" bl_analyzer:"fr"`
    Note float64 `bl_name:"note" bl_index:"false" bl_include_all:"false"`

    Scenarist []Link `bl_name:"scenario" bl_analyzer:"simple"`
    Designer []Link `bl_name:"dessins" bl_analyzer:"simple"`
    Colorist []Link `bl_name:"couleurs" bl_analyzer:"simple"`

    FetchDate time.Time `bl_name:"old" bl_index:"false" bl_include_all:"false"`
}
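Nothing in this diff consumes the new bl_* struct tags, so the index-mapping layer presumably lives elsewhere in the project. A consumer would typically read them through reflection, roughly like the sketch below (buildFieldMapping is a hypothetical helper, not part of the commit, and the reflect import is assumed):

    // buildFieldMapping walks the struct fields of v and collects the
    // bl_name -> bl_analyzer pairs of every indexed field.
    func buildFieldMapping(v interface{}) map[string]string {
        mapping := map[string]string{}
        t := reflect.TypeOf(v)
        for i := 0; i < t.NumField(); i++ {
            f := t.Field(i)
            name := f.Tag.Get("bl_name")
            if name == "" || f.Tag.Get("bl_index") == "false" {
                continue // skip untagged or explicitly non-indexed fields
            }
            mapping[name] = f.Tag.Get("bl_analyzer")
        }
        return mapping
    }

    // buildFieldMapping(Album{}) would then yield something like
    // map[titre:fr série:fr auteur:simple isbn:keyword ...]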
113 album_cover_cache.go (new file)
@@ -0,0 +1,113 @@
package main

import (
    "fmt"
    "io"
    "os"
    "path"
    "path/filepath"
    "time"
)

// An AlbumCoverCache is used to fetch and cache Album cover images from www.bedetheque.com
type AlbumCoverCache struct {
    basepath string
    getter HTTPGetter
    // time to live of the cache; data older than this TTL will be automatically removed
    TTL time.Duration
}

// NewAlbumCoverCache creates a new cache at the specified location on the filesystem
func NewAlbumCoverCache(path string, maxRequest uint, window time.Duration) (*AlbumCoverCache, error) {
    res := &AlbumCoverCache{
        basepath: path,
        getter: NewRateLimitedGetter(maxRequest, window),
        TTL: 3 * 31 * 24 * time.Hour, // 3 Months
    }

    err := os.MkdirAll(filepath.Join(res.basepath, "covers"), 0755)
    if err != nil {
        return nil, err
    }

    return res, nil
}

// coverPath returns the path of the cover in the cache
func (c *AlbumCoverCache) coverPath(a *Album) string {
    return filepath.Join(c.basepath, "covers", fmt.Sprintf("%d%s", a.ID, path.Ext(a.CoverURL)))
}

type teeReaderCloser struct {
    r io.Reader
    cr, cw io.Closer
}

func (t *teeReaderCloser) Read(p []byte) (int, error) {
    return t.r.Read(p)
}

func (t *teeReaderCloser) Close() error {
    err1 := t.cr.Close()
    err2 := t.cw.Close()
    if err1 == nil && err2 == nil {
        return nil
    }

    if err1 == nil {
        return err2
    }

    if err2 == nil {
        return err1
    }

    return fmt.Errorf("%s;%s", err1, err2)
}

// NewTeeReadCloser creates a new ReadCloser that writes to w the bytes it reads from r
func NewTeeReadCloser(r io.ReadCloser, w io.WriteCloser) io.ReadCloser {
    return &teeReaderCloser{
        r: io.TeeReader(r, w),
        cr: r,
        cw: w,
    }
}

func (c *AlbumCoverCache) fetch(a *Album) (io.ReadCloser, error) {
    resp, err := c.getter.Get(a.CoverURL)
    if err != nil {
        return nil, err
    }
    f, err := os.Create(c.coverPath(a))
    if err != nil {
        resp.Body.Close()
        return nil, err
    }

    return NewTeeReadCloser(resp.Body, f), nil
}

// GetCover retrieves the cover of an album, either from the cache or from www.bedetheque.com
func (c *AlbumCoverCache) GetCover(a *Album) (io.ReadCloser, error) {
    // we should lock the cache while we are using it
    info, err := os.Stat(c.coverPath(a))
    if err != nil {
        if os.IsNotExist(err) == false {
            return nil, err
        }
        return c.fetch(a)
    }

    // check TTL
    if info.ModTime().Before(time.Now().Add(-c.TTL)) == true {
        return c.fetch(a)
    }

    f, err := os.Open(c.coverPath(a))
    if err != nil {
        return nil, err
    }

    return f, nil
}
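A minimal usage sketch of the new cover cache, reusing the album data from the test below (error handling trimmed; the log, io and os imports and an arbitrary cache directory are assumed):

    cache, err := NewAlbumCoverCache("/tmp/bd-cache", 10, 10*time.Second)
    if err != nil {
        log.Fatal(err)
    }
    a := &Album{ID: 41693, CoverURL: "http://www.bedetheque.com/media/Couvertures/Couv_41693.jpg"}

    // The first call downloads the image and tees it into /tmp/bd-cache/covers/41693.jpg;
    // later calls within the TTL serve the file straight from disk.
    cover, err := cache.GetCover(a)
    if err != nil {
        log.Fatal(err)
    }
    defer cover.Close()
    if _, err := io.Copy(os.Stdout, cover); err != nil {
        log.Fatal(err)
    }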
83 album_cover_cache_test.go (new file)
@@ -0,0 +1,83 @@
package main

import (
    "bytes"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "os"
    "time"

    . "gopkg.in/check.v1"
)

type AlbumCoverCacheSuite struct{}

var _ = Suite(&AlbumCoverCacheSuite{})

// a simple HTTPGetter that will never GET anything
type errorGetter struct{}

func (g *errorGetter) Get(URL string) (*http.Response, error) {
    return nil, fmt.Errorf("I will always have an error")
}

func (s *AlbumCoverCacheSuite) TestCanFetchCache(c *C) {
    data := []*Album{
        &Album{
            ID: 41693,
            CoverURL: "http://www.bedetheque.com/media/Couvertures/Couv_41693.jpg",
        },
        &Album{
            ID: 1285,
            CoverURL: "http://www.bedetheque.com/media/Couvertures/OumpahPahLepeauxrouge.jpg",
        },
    }

    tmpdir, err := ioutil.TempDir("", "satbdexplorer-tests-cache")
    c.Assert(err, IsNil)
    defer os.RemoveAll(tmpdir)

    cache, err := NewAlbumCoverCache(tmpdir, 10, 10*time.Second)
    c.Assert(err, IsNil)

    var resData = []bytes.Buffer{}

    for _, a := range data {
        cover, err := cache.GetCover(a)
        var buf bytes.Buffer
        if c.Check(err, IsNil) == true {
            _, err := io.Copy(&buf, cover)
            c.Check(err, IsNil)
            c.Check(cover.Close(), IsNil)
        }
        resData = append(resData, buf)
    }

    cache.getter = &errorGetter{}

    // now we check that we get it again, but from the disk, not
    // hitting the web
    for i, a := range data {
        cover, err := cache.GetCover(a)
        var buf bytes.Buffer
        if c.Check(err, IsNil) == true {
            _, err := io.Copy(&buf, cover)
            if c.Check(err, IsNil) == true {
                c.Check(buf.Bytes(), DeepEquals, resData[i].Bytes())
            }
            c.Check(cover.Close(), IsNil)
        }
    }

    // now if we hit the TTL, we will refetch and get an error
    cache.TTL = 0

    for _, a := range data {
        cover, err := cache.GetCover(a)
        c.Check(cover, IsNil)
        c.Check(err, ErrorMatches, "I will always have an error")
    }

}
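The HTTPGetter interface itself is not part of this diff; judging from errorGetter above and the caches' getter field, it is presumably just:

    type HTTPGetter interface {
        Get(URL string) (*http.Response, error)
    }

which is what lets the test swap the rate-limited web getter for a stub that always fails.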
@@ -203,7 +203,7 @@ func (s *AlbumCsvReaderSuite) TestCanReadCsv(c *C) {
    for _, e := range expected {
        res, err := r.Read()
        if c.Check(err, IsNil) == true {
            c.Check(*res, Equals, e, Commentf("expected: %+v actual: %+v", e, res))
            c.Check(*res, DeepEquals, e, Commentf("expected: %+v actual: %+v", e, res))
        }
    }

@@ -1,284 +0,0 @@
package main

import (
    "encoding/json"
    "fmt"
    "io"
    "os"
    "path"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
    "time"

    "github.com/PuerkitoBio/goquery"
    "github.com/nightlyone/lockfile"
)

// An AlbumDescriptionCache is used to fetch and cache AlbumDescriptions from www.bedetheque.com
type AlbumDescriptionCache struct {
    basepath string
    lock lockfile.Lockfile
    getter HTTPGetter
    // time to live of the cache; data older than this TTL will be automatically removed
    TTL time.Duration
}

// NewAlbumDescriptionCache creates a new album description cache at the specified location
func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) {
    res := &AlbumDescriptionCache{
        basepath: path,
        getter: NewRateLimitedGetter(maxRequest, window),
        TTL: 3 * 31 * 24 * time.Hour, // 3 Months
    }
    var err error
    res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
    if err != nil {
        return nil, fmt.Errorf("DescriptionCache: could not create lock: %s", err)
    }

    err = os.MkdirAll(filepath.Join(res.basepath, "albums"), 0755)
    if err != nil {
        return nil, err
    }

    err = os.MkdirAll(filepath.Join(res.basepath, "covers"), 0755)
    if err != nil {
        return nil, err
    }

    return res, nil
}

func (c *AlbumDescriptionCache) unlockOrPanic() {
    if err := c.lock.Unlock(); err != nil {
        panic(fmt.Sprintf("Could not unlock '%s': %s", c.lock, err))
    }
}

func (c *AlbumDescriptionCache) albumPath(ID AlbumID) string {
    return filepath.Join(c.basepath, "albums", fmt.Sprintf("%d.json", ID))
}

// CoverPath returns the path of the cover in the cache
func (c *AlbumDescriptionCache) CoverPath(ID AlbumID, ext string) string {
    return filepath.Join(c.basepath, "covers", fmt.Sprintf("%d%s", ID, ext))
}

var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`)

func linkFromSelection(s *goquery.Selection) Link {
    target, _ := s.Attr("href")
    return Link{
        Title: strings.TrimSpace(s.Text()),
        Target: target,
    }
}

func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
    URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
    resp, err := c.getter.Get("http://" + URL)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return nil, err
    }
    res := &AlbumDescription{}
    principal := doc.Find("div.bandeau-principal")
    errors := make(chan error)
    waitSize := 0
    waitSize++
    go func() {
        cover := principal.Find(".bandeau-image img.image_album")
        if cover.Size() != 1 {
            errors <- fmt.Errorf("could not find a single cover, found (%d)", cover.Size())
            return
        }

        src, ok := cover.Attr("src")
        if ok == false {
            errors <- fmt.Errorf("Could not find src attribute for cover")
            return
        }

        resp, err := c.getter.Get(src)
        if err != nil {
            errors <- err
            return
        }

        defer resp.Body.Close()

        savePath := c.CoverPath(ID, path.Ext(src))

        f, err := os.Create(savePath)
        if err != nil {
            errors <- err
            return
        }
        defer f.Close()
        _, err = io.Copy(f, resp.Body)
        if err != nil {
            errors <- err
            return
        }
        res.CoverExt = path.Ext(src)
        errors <- nil
    }()

    waitSize++
    go func() {
        description := principal.Find(".bandeau-info p.auto-height span")
        if description.Size() > 1 {
            errors <- fmt.Errorf("Could not find a single description, found %d", description.Size())
            return
        }
        if description.Size() == 0 {
            errors <- nil
            return
        }
        res.Description = description.Text()
        errors <- nil
    }()
    waitSize++
    go func() {
        note := principal.Find(".bandeau-info .etoiles p.message")
        if note.Size() != 1 {
            errors <- fmt.Errorf("Could not find a single note, found %d", note.Size())
            return
        }
        m := noteRx.FindStringSubmatch(note.Text())
        if m == nil {
            res.Note = -1.0
            // we simply ignore because there is most likely no note
            errors <- nil
            //errors <- fmt.Errorf("Could not parse note with regexp %s", noteRx)
            return
        }
        noteTmp, err := strconv.ParseFloat(m[1], 64)
        if err != nil {
            errors <- fmt.Errorf("Could not parse note in `%s`", m[0])
            return
        }
        res.Note = noteTmp
        errors <- nil
    }()

    details := map[string][]*goquery.Selection{}
    previous := ""
    doc.Find("div.detail-album ul.infos-albums li").Each(func(i int, s *goquery.Selection) {
        labelSelection := s.Find("label")
        if labelSelection.Size() != 1 {
            return
        }
        label := strings.TrimSpace(labelSelection.Text())
        if len(label) == 0 {
            details[previous] = append(details[previous], s)
        }
        details[label] = []*goquery.Selection{s}
        previous = label
    })
    waitSize++
    go func() {
        sList, ok := details["Scénario :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Scenarist")
            return
        }
        for _, s := range sList {
            res.Scenarist = append(res.Scenarist, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    waitSize++
    go func() {
        sList, ok := details["Dessin :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Designer")
            return
        }
        for _, s := range sList {
            res.Designer = append(res.Designer, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    waitSize++
    go func() {
        sList, ok := details["Couleurs :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Colorist")
            return
        }
        for _, s := range sList {
            res.Colorist = append(res.Colorist, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    errorList := make([]string, 0, waitSize)
    for i := 0; i < waitSize; i++ {
        err := <-errors
        if err != nil {
            errorList = append(errorList, err.Error())
        }
    }

    if len(errorList) != 0 {
        return nil, fmt.Errorf("Could not parse description from http://%s:%s",
            URL,
            strings.Join(append([]string{""}, errorList...), "\n * "))
    }

    f, err := os.Create(c.albumPath(ID))
    if err != nil {
        return res, err
    }
    defer f.Close()

    enc := json.NewEncoder(f)
    return res, enc.Encode(res)
}

// GetDescription retrieves the AlbumDescription of an album, either from the cache or from www.bedetheque.com
func (c *AlbumDescriptionCache) GetDescription(ID AlbumID) (*AlbumDescription, error) {
    // we should lock the cache while we are using it
    if err := c.lock.TryLock(); err != nil {
        return nil, fmt.Errorf("Could not lock %s: %s", c.lock, err)
    }
    defer c.unlockOrPanic()

    info, err := os.Stat(c.albumPath(ID))
    if err != nil {
        if os.IsNotExist(err) == false {
            return nil, err
        }
        return c.fetchAndCache(ID)
    }

    // check TTL
    if info.ModTime().Before(time.Now().Add(-c.TTL)) == true {
        return c.fetchAndCache(ID)
    }

    f, err := os.Open(c.albumPath(ID))
    if err != nil {
        return nil, err
    }
    defer f.Close()

    dec := json.NewDecoder(f)
    res := &AlbumDescription{}
    err = dec.Decode(&res)
    if err != nil {
        return nil, err
    }

    return res, nil
}
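The deleted description cache serialized access to its directory with a lockfile, while the new cover cache only carries a comment about locking. Condensed, the locking pattern being removed (using github.com/nightlyone/lockfile, exactly as in the code above) was:

    lock, err := lockfile.New(filepath.Join(basepath, "global.lock"))
    if err != nil {
        return fmt.Errorf("could not create lock: %s", err)
    }
    if err := lock.TryLock(); err != nil {
        return fmt.Errorf("could not lock %s: %s", lock, err)
    }
    defer func() {
        // an unlock failure would leave a stale lock behind, hence the panic
        if err := lock.Unlock(); err != nil {
            panic(err)
        }
    }()
    // ... exclusive work on the cache directory ...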
@@ -1,44 +0,0 @@
package main

import (
    "io/ioutil"
    "os"
    "time"

    . "gopkg.in/check.v1"
)

type AlbumDescriptionCacheSuite struct{}

var _ = Suite(&AlbumDescriptionCacheSuite{})

func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) {
    data := map[AlbumID]AlbumDescription{
        41693: AlbumDescription{
            CoverExt: ".jpg",
            Description: `Un couple Pennagolans - une race de vampire - s'est substitué depuis longtemps à une famille d'aristocrates japonais. Ils se font régulièrement livrer des proies humaines pour changer de corps, et ainsi survivre. Cependant leur dernier enlèvement n'est pas aussi discret que les précédents... Voilà les puissants vampires traqués par le redoutable Okko et ses deux compagnons !`,
            Note: 4.2,
            Scenarist: []Link{Link{"Hub", "http://www.bedetheque.com/auteur-9851-BD-Hub.html"}},
            Designer: []Link{Link{"Hub", "http://www.bedetheque.com/auteur-9851-BD-Hub.html"}},
            Colorist: []Link{
                Link{"Hub", "http://www.bedetheque.com/auteur-9851-BD-Hub.html"},
                Link{"Pelayo, Stephan", "http://www.bedetheque.com/auteur-9852-BD-Pelayo-Stephan.html"},
            },
        },
    }

    tmpdir, err := ioutil.TempDir("", "satbdexplorer-tests-cache")
    c.Assert(err, IsNil)
    defer os.RemoveAll(tmpdir)

    cache, err := NewAlbumDescriptionCache(tmpdir, 10, 10*time.Second)
    c.Assert(err, IsNil)

    for ID, expected := range data {
        desc, err := cache.GetDescription(ID)
        if c.Check(err, IsNil) && c.Check(desc, NotNil) == true {
            c.Check(*desc, DeepEquals, expected)
        }
    }

}
168 album_description_getter.go (new file)
@@ -0,0 +1,168 @@
package main

import (
    "fmt"
    "path"
    "regexp"
    "strconv"
    "strings"

    "github.com/PuerkitoBio/goquery"
)

// An AlbumDescriptionGetter can be used to GET the description of an Album from www.bedetheque.com
type AlbumDescriptionGetter struct {
    getter HTTPGetter
}

var noteRx = regexp.MustCompile(`Note:\s*([0-9\.]+)\s*/\s*[0-9\.]+`)

func linkFromSelection(s *goquery.Selection) Link {
    target, _ := s.Attr("href")
    return Link{
        Title: strings.TrimSpace(s.Text()),
        Target: target,
    }
}

// Get fetches data from www.bedetheque.com and parses it into the given Album
func (g *AlbumDescriptionGetter) Get(a *Album) error {
    URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", a.ID))
    resp, err := g.getter.Get("http://" + URL)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return err
    }
    principal := doc.Find("div.bandeau-principal")
    errors := make(chan error)
    waitSize := 0
    waitSize++
    go func() {
        cover := principal.Find(".bandeau-image img.image_album")
        if cover.Size() != 1 {
            errors <- fmt.Errorf("could not find a single cover, found (%d)", cover.Size())
            return
        }

        src, ok := cover.Attr("src")
        if ok == false {
            errors <- fmt.Errorf("Could not find src attribute for cover")
            return
        }

        a.CoverURL = path.Ext(src)
        errors <- nil
    }()

    waitSize++
    go func() {
        description := principal.Find(".bandeau-info p.auto-height span")
        if description.Size() > 1 {
            errors <- fmt.Errorf("Could not find a single description, found %d", description.Size())
            return
        }
        if description.Size() == 0 {
            errors <- nil
            return
        }
        a.Description = description.Text()
        errors <- nil
    }()
    waitSize++
    go func() {
        note := principal.Find(".bandeau-info .etoiles p.message")
        if note.Size() != 1 {
            errors <- fmt.Errorf("Could not find a single note, found %d", note.Size())
            return
        }
        m := noteRx.FindStringSubmatch(note.Text())
        if m == nil {
            a.Note = -1.0
            // we simply ignore because there is most likely no note
            errors <- nil
            //errors <- fmt.Errorf("Could not parse note with regexp %s", noteRx)
            return
        }
        noteTmp, err := strconv.ParseFloat(m[1], 64)
        if err != nil {
            errors <- fmt.Errorf("Could not parse note in `%s`", m[0])
            return
        }
        a.Note = noteTmp
        errors <- nil
    }()

    details := map[string][]*goquery.Selection{}
    previous := ""
    doc.Find("div.detail-album ul.infos-albums li").Each(func(i int, s *goquery.Selection) {
        labelSelection := s.Find("label")
        if labelSelection.Size() != 1 {
            return
        }
        label := strings.TrimSpace(labelSelection.Text())
        if len(label) == 0 {
            details[previous] = append(details[previous], s)
        }
        details[label] = []*goquery.Selection{s}
        previous = label
    })
    waitSize++
    go func() {
        sList, ok := details["Scénario :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Scenarist")
            return
        }
        for _, s := range sList {
            a.Scenarist = append(a.Scenarist, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    waitSize++
    go func() {
        sList, ok := details["Dessin :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Designer")
            return
        }
        for _, s := range sList {
            a.Designer = append(a.Designer, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    waitSize++
    go func() {
        sList, ok := details["Couleurs :"]
        if ok == false {
            errors <- fmt.Errorf("Could not find Colorist")
            return
        }
        for _, s := range sList {
            a.Colorist = append(a.Colorist, linkFromSelection(s.Find("a")))
        }
        errors <- nil
    }()

    errorList := make([]string, 0, waitSize)
    for i := 0; i < waitSize; i++ {
        err := <-errors
        if err != nil {
            errorList = append(errorList, err.Error())
        }
    }

    if len(errorList) != 0 {
        return fmt.Errorf("Could not parse description from http://%s:%s",
            URL,
            strings.Join(append([]string{""}, errorList...), "\n * "))
    }

    return nil
}
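Get fans its parsing out to one goroutine per page section and gathers failures over a single error channel. Stripped of the goquery details, the pattern is roughly the following sketch (parseCover, parseNote and parseCredits are hypothetical stand-ins for the goroutine bodies above):

    errs := make(chan error)
    waitSize := 0
    for _, parse := range []func() error{parseCover, parseNote, parseCredits} {
        waitSize++
        go func(parse func() error) { errs <- parse() }(parse)
    }

    // drain exactly waitSize results, collecting the non-nil ones
    errorList := make([]string, 0, waitSize)
    for i := 0; i < waitSize; i++ {
        if err := <-errs; err != nil {
            errorList = append(errorList, err.Error())
        }
    }
    if len(errorList) != 0 {
        return fmt.Errorf("could not parse:%s", strings.Join(append([]string{""}, errorList...), "\n * "))
    }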
117 album_test.go
@@ -11,120 +11,3 @@ func Test(t *testing.T) { TestingT(t) }
type AlbumSuite struct{}

var _ = Suite(&AlbumSuite{})

func (s *AlbumSuite) TestSanitazation(c *C) {
    data := map[string]string{
        "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
        "Nef des fous": "Nef-des-fous",
        "Oiseau noir": "Oiseau-noir",
        "Foo": "Foo",
        "Nuit de l'étoile": "Nuit-de-l-etoile",
        "Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
        "Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
        "Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
        "100.000 femmes": "100000-femmes",
    }

    for title, expected := range data {
        san := sanitizeTitleString(title)
        c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
    }
}

func (s *AlbumSuite) TestBedecomURI(c *C) {
    data := map[string]Album{
        "BD-Okko-Tome-1-Le-cycle-de-l-eau-I-41693.html": Album{
            ID: 41693,
            ISBN: "2-84789-164-1",
            Series: "Okko",
            Title: "Le cycle de l'eau - I",
            Num: 1,
            NumA: "",
        },
        "BD-Nef-des-fous-Tome-3-Turbulences-950.html": Album{
            ID: 950,
            ISBN: "2840551292",
            Series: "Nef des fous (La)",
            Title: "Turbulences",
            Num: 3,
            NumA: "",
        },
        "BD-Oiseau-noir-21819.html": Album{
            ID: 21819,
            ISBN: "2-8001-1949-7",
            Series: "Oiseau noir (L')",
            Title: "L'oiseau noir",
            Num: -1,
            NumA: "",
        },
        "BD-Nuit-de-l-etoile-8103.html": Album{
            ID: 8103,
            ISBN: "2-905035-25-0",
            Series: "Nuit de l'étoile (La)",
            Title: "La nuit de l'étoile",
            Num: -1,
            NumA: "",
        },
        "BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{
            ID: 1387,
            ISBN: "2-84055-142-X",
            Series: "Nef des fous (La)",
            Title: "Le petit Roy",
            Num: -1,
            NumA: "HS03",
        },
        "BD-Maus-Tome-1-Mon-Pere-saigne-l-Histoire-17248.html": Album{
            ID: 17248,
            ISBN: "2080660292",
            Series: "Maus",
            Title: "Mon Père saigne l'Histoire",
            Num: 1,
            NumA: "",
        },
        "BD-Iriacynthe-Tome-1a-11975.html": Album{
            ID: 11975,
            ISBN: "2-203-33844-X",
            Series: "Iriacynthe",
            Title: "Iriacynthe",
            Num: 1,
            NumA: "a",
        },
        "BD-Jack-Palmer-Tome-4-Les-disparus-d-apostrophes-7023.html": Album{
            ID: 7023,
            ISBN: "2205022350",
            Series: "Jack Palmer",
            Title: "Les disparus d'apostrophes !",
            Num: 4,
            NumA: "",
        },
        "BD-Eden-It-s-an-Endless-World-Tome-4-Vengeance-19912.html": Album{
            ID: 19912,
            ISBN: "2845380429",
            Series: "Eden - It's an Endless World!",
            Title: "Vengeance",
            Num: 4,
            NumA: "",
        },
        "BD-100000-femmes-Tome-1-100000-femmes-les-175515.html": Album{
            ID: 175515,
            ISBN: "978-2-917456-30-9",
            Series: "100.000 femmes (Les)",
            Title: "100.000 femmes (les)",
            Num: 1,
            NumA: "",
        },
        "BD-Catalogues-Expositions-Brassens-ou-la-liberte-124218.html": Album{
            ID: 124218,
            ISBN: "9782205066975",
            Series: "(Catalogues) Expositions",
            Title: "Brassens ou la liberté",
            Num: -1,
            NumA: "",
        },
    }

    for expectedURL, album := range data {
        url := album.GetBedethequeComURI()
        c.Check(url, Equals, expectedURL)
    }
}

5 main.go
@@ -26,10 +26,11 @@ func Execute() error {
        return err
    }
    cacheBase = filepath.Dir(cacheBase)
    c, err := NewAlbumDescriptionCache(cacheBase, 10, 10*time.Second)
    c, err := NewAlbumCoverCache(cacheBase, 10, 10*time.Second)
    if err != nil {
        return err
    }
    g := AlbumDescriptionGetter{getter: c.getter}
    start := time.Now()
    go func() {
        defer close(albums)
@@ -66,7 +67,7 @@ func Execute() error {
            if ok == false {
                break
            }
            _, err := c.GetDescription(a.ID)
            err := g.Get(a)
            cached <- true
            if err != nil {
                errors <- err
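After this change the cover cache and the description getter share the same rate-limited HTTP client, and descriptions are written straight into the Album rather than into a separate AlbumDescription. A condensed sketch of the new wiring (error handling and the channel plumbing around it are simplified):

    c, _ := NewAlbumCoverCache(cacheBase, 10, 10*time.Second)
    g := AlbumDescriptionGetter{getter: c.getter}

    for a := range albums {
        if err := g.Get(a); err != nil { // fills Description, Note and the credit links
            errors <- err
            continue
        }
        // a now carries the merged AlbumDescription fields
    }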