Implements URI guessing
This commit is contained in:
59
bd.go
59
bd.go
@@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -13,10 +14,15 @@ import (
|
|||||||
type AlbumState int
|
type AlbumState int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
// NEW is "État neuf" state
|
||||||
NEW AlbumState = iota // 0
|
NEW AlbumState = iota // 0
|
||||||
|
// MINT is "Très bon état" state
|
||||||
MINT // 1
|
MINT // 1
|
||||||
|
// GOOD is "Bon état" state
|
||||||
GOOD // 2
|
GOOD // 2
|
||||||
|
// AVERAGE is "État moyen" state
|
||||||
AVERAGE // 3
|
AVERAGE // 3
|
||||||
|
// BAD is "Mauvais état" state
|
||||||
BAD // 4
|
BAD // 4
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -40,13 +46,14 @@ type Album struct {
|
|||||||
|
|
||||||
LegalDeposit time.Time
|
LegalDeposit time.Time
|
||||||
PrintDate time.Time
|
PrintDate time.Time
|
||||||
|
PurchaseDate time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
var endDelim = regexp.MustCompile(` \(.*\)\z`)
|
var endDelim = regexp.MustCompile(` \(.*\)\z`)
|
||||||
var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
|
var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
|
||||||
var punctuation = regexp.MustCompile(`[!?\.:;,]`)
|
var punctuation = regexp.MustCompile(`[!?\.:;,]`)
|
||||||
|
|
||||||
func sanitizeTitleString(title string, removeEndParent bool) string {
|
func sanitizeTitleString(title string) string {
|
||||||
// first sanitize accented characters.
|
// first sanitize accented characters.
|
||||||
isOk := func(r rune) bool {
|
isOk := func(r rune) bool {
|
||||||
return r < 32 || r >= 127
|
return r < 32 || r >= 127
|
||||||
@@ -57,16 +64,52 @@ func sanitizeTitleString(title string, removeEndParent bool) string {
|
|||||||
// or io.Writer filter to automatically do such filtering when reading
|
// or io.Writer filter to automatically do such filtering when reading
|
||||||
// or writing data anywhere.
|
// or writing data anywhere.
|
||||||
title, _, _ = transform.String(t, title)
|
title, _, _ = transform.String(t, title)
|
||||||
//Now we remove all punctuation
|
//Now we remove all punctuation
|
||||||
if removeEndParent == true {
|
|
||||||
title = endDelim.ReplaceAllString(title, "")
|
|
||||||
}
|
|
||||||
|
|
||||||
return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
|
return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*Album) GetBedethequeComURI() string {
|
// GetBedethequeComURI tries to guess the URI used by bedetheque.com to reference an album, using reverse-engineered heuristics.
|
||||||
return ""
|
func (a *Album) GetBedethequeComURI() string {
|
||||||
|
// we check for a trailing determiner in parentheses at the end of the series name, e.g. "Nef des fous (La)"
|
||||||
|
matches := endDelim.FindString(a.Series)
|
||||||
|
series := a.Series
|
||||||
|
titleMatch := false
|
||||||
|
if len(matches) != 0 {
|
||||||
|
series = strings.TrimSuffix(series, matches)
|
||||||
|
det := strings.Trim(matches, " ()")
|
||||||
|
if det[len(det)-1] != '\'' {
|
||||||
|
det = det + " "
|
||||||
|
}
|
||||||
|
titleCompare := det + strings.ToLower(series[:1]) + series[1:]
|
||||||
|
titleMatch = (titleCompare == a.Title)
|
||||||
|
} else {
|
||||||
|
titleMatch = (a.Series == a.Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
series = sanitizeTitleString(series)
|
||||||
|
title := sanitizeTitleString(a.Title)
|
||||||
|
// next we build the tome identifier: NumA alone when Num is negative, otherwise "Tome-<Num><NumA>"
|
||||||
|
tomeIdent := ""
|
||||||
|
if a.Num < 0 {
|
||||||
|
tomeIdent = a.NumA
|
||||||
|
} else {
|
||||||
|
tomeIdent = fmt.Sprintf("Tome-%d%s", a.Num, a.NumA)
|
||||||
|
}
|
||||||
|
|
||||||
|
if titleMatch {
|
||||||
|
if len(tomeIdent) == 0 {
|
||||||
|
return fmt.Sprintf("BD-%s-%d.html", series, a.ID)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("BD-%s-%s-%d.html", series, tomeIdent, a.ID)
|
||||||
|
}
|
||||||
|
if len(tomeIdent) == 0 {
|
||||||
|
return fmt.Sprintf("BD-%s-%s-%d.html", series, title, a.ID)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("BD-%s-%s-%s-%d.html",
|
||||||
|
series,
|
||||||
|
tomeIdent,
|
||||||
|
title,
|
||||||
|
a.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// An AlbumDescription is a more complete BD description
|
// An AlbumDescription is a more complete BD description
|
||||||
|
|||||||
24
bd_test.go
24
bd_test.go
@@ -15,18 +15,18 @@ var _ = Suite(&AlbumSuite{})
|
|||||||
func (s *AlbumSuite) TestSanitazation(c *C) {
|
func (s *AlbumSuite) TestSanitazation(c *C) {
|
||||||
data := map[string]string{
|
data := map[string]string{
|
||||||
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
|
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
|
||||||
"Nef des fous (La)": "Nef-des-fous",
|
"Nef des fous": "Nef-des-fous",
|
||||||
"Oiseau noir (L')": "Oiseau-noir",
|
"Oiseau noir": "Oiseau-noir",
|
||||||
"Foo": "Foo",
|
"Foo": "Foo",
|
||||||
"Nuit de l'étoile (La)": "Nuit-de-l-etoile",
|
"Nuit de l'étoile": "Nuit-de-l-etoile",
|
||||||
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
|
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
|
||||||
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
|
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
|
||||||
"Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
|
"Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
|
||||||
"100.000 femmes (Les)": "100000-femmes",
|
"100.000 femmes": "100000-femmes",
|
||||||
}
|
}
|
||||||
|
|
||||||
for title, expected := range data {
|
for title, expected := range data {
|
||||||
san := sanitizeTitleString(title, true)
|
san := sanitizeTitleString(title)
|
||||||
c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
|
c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -66,7 +66,7 @@ func (s *AlbumSuite) TestBedecomURI(c *C) {
|
|||||||
NumA: "",
|
NumA: "",
|
||||||
},
|
},
|
||||||
"BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{
|
"BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{
|
||||||
ID: 1287,
|
ID: 1387,
|
||||||
ISBN: "2-84055-142-X",
|
ISBN: "2-84055-142-X",
|
||||||
Series: "Nef des fous (La)",
|
Series: "Nef des fous (La)",
|
||||||
Title: "Le petit Roy",
|
Title: "Le petit Roy",
|
||||||
@@ -113,10 +113,18 @@ func (s *AlbumSuite) TestBedecomURI(c *C) {
|
|||||||
Num: 1,
|
Num: 1,
|
||||||
NumA: "",
|
NumA: "",
|
||||||
},
|
},
|
||||||
|
"BD-Catalogues-Expositions-Brassens-ou-la-liberte-124218.html": Album{
|
||||||
|
ID: 124218,
|
||||||
|
ISBN: "9782205066975",
|
||||||
|
Series: "(Catalogues) Expositions",
|
||||||
|
Title: "Brassens ou la liberté",
|
||||||
|
Num: -1,
|
||||||
|
NumA: "",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for expectedUrl, album := range data {
|
for expectedURL, album := range data {
|
||||||
url := album.GetBedethequeComURI()
|
url := album.GetBedethequeComURI()
|
||||||
c.Check(url, Equals, expectedUrl)
|
c.Check(url, Equals, expectedURL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user