Implements URI guessing

This commit is contained in:
2016-01-14 15:07:45 +01:00
parent 21f6c21de4
commit f394ff0069
2 changed files with 74 additions and 23 deletions

59
bd.go
View File

@@ -1,6 +1,7 @@
package main package main
import ( import (
"fmt"
"regexp" "regexp"
"strings" "strings"
"time" "time"
@@ -13,10 +14,15 @@ import (
type AlbumState int type AlbumState int
const ( const (
// NEW is "État neuf" state
NEW AlbumState = iota // 0 NEW AlbumState = iota // 0
// MINT is "Très bon état" state
MINT // 1 MINT // 1
// GOOD is "Bon état" state
GOOD // 2 GOOD // 2
// AVERAGE is "État moyen" state
AVERAGE // 3 AVERAGE // 3
// BAD is "Mauvais état" state
BAD // 4 BAD // 4
) )
@@ -40,13 +46,14 @@ type Album struct {
LegalDeposit time.Time LegalDeposit time.Time
PrintDate time.Time PrintDate time.Time
PurchaseDate time.Time
} }
var endDelim = regexp.MustCompile(` \(.*\)\z`) var endDelim = regexp.MustCompile(` \(.*\)\z`)
var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`) var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
var punctuation = regexp.MustCompile(`[!?\.:;,]`) var punctuation = regexp.MustCompile(`[!?\.:;,]`)
func sanitizeTitleString(title string, removeEndParent bool) string { func sanitizeTitleString(title string) string {
// first sanitize accented characters. // first sanitize accented characters.
isOk := func(r rune) bool { isOk := func(r rune) bool {
return r < 32 || r >= 127 return r < 32 || r >= 127
@@ -57,16 +64,52 @@ func sanitizeTitleString(title string, removeEndParent bool) string {
// or io.Writer filter to automatically do such filtering when reading // or io.Writer filter to automatically do such filtering when reading
// or writing data anywhere. // or writing data anywhere.
title, _, _ = transform.String(t, title) title, _, _ = transform.String(t, title)
//Now we remove all punctuation //Now we remove all punctuat
if removeEndParent == true {
title = endDelim.ReplaceAllString(title, "")
}
return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-") return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
} }
func (*Album) GetBedethequeComURI() string { // GetBedethequeComURI tries to guess the URI used by bedetheque.com to reference an album, using reverse-engineered euristics
return "" func (a *Album) GetBedethequeComURI() string {
// we check for determinant
matches := endDelim.FindString(a.Series)
series := a.Series
titleMatch := false
if len(matches) != 0 {
series = strings.TrimSuffix(series, matches)
det := strings.Trim(matches, " ()")
if det[len(det)-1] != '\'' {
det = det + " "
}
titleCompare := det + strings.ToLower(series[:1]) + series[1:]
titleMatch = (titleCompare == a.Title)
} else {
titleMatch = (a.Series == a.Title)
}
series = sanitizeTitleString(series)
title := sanitizeTitleString(a.Title)
//first we test if we have a tome identifier
tomeIdent := ""
if a.Num < 0 {
tomeIdent = a.NumA
} else {
tomeIdent = fmt.Sprintf("Tome-%d%s", a.Num, a.NumA)
}
if titleMatch {
if len(tomeIdent) == 0 {
return fmt.Sprintf("BD-%s-%d.html", series, a.ID)
}
return fmt.Sprintf("BD-%s-%s-%d.html", series, tomeIdent, a.ID)
}
if len(tomeIdent) == 0 {
return fmt.Sprintf("BD-%s-%s-%d.html", series, title, a.ID)
}
return fmt.Sprintf("BD-%s-%s-%s-%d.html",
series,
tomeIdent,
title,
a.ID)
} }
// An AlbumDescription is a more complete BD description // An AlbumDescription is a more complete BD description

View File

@@ -15,18 +15,18 @@ var _ = Suite(&AlbumSuite{})
func (s *AlbumSuite) TestSanitazation(c *C) { func (s *AlbumSuite) TestSanitazation(c *C) {
data := map[string]string{ data := map[string]string{
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I", "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
"Nef des fous (La)": "Nef-des-fous", "Nef des fous": "Nef-des-fous",
"Oiseau noir (L')": "Oiseau-noir", "Oiseau noir": "Oiseau-noir",
"Foo": "Foo", "Foo": "Foo",
"Nuit de l'étoile (La)": "Nuit-de-l-etoile", "Nuit de l'étoile": "Nuit-de-l-etoile",
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire", "Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes", "Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
"Eden - It's an Endless World!": "Eden-It-s-an-Endless-World", "Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
"100.000 femmes (Les)": "100000-femmes", "100.000 femmes": "100000-femmes",
} }
for title, expected := range data { for title, expected := range data {
san := sanitizeTitleString(title, true) san := sanitizeTitleString(title)
c.Check(san, Equals, expected, Commentf("Processing '%s'", title)) c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
} }
} }
@@ -66,7 +66,7 @@ func (s *AlbumSuite) TestBedecomURI(c *C) {
NumA: "", NumA: "",
}, },
"BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{ "BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{
ID: 1287, ID: 1387,
ISBN: "2-84055-142-X", ISBN: "2-84055-142-X",
Series: "Nef des fous (La)", Series: "Nef des fous (La)",
Title: "Le petit Roy", Title: "Le petit Roy",
@@ -113,10 +113,18 @@ func (s *AlbumSuite) TestBedecomURI(c *C) {
Num: 1, Num: 1,
NumA: "", NumA: "",
}, },
"BD-Catalogues-Expositions-Brassens-ou-la-liberte-124218.html": Album{
ID: 124218,
ISBN: "9782205066975",
Series: "(Catalogues) Expositions",
Title: "Brassens ou la liberté",
Num: -1,
NumA: "",
},
} }
for expectedUrl, album := range data { for expectedURL, album := range data {
url := album.GetBedethequeComURI() url := album.GetBedethequeComURI()
c.Check(url, Equals, expectedUrl) c.Check(url, Equals, expectedURL)
} }
} }