From f394ff006964efeda75a7a49e8560b84618a5f0f Mon Sep 17 00:00:00 2001 From: Alexandre Tuleu Date: Thu, 14 Jan 2016 15:07:45 +0100 Subject: [PATCH] Implements URI guessing --- bd.go | 69 ++++++++++++++++++++++++++++++++++++++++++++---------- bd_test.go | 28 ++++++++++++++-------- 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/bd.go b/bd.go index 14e8ee7..b1b6847 100644 --- a/bd.go +++ b/bd.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "regexp" "strings" "time" @@ -13,11 +14,16 @@ import ( type AlbumState int const ( - NEW AlbumState = iota // 0 - MINT // 1 - GOOD // 2 - AVERAGE // 3 - BAD // 4 + // NEW is "État neuf" state + NEW AlbumState = iota // 0 + // MINT is "Très bon état" state + MINT // 1 + // GOOD is "Bon état" state + GOOD // 2 + // AVERAGE is "État moyen" state + AVERAGE // 3 + // BAD is "Mauvais état" state + BAD // 4 ) // An Album is the core object in our system @@ -40,13 +46,14 @@ type Album struct { LegalDeposit time.Time PrintDate time.Time + PurchaseDate time.Time } var endDelim = regexp.MustCompile(` \(.*\)\z`) var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`) var punctuation = regexp.MustCompile(`[!?\.:;,]`) -func sanitizeTitleString(title string, removeEndParent bool) string { +func sanitizeTitleString(title string) string { // first sanitize accuented characters. isOk := func(r rune) bool { return r < 32 || r >= 127 @@ -57,16 +64,52 @@ func sanitizeTitleString(title string, removeEndParent bool) string { // or io.Writer filter to automatically do such filtering when reading // or writing data anywhere. 
title, _, _ = transform.String(t, title) - //Now we remove all punctuation - if removeEndParent == true { - title = endDelim.ReplaceAllString(title, "") - } - + //Now we remove all punctuation return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-") } -func (*Album) GetBedethequeComURI() string { - return "" +// GetBedethequeComURI tries to guess the URI used by bedetheque.com to reference an album, using reverse-engineered heuristics. +func (a *Album) GetBedethequeComURI() string { + // we check for a trailing determiner such as (La) or (L') + matches := endDelim.FindString(a.Series) + series := a.Series + titleMatch := false + if len(matches) != 0 { + series = strings.TrimSuffix(series, matches) + det := strings.Trim(matches, " ()") + if det[len(det)-1] != '\'' { + det = det + " " + } + titleCompare := det + strings.ToLower(series[:1]) + series[1:] + titleMatch = (titleCompare == a.Title) + } else { + titleMatch = (a.Series == a.Title) + } + + series = sanitizeTitleString(series) + title := sanitizeTitleString(a.Title) + //first we test if we have a tome identifier + tomeIdent := "" + if a.Num < 0 { + tomeIdent = a.NumA + } else { + tomeIdent = fmt.Sprintf("Tome-%d%s", a.Num, a.NumA) + } + + if titleMatch { + if len(tomeIdent) == 0 { + return fmt.Sprintf("BD-%s-%d.html", series, a.ID) + } + return fmt.Sprintf("BD-%s-%s-%d.html", series, tomeIdent, a.ID) + } + if len(tomeIdent) == 0 { + return fmt.Sprintf("BD-%s-%s-%d.html", series, title, a.ID) + } + return fmt.Sprintf("BD-%s-%s-%s-%d.html", + series, + tomeIdent, + title, + a.ID) } // An AlbumDescription is a more complete BD description diff --git a/bd_test.go b/bd_test.go index fe80a87..b088feb 100644 --- a/bd_test.go +++ b/bd_test.go @@ -14,19 +14,19 @@ var _ = Suite(&AlbumSuite{}) func (s *AlbumSuite) TestSanitazation(c *C) { data := map[string]string{ - "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I", - "Nef des fous (La)": "Nef-des-fous", - "Oiseau noir (L')": "Oiseau-noir", - "Foo": "Foo", - 
"Nuit de l'étoile (La)": "Nuit-de-l-etoile", + "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I", + "Nef des fous": "Nef-des-fous", + "Oiseau noir": "Oiseau-noir", + "Foo": "Foo", + "Nuit de l'étoile": "Nuit-de-l-etoile", "Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire", "Les disparus d'apostrophes !": "Les-disparus-d-apostrophes", "Eden - It's an Endless World!": "Eden-It-s-an-Endless-World", - "100.000 femmes (Les)": "100000-femmes", + "100.000 femmes": "100000-femmes", } for title, expected := range data { - san := sanitizeTitleString(title, true) + san := sanitizeTitleString(title) c.Check(san, Equals, expected, Commentf("Processing '%s'", title)) } } @@ -66,7 +66,7 @@ func (s *AlbumSuite) TestBedecomURI(c *C) { NumA: "", }, "BD-Nef-des-fous-HS03-Le-petit-Roy-1387.html": Album{ - ID: 1287, + ID: 1387, ISBN: "2-84055-142-X", Series: "Nef des fous (La)", Title: "Le petit Roy", @@ -113,10 +113,18 @@ func (s *AlbumSuite) TestBedecomURI(c *C) { Num: 1, NumA: "", }, + "BD-Catalogues-Expositions-Brassens-ou-la-liberte-124218.html": Album{ + ID: 124218, + ISBN: "9782205066975", + Series: "(Catalogues) Expositions", + Title: "Brassens ou la liberté", + Num: -1, + NumA: "", + }, } - for expectedUrl, album := range data { + for expectedURL, album := range data { url := album.GetBedethequeComURI() - c.Check(url, Equals, expectedUrl) + c.Check(url, Equals, expectedURL) } }