Makes the sanitization test pass

This commit is contained in:
2016-01-14 14:15:01 +01:00
parent 9854ec868a
commit 21f6c21de4
2 changed files with 33 additions and 7 deletions

32
bd.go
View File

@@ -1,6 +1,13 @@
package main package main
import "time" import (
"regexp"
"strings"
"time"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// An AlbumState describe the state of an Album // An AlbumState describe the state of an Album
type AlbumState int type AlbumState int
@@ -35,8 +42,27 @@ type Album struct {
PrintDate time.Time PrintDate time.Time
} }
func sanitizeTitleString(title string) string { var endDelim = regexp.MustCompile(` \(.*\)\z`)
return "" var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
var punctuation = regexp.MustCompile(`[!?\.:;,]`)
// sanitizeTitleString converts a title into a dash-separated slug.
//
// The title is first decomposed with norm.NFKD and every rune below 32 or
// at/above 127 is filtered out. When removeEndParent is true, a trailing
// " (...)" group matched by endDelim is removed. The characters matched by
// punctuation are then deleted, every run matched by wordBoundaries is
// replaced by a single "-", and leading/trailing "-" are trimmed.
func sanitizeTitleString(title string, removeEndParent bool) string {
// First strip accents from accented characters: presumably NFKD splits an
// accented letter into a base letter plus combining marks, and the marks
// are then filtered out below.
// NOTE(review): despite its name, isOk selects the runes handed to
// transform.RemoveFunc (control characters and anything >= 127), not the
// runes that are kept -- confirm against the x/text documentation.
isOk := func(r rune) bool {
return r < 32 || r >= 127
}
// The isOk filter is such that there is no need to chain to norm.NFC
t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
// This Transformer could also trivially be applied as an io.Reader
// or io.Writer filter to automatically do such filtering when reading
// or writing data anywhere.
// The byte count and the error returned by transform.String are discarded.
title, _, _ = transform.String(t, title)
// Optionally drop a trailing parenthesised group such as " (La)".
if removeEndParent == true {
title = endDelim.ReplaceAllString(title, "")
}
// Punctuation is deleted before word boundaries are replaced, so that
// "100.000" becomes "100000" rather than "100-000".
return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
} }
func (*Album) GetBedethequeComURI() string { func (*Album) GetBedethequeComURI() string {

View File

@@ -16,18 +16,18 @@ func (s *AlbumSuite) TestSanitazation(c *C) {
data := map[string]string{ data := map[string]string{
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I", "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
"Nef des fous (La)": "Nef-des-fous", "Nef des fous (La)": "Nef-des-fous",
"Oiseau noir (L')": "Oiseau-Noir", "Oiseau noir (L')": "Oiseau-noir",
"Foo": "Foo", "Foo": "Foo",
"Nuit de l'étoile (La)": "Nuit-de-l-etoile", "Nuit de l'étoile (La)": "Nuit-de-l-etoile",
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire", "Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes", "Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
"Eden - It's an Endless World!": "Eden-it-s-an-Endless-World", "Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
"100.000 femmes (Les)": "100000-femmes", "100.000 femmes (Les)": "100000-femmes",
} }
for title, expected := range data { for title, expected := range data {
san := sanitizeTitleString(title) san := sanitizeTitleString(title, true)
c.Check(san, Equals, expected) c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
} }
} }