Makes the sanitization test pass
This commit is contained in:
32
bd.go
32
bd.go
@@ -1,6 +1,13 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import "time"
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
// An AlbumState describes the state of an Album
|
// An AlbumState describes the state of an Album
|
||||||
type AlbumState int
|
type AlbumState int
|
||||||
@@ -35,8 +42,27 @@ type Album struct {
|
|||||||
PrintDate time.Time
|
PrintDate time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
func sanitizeTitleString(title string) string {
|
var endDelim = regexp.MustCompile(` \(.*\)\z`)
|
||||||
return ""
|
var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)
|
||||||
|
var punctuation = regexp.MustCompile(`[!?\.:;,]`)
|
||||||
|
|
||||||
|
func sanitizeTitleString(title string, removeEndParent bool) string {
|
||||||
|
// First, sanitize accented characters.
|
||||||
|
isOk := func(r rune) bool {
|
||||||
|
return r < 32 || r >= 127
|
||||||
|
}
|
||||||
|
// The isOk filter is such that there is no need to chain to norm.NFC
|
||||||
|
t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
|
||||||
|
// This Transformer could also trivially be applied as an io.Reader
|
||||||
|
// or io.Writer filter to automatically do such filtering when reading
|
||||||
|
// or writing data anywhere.
|
||||||
|
title, _, _ = transform.String(t, title)
|
||||||
|
// Optionally strip a trailing parenthesized suffix; punctuation is removed in the return statement below.
|
||||||
|
if removeEndParent == true {
|
||||||
|
title = endDelim.ReplaceAllString(title, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*Album) GetBedethequeComURI() string {
|
func (*Album) GetBedethequeComURI() string {
|
||||||
|
|||||||
@@ -16,18 +16,18 @@ func (s *AlbumSuite) TestSanitazation(c *C) {
|
|||||||
data := map[string]string{
|
data := map[string]string{
|
||||||
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
|
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
|
||||||
"Nef des fous (La)": "Nef-des-fous",
|
"Nef des fous (La)": "Nef-des-fous",
|
||||||
"Oiseau noir (L')": "Oiseau-Noir",
|
"Oiseau noir (L')": "Oiseau-noir",
|
||||||
"Foo": "Foo",
|
"Foo": "Foo",
|
||||||
"Nuit de l'étoile (La)": "Nuit-de-l-etoile",
|
"Nuit de l'étoile (La)": "Nuit-de-l-etoile",
|
||||||
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
|
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
|
||||||
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
|
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
|
||||||
"Eden - It's an Endless World!": "Eden-it-s-an-Endless-World",
|
"Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
|
||||||
"100.000 femmes (Les)": "100000-femmes",
|
"100.000 femmes (Les)": "100000-femmes",
|
||||||
}
|
}
|
||||||
|
|
||||||
for title, expected := range data {
|
for title, expected := range data {
|
||||||
san := sanitizeTitleString(title)
|
san := sanitizeTitleString(title, true)
|
||||||
c.Check(san, Equals, expected)
|
c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user