Makes the sanitization test pass

This commit is contained in:
2016-01-14 14:15:01 +01:00
parent 9854ec868a
commit 21f6c21de4
2 changed files with 33 additions and 7 deletions

32
bd.go
View File

@@ -1,6 +1,13 @@
package main
import "time"
import (
"regexp"
"strings"
"time"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// An AlbumState describes the state of an Album.
type AlbumState int
@@ -35,8 +42,27 @@ type Album struct {
PrintDate time.Time
}
func sanitizeTitleString(title string) string {
return ""
// Patterns used by sanitizeTitleString, compiled once at package scope.
var (
	// endDelim matches a space followed by a parenthesised group that runs
	// to the very end of the text. `.*` is greedy, so when the text ends in
	// ")" the match starts at the first " (" on that line.
	endDelim = regexp.MustCompile(` \(.*\)\z`)

	// wordBoundaries matches one or more consecutive characters that are
	// not ASCII letters or digits.
	wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`)

	// punctuation matches a single one of the characters ! ? . : ; ,
	punctuation = regexp.MustCompile(`[!?\.:;,]`)
)
// sanitizeTitleString converts an album title into an ASCII, dash-separated
// form.
//
// The title is first decomposed with NFKD and every rune outside the
// printable ASCII range (32..126) is dropped, so an accented letter is
// reduced to its base letter. When removeEndParent is true, a trailing
// " (...)" group matched by endDelim is then stripped. Finally the
// characters matched by punctuation are deleted, each run matched by
// wordBoundaries is replaced with a single "-", and leading and trailing
// dashes are trimmed.
func sanitizeTitleString(title string, removeEndParent bool) string {
	// Runes for which dropRune returns true are removed from the title.
	dropRune := func(r rune) bool {
		printable := r >= 32 && r < 127
		return !printable
	}
	asciiOnly := transform.Chain(norm.NFKD, transform.RemoveFunc(dropRune))

	// The error from transform.String is deliberately ignored: whatever
	// string it produced is used as is.
	title, _, _ = transform.String(asciiOnly, title)

	// Strip the trailing parenthesised group before punctuation is touched,
	// since the parentheses themselves are what endDelim anchors on.
	if removeEndParent {
		title = endDelim.ReplaceAllString(title, "")
	}

	title = punctuation.ReplaceAllString(title, "")
	title = wordBoundaries.ReplaceAllString(title, "-")
	return strings.Trim(title, "-")
}
func (*Album) GetBedethequeComURI() string {

View File

@@ -16,18 +16,18 @@ func (s *AlbumSuite) TestSanitazation(c *C) {
data := map[string]string{
"Le cycle de l'eau - I": "Le-cycle-de-l-eau-I",
"Nef des fous (La)": "Nef-des-fous",
"Oiseau noir (L')": "Oiseau-Noir",
"Oiseau noir (L')": "Oiseau-noir",
"Foo": "Foo",
"Nuit de l'étoile (La)": "Nuit-de-l-etoile",
"Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire",
"Les disparus d'apostrophes !": "Les-disparus-d-apostrophes",
"Eden - It's an Endless World!": "Eden-it-s-an-Endless-World",
"Eden - It's an Endless World!": "Eden-It-s-an-Endless-World",
"100.000 femmes (Les)": "100000-femmes",
}
for title, expected := range data {
san := sanitizeTitleString(title)
c.Check(san, Equals, expected)
san := sanitizeTitleString(title, true)
c.Check(san, Equals, expected, Commentf("Processing '%s'", title))
}
}