From 21f6c21de4fad1f8e466cb949054555f1c39fb9b Mon Sep 17 00:00:00 2001 From: Alexandre Tuleu Date: Thu, 14 Jan 2016 14:15:01 +0100 Subject: [PATCH] Makes the sanitization test pass --- bd.go | 32 +++++++++++++++++++++++++++++--- bd_test.go | 8 ++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/bd.go b/bd.go index 8875c2e..14e8ee7 100644 --- a/bd.go +++ b/bd.go @@ -1,6 +1,13 @@ package main -import "time" +import ( + "regexp" + "strings" + "time" + + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" +) // An AlbumState describe the state of an Album type AlbumState int @@ -35,8 +42,27 @@ type Album struct { PrintDate time.Time } -func sanitizeTitleString(title string) string { - return "" +var endDelim = regexp.MustCompile(` \(.*\)\z`) +var wordBoundaries = regexp.MustCompile(`[^[:alnum:]]+`) +var punctuation = regexp.MustCompile(`[!?\.:;,]`) + +func sanitizeTitleString(title string, removeEndParent bool) string { + // first sanitize accuented characters. + isOk := func(r rune) bool { + return r < 32 || r >= 127 + } + // The isOk filter is such that there is no need to chain to norm.NFC + t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk)) + // This Transformer could also trivially be applied as an io.Reader + // or io.Writer filter to automatically do such filtering when reading + // or writing data anywhere. 
+ title, _, _ = transform.String(t, title) + //Now we remove all punctuation + if removeEndParent == true { + title = endDelim.ReplaceAllString(title, "") + } + + return strings.Trim(wordBoundaries.ReplaceAllString(punctuation.ReplaceAllString(title, ""), "-"), "-") } func (*Album) GetBedethequeComURI() string { diff --git a/bd_test.go b/bd_test.go index 3125964..fe80a87 100644 --- a/bd_test.go +++ b/bd_test.go @@ -16,18 +16,18 @@ func (s *AlbumSuite) TestSanitazation(c *C) { data := map[string]string{ "Le cycle de l'eau - I": "Le-cycle-de-l-eau-I", "Nef des fous (La)": "Nef-des-fous", - "Oiseau noir (L')": "Oiseau-Noir", + "Oiseau noir (L')": "Oiseau-noir", "Foo": "Foo", "Nuit de l'étoile (La)": "Nuit-de-l-etoile", "Mon Père saigne l'Histoire": "Mon-Pere-saigne-l-Histoire", "Les disparus d'apostrophes !": "Les-disparus-d-apostrophes", - "Eden - It's an Endless World!": "Eden-it-s-an-Endless-World", + "Eden - It's an Endless World!": "Eden-It-s-an-Endless-World", "100.000 femmes (Les)": "100000-femmes", } for title, expected := range data { - san := sanitizeTitleString(title) - c.Check(san, Equals, expected) + san := sanitizeTitleString(title, true) + c.Check(san, Equals, expected, Commentf("Processing '%s'", title)) } }