From ef458eaaeaeb0d97720cc86591d456b62672f5bf Mon Sep 17 00:00:00 2001 From: Alexandre Tuleu Date: Thu, 21 Jan 2016 19:19:04 +0100 Subject: [PATCH] Splits bleve.Index and database in two --- album.go | 9 +- album_database.go | 27 +++++ album_description_getter.go | 2 + album_mapping.go | 63 +++++++++++ bleve_indexer_test.go | 46 ++++---- indexer.go | 203 ------------------------------------ main.go | 17 ++- 7 files changed, 132 insertions(+), 235 deletions(-) create mode 100644 album_database.go create mode 100644 album_mapping.go delete mode 100644 indexer.go diff --git a/album.go b/album.go index 54ce27c..cfb62f7 100644 --- a/album.go +++ b/album.go @@ -1,6 +1,9 @@ package main -import "time" +import ( + "strconv" + "time" +) // An AlbumState describe the state of an Album type AlbumState int @@ -63,3 +66,7 @@ type Album struct { FetchDate time.Time } + +func (a *Album) IDString() string { + return strconv.FormatUint(uint64(a.ID), 10) +} diff --git a/album_database.go b/album_database.go new file mode 100644 index 0000000..af178dd --- /dev/null +++ b/album_database.go @@ -0,0 +1,27 @@ +package main + +import "github.com/peterbourgon/diskv" + +type AlbumDatabase struct { + db *diskv.Diskv +} + +func OpenAlbumDatabase(basepath string) (*AlbumDatabase, error) { + return nil, notYetImplemented() +} + +func (db *AlbumDatabase) AddOrUpdate(*Album) error { + return notYetImplemented() +} + +func (db *AlbumDatabase) Delete(AlbumID) error { + return notYetImplemented() +} + +func (db *AlbumDatabase) Get(AlbumID) (*Album, error) { + return nil, notYetImplemented() +} + +func (db *AlbumDatabase) ByPurchaseDate() ([]*Album, error) { + return nil, notYetImplemented() +} diff --git a/album_description_getter.go b/album_description_getter.go index 82f284d..dfb8244 100644 --- a/album_description_getter.go +++ b/album_description_getter.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/PuerkitoBio/goquery" ) @@ -177,5 +178,6 @@ func (g *AlbumDescriptionGetter) Get(a *Album) error { strings.Join(append([]string{""}, errorList...), "\n * ")) } + a.FetchDate = time.Now() return nil } diff --git a/album_mapping.go b/album_mapping.go new file mode 100644 index 0000000..a431c34 --- /dev/null +++ b/album_mapping.go @@ -0,0 +1,63 @@ +package main + +import ( + "github.com/blevesearch/bleve" + "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" + "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer" + "github.com/blevesearch/bleve/analysis/language/fr" +) + +func buildAlbumMapping() *bleve.IndexMapping { + simpleMapping := bleve.NewTextFieldMapping() + simpleMapping.Analyzer = simple_analyzer.Name + + frenchTextMapping := bleve.NewTextFieldMapping() + frenchTextMapping.Analyzer = fr.AnalyzerName + + keywordFieldMapping := bleve.NewTextFieldMapping() + keywordFieldMapping.Analyzer = keyword_analyzer.Name + keywordFieldMapping.IncludeInAll = false + + numericDisabled := bleve.NewNumericFieldMapping() + numericDisabled.Index = false + numericDisabled.Store = false + numericDisabled.IncludeInAll = false + + textDisabled := bleve.NewTextFieldMapping() + textDisabled.Index = false + textDisabled.Store = false + textDisabled.IncludeInAll = false + + dateDisabled := bleve.NewDateTimeFieldMapping() + dateDisabled.Index = false + dateDisabled.Store = false + dateDisabled.IncludeInAll = false + + albumMapping := bleve.NewDocumentStaticMapping() + + albumMapping.AddFieldMappingsAt("ID", numericDisabled) + albumMapping.AddFieldMappingsAt("ISBN", keywordFieldMapping) + albumMapping.AddFieldMappingsAt("Series", frenchTextMapping) + albumMapping.AddFieldMappingsAt("Title", frenchTextMapping) + albumMapping.AddFieldMappingsAt("Num", numericDisabled) + albumMapping.AddFieldMappingsAt("NumA", textDisabled) + albumMapping.AddFieldMappingsAt("State", numericDisabled) + albumMapping.AddFieldMappingsAt("Editor", simpleMapping) + albumMapping.AddFieldMappingsAt("Collection", frenchTextMapping) + albumMapping.AddFieldMappingsAt("SatID", keywordFieldMapping) + albumMapping.AddFieldMappingsAt("Description", frenchTextMapping) + albumMapping.AddFieldMappingsAt("LegalDeposit", dateDisabled) + albumMapping.AddFieldMappingsAt("PrintDate", dateDisabled) + albumMapping.AddFieldMappingsAt("PurchaseDate", dateDisabled) + albumMapping.AddFieldMappingsAt("CoverURL", dateDisabled) + albumMapping.AddFieldMappingsAt("Note", bleve.NewNumericFieldMapping()) + albumMapping.AddFieldMappingsAt("Scenarists", simpleMapping) + albumMapping.AddFieldMappingsAt("Designers", simpleMapping) + albumMapping.AddFieldMappingsAt("Colorists", simpleMapping) + albumMapping.AddFieldMappingsAt("Colorists", textDisabled) + albumMapping.AddFieldMappingsAt("FetchDate", dateDisabled) + + indexMapping := bleve.NewIndexMapping() + indexMapping.AddDocumentMapping("album", albumMapping) + return indexMapping +} diff --git a/bleve_indexer_test.go b/bleve_indexer_test.go index 9a09328..9980309 100644 --- a/bleve_indexer_test.go +++ b/bleve_indexer_test.go @@ -3,13 +3,15 @@ package main import ( "log" "path/filepath" + "strconv" "time" + "github.com/blevesearch/bleve" . "gopkg.in/check.v1" ) type BleveIndexerSuite struct { - i Indexer + i bleve.Index } var _ = Suite(&BleveIndexerSuite{}) @@ -17,31 +19,14 @@ var _ = Suite(&BleveIndexerSuite{}) func (s *BleveIndexerSuite) SetUpSuite(c *C) { var err error start := time.Now() - s.i, err = NewBleveIndexer(filepath.Join(c.MkDir(), "satbd-test.bar.satellite")) + s.i, err = bleve.New(filepath.Join(c.MkDir(), "satbd-test.bar.satellite"), buildAlbumMapping()) c.Assert(err, IsNil) for _, a := range albumsDataTest { - c.Assert(s.i.Index(&a), IsNil) + c.Assert(s.i.Index((&a).IDString(), &a), IsNil) } log.Printf("Indexing took %s", time.Since(start)) } -func (s *BleveIndexerSuite) TestCanRetrieveAlbum(c *C) { - start := time.Now() - for _, a := range albumsDataTest { - fromIndex, err := s.i.Get(a.ID) - if c.Check(err, IsNil) == true { - c.Check(*fromIndex, DeepEquals, a) - } - } - log.Printf("%s: %s", c.TestName(), time.Since(start)) -} - -func (s *BleveIndexerSuite) TestCanDeleteAlbum(c *C) { - c.Check(s.i.Delete(albumsDataTest[0].ID), IsNil) - c.Check(s.i.Index(&(albumsDataTest[0])), IsNil) - c.Check(s.i.Delete(0), ErrorMatches, "No album 0 in the index") -} - func (s *BleveIndexerSuite) TestCanSearch(c *C) { data := map[string]map[AlbumID]bool{ // Research by amny keyword (here in the description @@ -77,17 +62,24 @@ func (s *BleveIndexerSuite) TestCanSearch(c *C) { } start := time.Now() for q, expected := range data { - res, err := s.i.Search(q) + query := bleve.NewQueryStringQuery(q) + search := bleve.NewSearchRequest(query) + //make sure we can have all data at once + search.Size = len(albumsDataTest) + searchResults, err := s.i.Search(search) if c.Check(err, IsNil) == true { - if c.Check(len(res), Equals, len(expected), Commentf("Query: %s", q)) == true { - for _, resAlbum := range res { - _, ok := expected[resAlbum.ID] + + if c.Check(int(searchResults.Total), Equals, len(expected), Commentf("Query: %s", q)) == true { + for _, resAlbum := range searchResults.Hits { + aID, err := strconv.ParseUint(resAlbum.ID, 0, 64) + c.Check(err, IsNil) + _, ok := expected[AlbumID(aID)] c.Check(ok, Equals, true, Commentf("Query: %s, got %d instead of %v", q, resAlbum.ID, expected)) } } else { - log.Printf("Got %d result(s)", len(res)) - for i, a := range res { - log.Printf(" - %d: %d: %s", i, a.ID, a.Title) + log.Printf("Got %d result(s)", len(searchResults.Hits)) + for i, a := range searchResults.Hits { + log.Printf(" - %d: %s", i, a.ID) } } } diff --git a/indexer.go b/indexer.go deleted file mode 100644 index d707fa6..0000000 --- a/indexer.go +++ /dev/null @@ -1,203 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "path/filepath" - "strconv" - - "launchpad.net/go-xdg" - - "github.com/blevesearch/bleve" - "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" - "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer" - "github.com/blevesearch/bleve/analysis/language/fr" - "github.com/peterbourgon/diskv" -) - -type SearchResult bleve.SearchResult - -type Indexer interface { - Index(a *Album) error - Get(ID AlbumID) (*Album, error) - Delete(ID AlbumID) error - Search(query string) ([]*Album, error) -} - -type bleveIndexer struct { - bl bleve.Index - - db *diskv.Diskv -} - -func buildAlbumMapping() *bleve.IndexMapping { - simpleMapping := bleve.NewTextFieldMapping() - simpleMapping.Analyzer = simple_analyzer.Name - - frenchTextMapping := bleve.NewTextFieldMapping() - frenchTextMapping.Analyzer = fr.AnalyzerName - - keywordFieldMapping := bleve.NewTextFieldMapping() - keywordFieldMapping.Analyzer = keyword_analyzer.Name - keywordFieldMapping.IncludeInAll = false - - numericDisabled := bleve.NewNumericFieldMapping() - numericDisabled.Index = false - numericDisabled.Store = false - numericDisabled.IncludeInAll = false - - textDisabled := bleve.NewTextFieldMapping() - textDisabled.Index = false - textDisabled.Store = false - textDisabled.IncludeInAll = false - - dateDisabled := bleve.NewDateTimeFieldMapping() - dateDisabled.Index = false - dateDisabled.Store = false - dateDisabled.IncludeInAll = false - - albumMapping := bleve.NewDocumentStaticMapping() - - albumMapping.AddFieldMappingsAt("ID", numericDisabled) - albumMapping.AddFieldMappingsAt("ISBN", keywordFieldMapping) - albumMapping.AddFieldMappingsAt("Series", frenchTextMapping) - albumMapping.AddFieldMappingsAt("Title", frenchTextMapping) - albumMapping.AddFieldMappingsAt("Num", numericDisabled) - albumMapping.AddFieldMappingsAt("NumA", textDisabled) - albumMapping.AddFieldMappingsAt("State", numericDisabled) - albumMapping.AddFieldMappingsAt("Editor", simpleMapping) - albumMapping.AddFieldMappingsAt("Collection", frenchTextMapping) - albumMapping.AddFieldMappingsAt("SatID", keywordFieldMapping) - albumMapping.AddFieldMappingsAt("Description", frenchTextMapping) - albumMapping.AddFieldMappingsAt("LegalDeposit", dateDisabled) - albumMapping.AddFieldMappingsAt("PrintDate", dateDisabled) - albumMapping.AddFieldMappingsAt("PurchaseDate", dateDisabled) - albumMapping.AddFieldMappingsAt("CoverURL", dateDisabled) - albumMapping.AddFieldMappingsAt("Note", bleve.NewNumericFieldMapping()) - albumMapping.AddFieldMappingsAt("Scenarists", simpleMapping) - albumMapping.AddFieldMappingsAt("Designers", simpleMapping) - albumMapping.AddFieldMappingsAt("Colorists", simpleMapping) - albumMapping.AddFieldMappingsAt("Colorists", textDisabled) - albumMapping.AddFieldMappingsAt("FetchDate", dateDisabled) - - indexMapping := bleve.NewIndexMapping() - indexMapping.AddDocumentMapping("album", albumMapping) - return indexMapping -} - -func NewBleveIndexer(path string) (Indexer, error) { - blIndex, err := bleve.Open(path) - if err == bleve.ErrorIndexPathDoesNotExist { - blIndex, err = bleve.New(path, buildAlbumMapping()) - if err != nil { - return nil, err - } - } - - return &bleveIndexer{ - bl: blIndex, - db: diskv.New(diskv.Options{ - BasePath: filepath.Join(xdg.Cache.Home(), "satbd.bar.satellite", path), - CacheSizeMax: 100 * 1024 * 1024, // 100 Mb - Compression: diskv.NewGzipCompression(), - }), - }, nil -} - -func (i *bleveIndexer) id(ID AlbumID) string { - return strconv.FormatUint(uint64(ID), 10) -} - -func (i *bleveIndexer) Index(a *Album) error { - docID := i.id(a.ID) - err := i.bl.Index(docID, a) - - if err != nil { - return fmt.Errorf("Could not index album %d: %s", a.ID, err) - } - - //now we store it - data, err := json.Marshal(a) - if err != nil { - return fmt.Errorf("Could not encode data for album %d: %s", a.ID, err) - } - - if err := i.db.Write(docID, data); err != nil { - return fmt.Errorf("Could not store data for album %d: %s", a.ID, err) - } - - return nil -} - -func (i *bleveIndexer) get(docID string) (*Album, error) { - if i.db.Has(docID) == false { - return nil, fmt.Errorf("No album %s in the index", docID) - } - - r, err := i.db.ReadStream(docID, false) - if err != nil { - return nil, fmt.Errorf("Inconsistency in db: %s", err) - } - - dec := json.NewDecoder(r) - res := &Album{} - - err = dec.Decode(res) - if err != nil { - return nil, fmt.Errorf("Could not decode data for album %s: %s", docID, err) - } - return res, nil -} - -func (i *bleveIndexer) Get(ID AlbumID) (*Album, error) { - return i.get(i.id(ID)) -} - -func (i *bleveIndexer) Delete(ID AlbumID) error { - docID := i.id(ID) - if i.db.Has(docID) == false { - return fmt.Errorf("No album %d in the index", ID) - } - - errDB := i.db.Erase(docID) - - errIndex := i.bl.Delete(docID) - - if errDB == nil && errIndex == nil { - return nil - } - - return fmt.Errorf("Error during deletion of %d: %v", ID, []string{errDB.Error(), errIndex.Error()}) -} - -func (i *bleveIndexer) Search(query string) ([]*Album, error) { - - blq := bleve.NewQueryStringQuery(query) - search := bleve.NewSearchRequest(blq) - search.Highlight = bleve.NewHighlight() - sRes, err := i.bl.Search(search) - if err != nil { - return nil, fmt.Errorf("Could not perform search on the index: %s", err) - } - - res := make([]*Album, 0, sRes.Total) - for len(res) < int(sRes.Total) { - if len(res) != 0 { - //fetches the next results - search.From = len(res) - sRes, err = i.bl.Search(search) - if err != nil { - return nil, fmt.Errorf("Could not perform search on the index: %s", err) - } - } - for _, d := range sRes.Hits { - a, err := i.get(d.ID) - if err != nil { - return nil, err - } - res = append(res, a) - } - } - - return res, nil -} diff --git a/main.go b/main.go index a9e902f..de730b4 100644 --- a/main.go +++ b/main.go @@ -13,6 +13,7 @@ import ( "github.com/jessevdk/go-flags" "github.com/tylerb/graceful" + "github.com/blevesearch/bleve" bleveHttp "github.com/blevesearch/bleve/http" ) @@ -49,11 +50,11 @@ func readAlbums(csvPath string, albums chan *Album, errors chan error) { } } -func indexAlbums(i Indexer, albums chan *Album, errors chan error) { +func indexAlbums(i bleve.Index, albums chan *Album, errors chan error) { iAlbum := 0 start := time.Now() for a := range albums { - err := i.Index(a) + err := i.Index(a.IDString(), a) if err != nil { errors <- err } @@ -65,6 +66,14 @@ func indexAlbums(i Indexer, albums chan *Album, errors chan error) { } } +func buildOrOpen(basepath string) (bleve.Index, error) { + i, err := bleve.Open(basepath) + if err == bleve.ErrorIndexPathDoesNotExist { + return bleve.New(basepath, buildAlbumMapping()) + } + return i, err +} + // Execute executes the job func Execute() error { var opts Options @@ -80,7 +89,7 @@ func Execute() error { return err } - i, err := NewBleveIndexer("satbd.bar.satellite") + i, err := buildOrOpen("satbd.bar.satellite") if err != nil { return err } @@ -98,7 +107,7 @@ func Execute() error { router := mux.NewRouter() - bleveHttp.RegisterIndexName("album", i.(*bleveIndexer).bl) + bleveHttp.RegisterIndexName("album", i) searchHandler := bleveHttp.NewSearchHandler("album") router.Handle("/api/search", searchHandler).Methods("POST")