diff --git a/bleve_indexer_test.go b/bleve_indexer_test.go
new file mode 100644
index 0000000..69d12b4
--- /dev/null
+++ b/bleve_indexer_test.go
@@ -0,0 +1,76 @@
+package main
+
+import (
+	"log"
+	"time"
+
+	. "gopkg.in/check.v1"
+)
+
+// BleveIndexerSuite exercises an Indexer built by NewBleveIndexer against the
+// albumsDataTest fixtures.
+type BleveIndexerSuite struct {
+	i Indexer
+}
+
+var _ = Suite(&BleveIndexerSuite{})
+
+// SetUpSuite opens (or creates) the test index and indexes every fixture
+// album, logging how long that took.
+func (s *BleveIndexerSuite) SetUpSuite(c *C) {
+	var err error
+	start := time.Now()
+	s.i, err = NewBleveIndexer("satbd-test.bar.satellite")
+	c.Assert(err, IsNil)
+	// Take the address of the slice element, not of the range variable, so
+	// Index is always handed the fixture itself.
+	for idx := range albumsDataTest {
+		c.Assert(s.i.Index(&albumsDataTest[idx]), IsNil)
+	}
+	log.Printf("Indexing took %s", time.Since(start))
+}
+
+// TestCanRetrieveAlbum checks that every fixture album can be read back
+// unchanged from the index.
+func (s *BleveIndexerSuite) TestCanRetrieveAlbum(c *C) {
+	start := time.Now()
+	for _, a := range albumsDataTest {
+		fromIndex, err := s.i.Get(a.ID)
+		if c.Check(err, IsNil) {
+			c.Check(*fromIndex, DeepEquals, a)
+		}
+	}
+	log.Printf("%s: %s", c.TestName(), time.Since(start))
+}
+
+// TestCanDeleteAlbum deletes the first fixture album, indexes it again, and
+// checks that deleting ID 0 reports that no such album is indexed.
+func (s *BleveIndexerSuite) TestCanDeleteAlbum(c *C) {
+	c.Check(s.i.Delete(albumsDataTest[0].ID), IsNil)
+	c.Check(s.i.Index(&(albumsDataTest[0])), IsNil)
+	c.Check(s.i.Delete(0), ErrorMatches, "No album 0 in the index")
+}
+
+// TestCanSearch runs a few query strings and checks the IDs of the returned
+// albums, in order, against the expected ones.
+func (s *BleveIndexerSuite) TestCanSearch(c *C) {
+	data := map[string][]AlbumID{
+		"Black Crow Hermione": {albumsDataTest[10].ID},
+		"Plantu":              {albumsDataTest[2].ID},
+		"FOOOOOOOBAAAAAR":     {},
+	}
+	start := time.Now()
+	for q, expected := range data {
+		res, err := s.i.Search(q)
+		if !c.Check(err, IsNil) {
+			continue
+		}
+		if !c.Check(len(res), Equals, len(expected)) {
+			continue
+		}
+		for idx, resAlbum := range res {
+			c.Check(resAlbum.ID, Equals, expected[idx])
+		}
+	}
+	log.Printf("%s: %s", c.TestName(), time.Since(start))
+}
diff --git a/indexer.go b/indexer.go
new file mode 100644
index 0000000..0440494
--- /dev/null
+++ b/indexer.go
@@ -0,0 +1,214 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"path/filepath"
+	"strconv"
+
+	"launchpad.net/go-xdg"
+
+	"github.com/blevesearch/bleve"
+	"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
+	"github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
+	"github.com/blevesearch/bleve/analysis/language/fr"
+	"github.com/peterbourgon/diskv"
+)
+
+// Indexer stores albums and retrieves them by ID or by search query.
+type Indexer interface {
+	// Index adds a to the index.
+	Index(a *Album) error
+	// Get returns the album stored under ID.
+	Get(ID AlbumID) (*Album, error)
+	// Delete removes the album stored under ID.
+	Delete(ID AlbumID) error
+	// Search returns the albums matching query.
+	Search(query string) ([]*Album, error)
+}
+
+// bleveIndexer is an Indexer backed by a bleve index for searching and a
+// diskv store that holds the JSON encoding of each album.
+type bleveIndexer struct {
+	bl bleve.Index
+
+	db *diskv.Diskv
+}
+
+// buildAlbumMapping returns the index mapping used when a new index is
+// created: a static "album" document mapping that assigns a field mapping to
+// each listed Album field.
+func buildAlbumMapping() *bleve.IndexMapping {
+	simpleMapping := bleve.NewTextFieldMapping()
+	simpleMapping.Analyzer = simple_analyzer.Name
+
+	frenchTextMapping := bleve.NewTextFieldMapping()
+	frenchTextMapping.Analyzer = fr.AnalyzerName
+
+	keywordFieldMapping := bleve.NewTextFieldMapping()
+	keywordFieldMapping.Analyzer = keyword_analyzer.Name
+	keywordFieldMapping.IncludeInAll = false
+
+	numericMappingDisabled := bleve.NewNumericFieldMapping()
+	numericMappingDisabled.Index = false
+	numericMappingDisabled.IncludeInAll = false
+
+	textDisabled := bleve.NewTextFieldMapping()
+	textDisabled.Index = false
+	textDisabled.IncludeInAll = false
+
+	// NOTE(review): dateStore is configured but never attached to a field
+	// below; confirm whether a date field was meant to be mapped.
+	dateStore := bleve.NewDateTimeFieldMapping()
+	dateStore.Index = false
+	dateStore.IncludeInAll = false
+
+	albumMapping := bleve.NewDocumentStaticMapping()
+
+	albumMapping.AddFieldMappingsAt("ID", numericMappingDisabled)
+	albumMapping.AddFieldMappingsAt("ISBN", keywordFieldMapping)
+	albumMapping.AddFieldMappingsAt("Series", frenchTextMapping)
+	albumMapping.AddFieldMappingsAt("Title", frenchTextMapping)
+	albumMapping.AddFieldMappingsAt("Num", numericMappingDisabled)
+	albumMapping.AddFieldMappingsAt("NumA", textDisabled)
+	albumMapping.AddFieldMappingsAt("Editor", simpleMapping)
+	albumMapping.AddFieldMappingsAt("Collection", frenchTextMapping)
+	albumMapping.AddFieldMappingsAt("SatID", keywordFieldMapping)
+	albumMapping.AddFieldMappingsAt("Description", frenchTextMapping)
+
+	indexMapping := bleve.NewIndexMapping()
+	indexMapping.AddDocumentMapping("album", albumMapping)
+	return indexMapping
+}
+
+// NewBleveIndexer opens the bleve index at path, creating it with the album
+// mapping when no index exists there yet, and pairs it with a gzip-compressed
+// diskv store rooted under the XDG cache directory. It returns an error when
+// the index can be neither opened nor created.
+func NewBleveIndexer(path string) (Indexer, error) {
+	blIndex, err := bleve.Open(path)
+	if err == bleve.ErrorIndexPathDoesNotExist {
+		blIndex, err = bleve.New(path, buildAlbumMapping())
+	}
+	// Covers both a failed creation and an open failure other than a missing
+	// path, so a nil index is never wrapped and returned.
+	if err != nil {
+		return nil, err
+	}
+
+	return &bleveIndexer{
+		bl: blIndex,
+		db: diskv.New(diskv.Options{
+			BasePath:     filepath.Join(xdg.Cache.Home(), "satbd.bar.satellite", path),
+			CacheSizeMax: 100 * 1024 * 1024, // 100 MiB
+			Compression:  diskv.NewGzipCompression(),
+		}),
+	}, nil
+}
+
+// id returns the decimal string used as the key for ID in both the bleve
+// index and the diskv store.
+func (i *bleveIndexer) id(ID AlbumID) string {
+	return strconv.FormatUint(uint64(ID), 10)
+}
+
+// Index adds a to the bleve index, then writes its JSON encoding to the diskv
+// store under the same key.
+//
+// NOTE(review): the two writes are not rolled back on failure; if encoding or
+// storing fails, the album has already been added to the bleve index.
+func (i *bleveIndexer) Index(a *Album) error {
+	docID := i.id(a.ID)
+	if err := i.bl.Index(docID, a); err != nil {
+		return fmt.Errorf("Could not index album %d: %s", a.ID, err)
+	}
+
+	// Now store the album itself so that it can be returned by get.
+	data, err := json.Marshal(a)
+	if err != nil {
+		return fmt.Errorf("Could not encode data for album %d: %s", a.ID, err)
+	}
+
+	if err := i.db.Write(docID, data); err != nil {
+		return fmt.Errorf("Could not store data for album %d: %s", a.ID, err)
+	}
+
+	return nil
+}
+
+// get reads the album stored under docID from the diskv store and decodes it.
+func (i *bleveIndexer) get(docID string) (*Album, error) {
+	if !i.db.Has(docID) {
+		return nil, fmt.Errorf("No album %s in the index", docID)
+	}
+
+	r, err := i.db.ReadStream(docID, false)
+	if err != nil {
+		return nil, fmt.Errorf("Inconsistency in db: %s", err)
+	}
+	// The stream must be closed once decoding is done, or it is leaked.
+	defer r.Close()
+
+	res := &Album{}
+	if err := json.NewDecoder(r).Decode(res); err != nil {
+		return nil, fmt.Errorf("Could not decode data for album %s: %s", docID, err)
+	}
+	return res, nil
+}
+
+// Get returns the album stored under ID.
+func (i *bleveIndexer) Get(ID AlbumID) (*Album, error) {
+	return i.get(i.id(ID))
+}
+
+// Delete removes the album stored under ID from both the diskv store and the
+// bleve index. Both removals are attempted even if the first one fails, and
+// every failure is reported in the returned error.
+func (i *bleveIndexer) Delete(ID AlbumID) error {
+	docID := i.id(ID)
+	if !i.db.Has(docID) {
+		return fmt.Errorf("No album %d in the index", ID)
+	}
+
+	errDB := i.db.Erase(docID)
+	errIndex := i.bl.Delete(docID)
+
+	// Collect only the errors that occurred: either removal may fail on its
+	// own, and Error must not be called on a nil error.
+	var failures []string
+	for _, err := range []error{errDB, errIndex} {
+		if err != nil {
+			failures = append(failures, err.Error())
+		}
+	}
+	if len(failures) == 0 {
+		return nil
+	}
+
+	return fmt.Errorf("Error during deletion of %d: %v", ID, failures)
+}
+
+// Search runs query as a bleve query string and returns the stored album of
+// each hit, in the order of the hits. If a hit cannot be loaded from the
+// store, the albums loaded so far are returned along with the error.
+func (i *bleveIndexer) Search(query string) ([]*Album, error) {
+	blq := bleve.NewQueryStringQuery(query)
+	search := bleve.NewSearchRequest(blq)
+	searchResults, err := i.bl.Search(search)
+	if err != nil {
+		return nil, fmt.Errorf("Could not perform search on the index: %s", err)
+	}
+
+	// Size the slice by the hits actually returned; Total is the number of
+	// matches in the whole index and may be much larger than this page.
+	res := make([]*Album, 0, len(searchResults.Hits))
+	for _, d := range searchResults.Hits {
+		a, err := i.get(d.ID)
+		if err != nil {
+			return res, err
+		}
+		res = append(res, a)
+	}
+
+	return res, nil
+}