Splits bleve.Index and database in two
This commit is contained in:
9
album.go
9
album.go
@@ -1,6 +1,9 @@
|
||||
package main
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// An AlbumState describes the state of an Album
|
||||
type AlbumState int
|
||||
@@ -63,3 +66,7 @@ type Album struct {
|
||||
|
||||
FetchDate time.Time
|
||||
}
|
||||
|
||||
func (a *Album) IDString() string {
|
||||
return strconv.FormatUint(uint64(a.ID), 10)
|
||||
}
|
||||
|
||||
27
album_database.go
Normal file
27
album_database.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package main
|
||||
|
||||
import "github.com/peterbourgon/diskv"
|
||||
|
||||
type AlbumDatabase struct {
|
||||
db *diskv.Diskv
|
||||
}
|
||||
|
||||
func OpenAlbumDatabase(basepath string) (*AlbumDatabase, error) {
|
||||
return nil, notYetImplemented()
|
||||
}
|
||||
|
||||
func (db *AlbumDatabase) AddOrUpdate(*Album) error {
|
||||
return notYetImplemented()
|
||||
}
|
||||
|
||||
func (db *AlbumDatabase) Delete(AlbumID) error {
|
||||
return notYetImplemented()
|
||||
}
|
||||
|
||||
func (db *AlbumDatabase) Get(AlbumID) (*Album, error) {
|
||||
return nil, notYetImplemented()
|
||||
}
|
||||
|
||||
func (db *AlbumDatabase) ByPurchaseDate() ([]*Album, error) {
|
||||
return nil, notYetImplemented()
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
@@ -177,5 +178,6 @@ func (g *AlbumDescriptionGetter) Get(a *Album) error {
|
||||
strings.Join(append([]string{""}, errorList...), "\n * "))
|
||||
}
|
||||
|
||||
a.FetchDate = time.Now()
|
||||
return nil
|
||||
}
|
||||
|
||||
63
album_mapping.go
Normal file
63
album_mapping.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve"
|
||||
"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
|
||||
"github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
|
||||
"github.com/blevesearch/bleve/analysis/language/fr"
|
||||
)
|
||||
|
||||
func buildAlbumMapping() *bleve.IndexMapping {
|
||||
simpleMapping := bleve.NewTextFieldMapping()
|
||||
simpleMapping.Analyzer = simple_analyzer.Name
|
||||
|
||||
frenchTextMapping := bleve.NewTextFieldMapping()
|
||||
frenchTextMapping.Analyzer = fr.AnalyzerName
|
||||
|
||||
keywordFieldMapping := bleve.NewTextFieldMapping()
|
||||
keywordFieldMapping.Analyzer = keyword_analyzer.Name
|
||||
keywordFieldMapping.IncludeInAll = false
|
||||
|
||||
numericDisabled := bleve.NewNumericFieldMapping()
|
||||
numericDisabled.Index = false
|
||||
numericDisabled.Store = false
|
||||
numericDisabled.IncludeInAll = false
|
||||
|
||||
textDisabled := bleve.NewTextFieldMapping()
|
||||
textDisabled.Index = false
|
||||
textDisabled.Store = false
|
||||
textDisabled.IncludeInAll = false
|
||||
|
||||
dateDisabled := bleve.NewDateTimeFieldMapping()
|
||||
dateDisabled.Index = false
|
||||
dateDisabled.Store = false
|
||||
dateDisabled.IncludeInAll = false
|
||||
|
||||
albumMapping := bleve.NewDocumentStaticMapping()
|
||||
|
||||
albumMapping.AddFieldMappingsAt("ID", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("ISBN", keywordFieldMapping)
|
||||
albumMapping.AddFieldMappingsAt("Series", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("Title", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("Num", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("NumA", textDisabled)
|
||||
albumMapping.AddFieldMappingsAt("State", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("Editor", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Collection", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("SatID", keywordFieldMapping)
|
||||
albumMapping.AddFieldMappingsAt("Description", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("LegalDeposit", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("PrintDate", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("PurchaseDate", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("CoverURL", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("Note", bleve.NewNumericFieldMapping())
|
||||
albumMapping.AddFieldMappingsAt("Scenarists", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Designers", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Colorists", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Colorists", textDisabled)
|
||||
albumMapping.AddFieldMappingsAt("FetchDate", dateDisabled)
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
indexMapping.AddDocumentMapping("album", albumMapping)
|
||||
return indexMapping
|
||||
}
|
||||
@@ -3,13 +3,15 @@ package main
|
||||
import (
|
||||
"log"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
. "gopkg.in/check.v1"
|
||||
)
|
||||
|
||||
type BleveIndexerSuite struct {
|
||||
i Indexer
|
||||
i bleve.Index
|
||||
}
|
||||
|
||||
var _ = Suite(&BleveIndexerSuite{})
|
||||
@@ -17,31 +19,14 @@ var _ = Suite(&BleveIndexerSuite{})
|
||||
func (s *BleveIndexerSuite) SetUpSuite(c *C) {
|
||||
var err error
|
||||
start := time.Now()
|
||||
s.i, err = NewBleveIndexer(filepath.Join(c.MkDir(), "satbd-test.bar.satellite"))
|
||||
s.i, err = bleve.New(filepath.Join(c.MkDir(), "satbd-test.bar.satellite"), buildAlbumMapping())
|
||||
c.Assert(err, IsNil)
|
||||
for _, a := range albumsDataTest {
|
||||
c.Assert(s.i.Index(&a), IsNil)
|
||||
c.Assert(s.i.Index((&a).IDString(), &a), IsNil)
|
||||
}
|
||||
log.Printf("Indexing took %s", time.Since(start))
|
||||
}
|
||||
|
||||
func (s *BleveIndexerSuite) TestCanRetrieveAlbum(c *C) {
|
||||
start := time.Now()
|
||||
for _, a := range albumsDataTest {
|
||||
fromIndex, err := s.i.Get(a.ID)
|
||||
if c.Check(err, IsNil) == true {
|
||||
c.Check(*fromIndex, DeepEquals, a)
|
||||
}
|
||||
}
|
||||
log.Printf("%s: %s", c.TestName(), time.Since(start))
|
||||
}
|
||||
|
||||
func (s *BleveIndexerSuite) TestCanDeleteAlbum(c *C) {
|
||||
c.Check(s.i.Delete(albumsDataTest[0].ID), IsNil)
|
||||
c.Check(s.i.Index(&(albumsDataTest[0])), IsNil)
|
||||
c.Check(s.i.Delete(0), ErrorMatches, "No album 0 in the index")
|
||||
}
|
||||
|
||||
func (s *BleveIndexerSuite) TestCanSearch(c *C) {
|
||||
data := map[string]map[AlbumID]bool{
|
||||
// Research by amny keyword (here in the description
|
||||
@@ -77,17 +62,24 @@ func (s *BleveIndexerSuite) TestCanSearch(c *C) {
|
||||
}
|
||||
start := time.Now()
|
||||
for q, expected := range data {
|
||||
res, err := s.i.Search(q)
|
||||
query := bleve.NewQueryStringQuery(q)
|
||||
search := bleve.NewSearchRequest(query)
|
||||
//make sure we can have all data at once
|
||||
search.Size = len(albumsDataTest)
|
||||
searchResults, err := s.i.Search(search)
|
||||
if c.Check(err, IsNil) == true {
|
||||
if c.Check(len(res), Equals, len(expected), Commentf("Query: %s", q)) == true {
|
||||
for _, resAlbum := range res {
|
||||
_, ok := expected[resAlbum.ID]
|
||||
|
||||
if c.Check(int(searchResults.Total), Equals, len(expected), Commentf("Query: %s", q)) == true {
|
||||
for _, resAlbum := range searchResults.Hits {
|
||||
aID, err := strconv.ParseUint(resAlbum.ID, 0, 64)
|
||||
c.Check(err, IsNil)
|
||||
_, ok := expected[AlbumID(aID)]
|
||||
c.Check(ok, Equals, true, Commentf("Query: %s, got %d instead of %v", q, resAlbum.ID, expected))
|
||||
}
|
||||
} else {
|
||||
log.Printf("Got %d result(s)", len(res))
|
||||
for i, a := range res {
|
||||
log.Printf(" - %d: %d: %s", i, a.ID, a.Title)
|
||||
log.Printf("Got %d result(s)", len(searchResults.Hits))
|
||||
for i, a := range searchResults.Hits {
|
||||
log.Printf(" - %d: %s", i, a.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
203
indexer.go
203
indexer.go
@@ -1,203 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"launchpad.net/go-xdg"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
|
||||
"github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
|
||||
"github.com/blevesearch/bleve/analysis/language/fr"
|
||||
"github.com/peterbourgon/diskv"
|
||||
)
|
||||
|
||||
// SearchResult is a named type whose underlying type is bleve.SearchResult.
type SearchResult bleve.SearchResult
|
||||
|
||||
type Indexer interface {
|
||||
Index(a *Album) error
|
||||
Get(ID AlbumID) (*Album, error)
|
||||
Delete(ID AlbumID) error
|
||||
Search(query string) ([]*Album, error)
|
||||
}
|
||||
|
||||
type bleveIndexer struct {
|
||||
bl bleve.Index
|
||||
|
||||
db *diskv.Diskv
|
||||
}
|
||||
|
||||
func buildAlbumMapping() *bleve.IndexMapping {
|
||||
simpleMapping := bleve.NewTextFieldMapping()
|
||||
simpleMapping.Analyzer = simple_analyzer.Name
|
||||
|
||||
frenchTextMapping := bleve.NewTextFieldMapping()
|
||||
frenchTextMapping.Analyzer = fr.AnalyzerName
|
||||
|
||||
keywordFieldMapping := bleve.NewTextFieldMapping()
|
||||
keywordFieldMapping.Analyzer = keyword_analyzer.Name
|
||||
keywordFieldMapping.IncludeInAll = false
|
||||
|
||||
numericDisabled := bleve.NewNumericFieldMapping()
|
||||
numericDisabled.Index = false
|
||||
numericDisabled.Store = false
|
||||
numericDisabled.IncludeInAll = false
|
||||
|
||||
textDisabled := bleve.NewTextFieldMapping()
|
||||
textDisabled.Index = false
|
||||
textDisabled.Store = false
|
||||
textDisabled.IncludeInAll = false
|
||||
|
||||
dateDisabled := bleve.NewDateTimeFieldMapping()
|
||||
dateDisabled.Index = false
|
||||
dateDisabled.Store = false
|
||||
dateDisabled.IncludeInAll = false
|
||||
|
||||
albumMapping := bleve.NewDocumentStaticMapping()
|
||||
|
||||
albumMapping.AddFieldMappingsAt("ID", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("ISBN", keywordFieldMapping)
|
||||
albumMapping.AddFieldMappingsAt("Series", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("Title", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("Num", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("NumA", textDisabled)
|
||||
albumMapping.AddFieldMappingsAt("State", numericDisabled)
|
||||
albumMapping.AddFieldMappingsAt("Editor", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Collection", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("SatID", keywordFieldMapping)
|
||||
albumMapping.AddFieldMappingsAt("Description", frenchTextMapping)
|
||||
albumMapping.AddFieldMappingsAt("LegalDeposit", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("PrintDate", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("PurchaseDate", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("CoverURL", dateDisabled)
|
||||
albumMapping.AddFieldMappingsAt("Note", bleve.NewNumericFieldMapping())
|
||||
albumMapping.AddFieldMappingsAt("Scenarists", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Designers", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Colorists", simpleMapping)
|
||||
albumMapping.AddFieldMappingsAt("Colorists", textDisabled)
|
||||
albumMapping.AddFieldMappingsAt("FetchDate", dateDisabled)
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
indexMapping.AddDocumentMapping("album", albumMapping)
|
||||
return indexMapping
|
||||
}
|
||||
|
||||
func NewBleveIndexer(path string) (Indexer, error) {
|
||||
blIndex, err := bleve.Open(path)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
blIndex, err = bleve.New(path, buildAlbumMapping())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &bleveIndexer{
|
||||
bl: blIndex,
|
||||
db: diskv.New(diskv.Options{
|
||||
BasePath: filepath.Join(xdg.Cache.Home(), "satbd.bar.satellite", path),
|
||||
CacheSizeMax: 100 * 1024 * 1024, // 100 Mb
|
||||
Compression: diskv.NewGzipCompression(),
|
||||
}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) id(ID AlbumID) string {
|
||||
return strconv.FormatUint(uint64(ID), 10)
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) Index(a *Album) error {
|
||||
docID := i.id(a.ID)
|
||||
err := i.bl.Index(docID, a)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("Could not index album %d: %s", a.ID, err)
|
||||
}
|
||||
|
||||
//now we store it
|
||||
data, err := json.Marshal(a)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Could not encode data for album %d: %s", a.ID, err)
|
||||
}
|
||||
|
||||
if err := i.db.Write(docID, data); err != nil {
|
||||
return fmt.Errorf("Could not store data for album %d: %s", a.ID, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) get(docID string) (*Album, error) {
|
||||
if i.db.Has(docID) == false {
|
||||
return nil, fmt.Errorf("No album %s in the index", docID)
|
||||
}
|
||||
|
||||
r, err := i.db.ReadStream(docID, false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Inconsistency in db: %s", err)
|
||||
}
|
||||
|
||||
dec := json.NewDecoder(r)
|
||||
res := &Album{}
|
||||
|
||||
err = dec.Decode(res)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Could not decode data for album %s: %s", docID, err)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) Get(ID AlbumID) (*Album, error) {
|
||||
return i.get(i.id(ID))
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) Delete(ID AlbumID) error {
|
||||
docID := i.id(ID)
|
||||
if i.db.Has(docID) == false {
|
||||
return fmt.Errorf("No album %d in the index", ID)
|
||||
}
|
||||
|
||||
errDB := i.db.Erase(docID)
|
||||
|
||||
errIndex := i.bl.Delete(docID)
|
||||
|
||||
if errDB == nil && errIndex == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("Error during deletion of %d: %v", ID, []string{errDB.Error(), errIndex.Error()})
|
||||
}
|
||||
|
||||
func (i *bleveIndexer) Search(query string) ([]*Album, error) {
|
||||
|
||||
blq := bleve.NewQueryStringQuery(query)
|
||||
search := bleve.NewSearchRequest(blq)
|
||||
search.Highlight = bleve.NewHighlight()
|
||||
sRes, err := i.bl.Search(search)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Could not perform search on the index: %s", err)
|
||||
}
|
||||
|
||||
res := make([]*Album, 0, sRes.Total)
|
||||
for len(res) < int(sRes.Total) {
|
||||
if len(res) != 0 {
|
||||
//fetches the next results
|
||||
search.From = len(res)
|
||||
sRes, err = i.bl.Search(search)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Could not perform search on the index: %s", err)
|
||||
}
|
||||
}
|
||||
for _, d := range sRes.Hits {
|
||||
a, err := i.get(d.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res = append(res, a)
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
17
main.go
17
main.go
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/jessevdk/go-flags"
|
||||
"github.com/tylerb/graceful"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
bleveHttp "github.com/blevesearch/bleve/http"
|
||||
)
|
||||
|
||||
@@ -49,11 +50,11 @@ func readAlbums(csvPath string, albums chan *Album, errors chan error) {
|
||||
}
|
||||
}
|
||||
|
||||
func indexAlbums(i Indexer, albums chan *Album, errors chan error) {
|
||||
func indexAlbums(i bleve.Index, albums chan *Album, errors chan error) {
|
||||
iAlbum := 0
|
||||
start := time.Now()
|
||||
for a := range albums {
|
||||
err := i.Index(a)
|
||||
err := i.Index(a.IDString(), a)
|
||||
if err != nil {
|
||||
errors <- err
|
||||
}
|
||||
@@ -65,6 +66,14 @@ func indexAlbums(i Indexer, albums chan *Album, errors chan error) {
|
||||
}
|
||||
}
|
||||
|
||||
func buildOrOpen(basepath string) (bleve.Index, error) {
|
||||
i, err := bleve.Open(basepath)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
return bleve.New(basepath, buildAlbumMapping())
|
||||
}
|
||||
return i, err
|
||||
}
|
||||
|
||||
// Execute executes the job
|
||||
func Execute() error {
|
||||
var opts Options
|
||||
@@ -80,7 +89,7 @@ func Execute() error {
|
||||
return err
|
||||
}
|
||||
|
||||
i, err := NewBleveIndexer("satbd.bar.satellite")
|
||||
i, err := buildOrOpen("satbd.bar.satellite")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -98,7 +107,7 @@ func Execute() error {
|
||||
|
||||
router := mux.NewRouter()
|
||||
|
||||
bleveHttp.RegisterIndexName("album", i.(*bleveIndexer).bl)
|
||||
bleveHttp.RegisterIndexName("album", i)
|
||||
searchHandler := bleveHttp.NewSearchHandler("album")
|
||||
|
||||
router.Handle("/api/search", searchHandler).Methods("POST")
|
||||
|
||||
Reference in New Issue
Block a user