From c070c634630fe911610c1b178ea7026d07fd7abb Mon Sep 17 00:00:00 2001 From: Alexandre Tuleu Date: Sun, 17 Jan 2016 19:05:30 +0100 Subject: [PATCH] Adds HTTP Get rate limitation on cache We don't want to use a lot of bandwidth on bedetheque.com, so the pace at which we perform GET requests is limited to a maximum number of requests over a time window (for example, no more than 10 requests over 10 seconds). If more requests are issued, each extra request is simply paused until we are back within the limit ;). Go rulez! --- album_description_cache.go | 30 ++++++++++++--------- album_description_cache_test.go | 3 ++- rate_limited_getter.go | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 rate_limited_getter.go diff --git a/album_description_cache.go b/album_description_cache.go index f421f26..b19791e 100644 --- a/album_description_cache.go +++ b/album_description_cache.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "net/http" "os" "path" "path/filepath" @@ -21,17 +20,17 @@ import ( type AlbumDescriptionCache struct { basepath string lock lockfile.Lockfile - + getter HTTPGetter // time to live of the cache, data which is older than this TTL will be automatically removed TTL time.Duration } // NewAlbumDescriptionCache is creating a new album description at specified location -func NewAlbumDescriptionCache(path string) (*AlbumDescriptionCache, error) { +func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) { res := &AlbumDescriptionCache{ basepath: path, - - TTL: 3 * 31 * 24 * time.Hour, // 3 Months + getter: NewRateLimitedGetter(maxRequest, window), + TTL: 3 * 31 * 24 * time.Hour, // 3 Months } var err error res.lock, err = lockfile.New(filepath.Join(path, "global.lock")) @@ -79,7 +78,7 @@ func linkFromSelection(s *goquery.Selection) Link { func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) { URL := 
path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID)) - resp, err := http.Get("http://" + URL) + resp, err := c.getter.Get("http://" + URL) if err != nil { return nil, err } @@ -107,7 +106,7 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er return } - resp, err := http.Get(src) + resp, err := c.getter.Get(src) if err != nil { errors <- err return @@ -224,13 +223,20 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er } } - if len(errorList) == 0 { - return res, nil + if len(errorList) != 0 { + return nil, fmt.Errorf("Could not parse description from http://%s:%s", + URL, + strings.Join(append([]string{""}, errorList...), "\n * ")) } - return nil, fmt.Errorf("Could not parse description from http://%s:%s", - URL, - strings.Join(append([]string{""}, errorList...), "\n * ")) + f, err := os.Create(c.albumPath(ID)) + if err != nil { + return res, err + } + defer f.Close() + + enc := json.NewEncoder(f) + return res, enc.Encode(res) } // GetDescription retrieves from the cache or either from www.bedetheque.com the AlbumDescription of an album diff --git a/album_description_cache_test.go b/album_description_cache_test.go index 4b2ad21..06022d8 100644 --- a/album_description_cache_test.go +++ b/album_description_cache_test.go @@ -3,6 +3,7 @@ package main import ( "io/ioutil" "os" + "time" . 
"gopkg.in/check.v1" ) @@ -30,7 +31,7 @@ func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) { c.Assert(err, IsNil) defer os.RemoveAll(tmpdir) - cache, err := NewAlbumDescriptionCache(tmpdir) + cache, err := NewAlbumDescriptionCache(tmpdir, 10, 10*time.Second) c.Assert(err, IsNil) for ID, expected := range data { diff --git a/rate_limited_getter.go b/rate_limited_getter.go new file mode 100644 index 0000000..c766573 --- /dev/null +++ b/rate_limited_getter.go @@ -0,0 +1,47 @@ +package main + +import ( + "net/http" + "time" +) + +// An HTTPGetter can GET over HTTP +type HTTPGetter interface { + Get(URL string) (*http.Response, error) +} + +type simpleHTTPGetter struct{} + +// NewHTTPGetter returns the simplest object that can GET over HTTP +func NewHTTPGetter() HTTPGetter { + return &simpleHTTPGetter{} +} + +func (g *simpleHTTPGetter) Get(URL string) (*http.Response, error) { + return http.Get(URL) +} + +type rateLimitedHTTPGetter struct { + tokens chan bool + window time.Duration +} + +// NewRateLimitedGetter returns an HTTPGetter that is limited by a +// maximal amount of request over a time window +func NewRateLimitedGetter(maxRequest uint, window time.Duration) HTTPGetter { + return &rateLimitedHTTPGetter{ + tokens: make(chan bool, maxRequest), + window: window, + } +} + +func (g *rateLimitedHTTPGetter) Get(URL string) (*http.Response, error) { + g.tokens <- true + defer func() { + go func() { + time.Sleep(g.window) + <-g.tokens + }() + }() + return http.Get(URL) +}