Adds HTTP GET rate limiting on cache

We don't want to consume too much of bedetheque.com's bandwidth, so the
pace at which we perform GET requests is limited to a maximal number of
requests over a time window (for example, no more than 10 requests over
10 seconds).

If more requests are needed, the extra request simply blocks until we
are back within the limit ;). Go rulez !
2016-01-17 19:05:30 +01:00
parent b58b7e7e6a
commit c070c63463
3 changed files with 67 additions and 13 deletions
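
A minimal sketch of the intended usage, assuming the NewRateLimitedGetter constructor introduced below in rate_limited_getter.go is in scope (the album URL is only illustrative, built the same way as in fetchAndCache):

package main

import (
	"fmt"
	"time"
)

func main() {
	// At most 10 GET requests per 10-second window; an 11th call
	// inside the window blocks until a slot frees up.
	getter := NewRateLimitedGetter(10, 10*time.Second)

	resp, err := getter.Get("http://www.bedetheque.com/BD--1.html")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}

Callers never have to think about pacing: the getter enforces it itself.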


@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"net/http"
 	"os"
 	"path"
 	"path/filepath"
@@ -21,17 +20,17 @@ import (
 type AlbumDescriptionCache struct {
 	basepath string
 	lock     lockfile.Lockfile
+	getter   HTTPGetter
 	// time to live of the cache, data which is older than this TTL will be automatically removed
 	TTL time.Duration
 }

 // NewAlbumDescriptionCache is creating a new album description at specified location
-func NewAlbumDescriptionCache(path string) (*AlbumDescriptionCache, error) {
+func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) {
 	res := &AlbumDescriptionCache{
 		basepath: path,
-		TTL:      3 * 31 * 24 * time.Hour, // 3 Months
+		getter:   NewRateLimitedGetter(maxRequest, window),
+		TTL:      3 * 31 * 24 * time.Hour, // 3 Months
 	}
 	var err error
 	res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
@@ -79,7 +78,7 @@ func linkFromSelection(s *goquery.Selection) Link {
 func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
 	URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
-	resp, err := http.Get("http://" + URL)
+	resp, err := c.getter.Get("http://" + URL)
 	if err != nil {
 		return nil, err
 	}
@@ -107,7 +106,7 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
 		return
 	}
-	resp, err := http.Get(src)
+	resp, err := c.getter.Get(src)
 	if err != nil {
 		errors <- err
 		return
@@ -224,13 +223,20 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
 		}
 	}
-	if len(errorList) == 0 {
-		return res, nil
+	if len(errorList) != 0 {
+		return nil, fmt.Errorf("Could not parse description from http://%s:%s",
+			URL,
+			strings.Join(append([]string{""}, errorList...), "\n * "))
 	}
-	return nil, fmt.Errorf("Could not parse description from http://%s:%s",
-		URL,
-		strings.Join(append([]string{""}, errorList...), "\n * "))
+	f, err := os.Create(c.albumPath(ID))
+	if err != nil {
+		return res, err
+	}
+	defer f.Close()
+	enc := json.NewEncoder(f)
+	return res, enc.Encode(res)
 }

 // GetDescription retrieves from the cache or either from www.bedetheque.com the AlbumDescription of an album


@@ -3,6 +3,7 @@ package main
 import (
 	"io/ioutil"
 	"os"
+	"time"

 	. "gopkg.in/check.v1"
 )

@@ -30,7 +31,7 @@ func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) {
 	c.Assert(err, IsNil)
 	defer os.RemoveAll(tmpdir)
-	cache, err := NewAlbumDescriptionCache(tmpdir)
+	cache, err := NewAlbumDescriptionCache(tmpdir, 10, 10*time.Second)
 	c.Assert(err, IsNil)
 	for ID, expected := range data {

rate_limited_getter.go (new file)

@@ -0,0 +1,47 @@
+package main
+
+import (
+	"net/http"
+	"time"
+)
+
+// An HTTPGetter can GET over HTTP
+type HTTPGetter interface {
+	Get(URL string) (*http.Response, error)
+}
+
+type simpleHTTPGetter struct{}
+
+// NewHTTPGetter returns the simplest object that can GET over HTTP
+func NewHTTPGetter() HTTPGetter {
+	return &simpleHTTPGetter{}
+}
+
+func (g *simpleHTTPGetter) Get(URL string) (*http.Response, error) {
+	return http.Get(URL)
+}
+
+type rateLimitedHTTPGetter struct {
+	tokens chan bool
+	window time.Duration
+}
+
+// NewRateLimitedGetter returns an HTTPGetter that is limited to a
+// maximal number of requests over a time window
+func NewRateLimitedGetter(maxRequest uint, window time.Duration) HTTPGetter {
+	return &rateLimitedHTTPGetter{
+		tokens: make(chan bool, maxRequest),
+		window: window,
+	}
+}
+
+func (g *rateLimitedHTTPGetter) Get(URL string) (*http.Response, error) {
+	// Acquire a slot; this blocks while maxRequest tokens are in flight.
+	g.tokens <- true
+	defer func() {
+		// Release the slot only once the window has elapsed.
+		go func() {
+			time.Sleep(g.window)
+			<-g.tokens
		}()
+	}()
+	return http.Get(URL)
+}
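
The buffered channel works as a counting semaphore: sending a token blocks once maxRequest tokens are outstanding, and each token is only taken back one full window after it was acquired, so at most maxRequest requests can start within any window. A short, self-contained way to observe the blocking behavior, assuming the same package; the httptest server and the timings are illustrative, not part of the commit:

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"
)

func main() {
	// A throwaway local server so the demo makes no external requests.
	srv := httptest.NewServer(http.HandlerFunc(
		func(w http.ResponseWriter, r *http.Request) {}))
	defer srv.Close()

	// 2 requests allowed per 1-second window: the 3rd Get blocks ~1s.
	getter := NewRateLimitedGetter(2, time.Second)
	start := time.Now()
	for i := 1; i <= 3; i++ {
		resp, err := getter.Get(srv.URL)
		if err == nil {
			resp.Body.Close()
		}
		fmt.Printf("request %d finished after %v\n", i, time.Since(start))
	}
}

Note that a token is held for the full window even when the request itself finishes quickly; that is what bounds the request rate rather than merely the concurrency.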