Adds HTTP GET rate limiting to the cache
We don't want to use a lot of bandwidth on bedetheque.com, so the pace at which we perform GET requests is limited to a maximum number of requests over a time window (for example, no more than 10 requests over 10 seconds). If more requests are needed, the extra requests are simply paused until we are back within the limit ;). Go rulez!
This commit is contained in:
@@ -4,7 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
@@ -21,17 +20,17 @@ import (
|
||||
// AlbumDescriptionCache stores album descriptions fetched from
// www.bedetheque.com as JSON files on disk.
type AlbumDescriptionCache struct {
|
||||
// directory handed to NewAlbumDescriptionCache
basepath string
|
||||
// lock file created as "global.lock" inside basepath
lock lockfile.Lockfile
|
||||
|
||||
// used by fetchAndCache for its HTTP GET requests
getter HTTPGetter
|
||||
// time to live of the cache, data which is older than this TTL will be automatically removed
|
||||
TTL time.Duration
|
||||
}
|
||||
|
||||
// NewAlbumDescriptionCache creates a new album description cache at the specified location
|
||||
func NewAlbumDescriptionCache(path string) (*AlbumDescriptionCache, error) {
|
||||
func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) {
|
||||
res := &AlbumDescriptionCache{
|
||||
basepath: path,
|
||||
|
||||
TTL: 3 * 31 * 24 * time.Hour, // 3 Months
|
||||
getter: NewRateLimitedGetter(maxRequest, window),
|
||||
TTL: 3 * 31 * 24 * time.Hour, // 3 Months
|
||||
}
|
||||
var err error
|
||||
res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
|
||||
@@ -79,7 +78,7 @@ func linkFromSelection(s *goquery.Selection) Link {
|
||||
|
||||
func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
|
||||
URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
|
||||
resp, err := http.Get("http://" + URL)
|
||||
resp, err := c.getter.Get("http://" + URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -107,7 +106,7 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := http.Get(src)
|
||||
resp, err := c.getter.Get(src)
|
||||
if err != nil {
|
||||
errors <- err
|
||||
return
|
||||
@@ -224,13 +223,20 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
|
||||
}
|
||||
}
|
||||
|
||||
if len(errorList) == 0 {
|
||||
return res, nil
|
||||
if len(errorList) != 0 {
|
||||
return nil, fmt.Errorf("Could not parse description from http://%s:%s",
|
||||
URL,
|
||||
strings.Join(append([]string{""}, errorList...), "\n * "))
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Could not parse description from http://%s:%s",
|
||||
URL,
|
||||
strings.Join(append([]string{""}, errorList...), "\n * "))
|
||||
f, err := os.Create(c.albumPath(ID))
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
enc := json.NewEncoder(f)
|
||||
return res, enc.Encode(res)
|
||||
}
|
||||
|
||||
// GetDescription retrieves the AlbumDescription of an album, either from the cache or from www.bedetheque.com
|
||||
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
. "gopkg.in/check.v1"
|
||||
)
|
||||
@@ -30,7 +31,7 @@ func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) {
|
||||
c.Assert(err, IsNil)
|
||||
defer os.RemoveAll(tmpdir)
|
||||
|
||||
cache, err := NewAlbumDescriptionCache(tmpdir)
|
||||
cache, err := NewAlbumDescriptionCache(tmpdir, 10, 10*time.Second)
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
for ID, expected := range data {
|
||||
|
||||
47
rate_limited_getter.go
Normal file
47
rate_limited_getter.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// HTTPGetter is anything that can perform an HTTP GET request.
//
// Get takes the URL to fetch and returns the response together with any
// error, with the same signature as http.Get.
|
||||
type HTTPGetter interface {
|
||||
Get(URL string) (*http.Response, error)
|
||||
}
|
||||
|
||||
// simpleHTTPGetter is a stateless HTTPGetter whose Get forwards straight to
// http.Get, without any throttling.
type simpleHTTPGetter struct{}
|
||||
|
||||
// NewHTTPGetter returns the simplest object that can GET over HTTP
|
||||
func NewHTTPGetter() HTTPGetter {
|
||||
return &simpleHTTPGetter{}
|
||||
}
|
||||
|
||||
func (g *simpleHTTPGetter) Get(URL string) (*http.Response, error) {
|
||||
return http.Get(URL)
|
||||
}
|
||||
|
||||
// rateLimitedHTTPGetter is an HTTPGetter that throttles its requests with a
// buffered channel used as a pool of slots.
type rateLimitedHTTPGetter struct {
|
||||
// one value is sent per request before it is issued and received back
// once the window has elapsed; the capacity is chosen by NewRateLimitedGetter
tokens chan bool
|
||||
// how long a slot stays occupied after its request has returned
window time.Duration
|
||||
}
|
||||
|
||||
// NewRateLimitedGetter returns an HTTPGetter that is limited by a
|
||||
// maximal amount of request over a time window
|
||||
func NewRateLimitedGetter(maxRequest uint, window time.Duration) HTTPGetter {
|
||||
return &rateLimitedHTTPGetter{
|
||||
tokens: make(chan bool, maxRequest),
|
||||
window: window,
|
||||
}
|
||||
}
|
||||
|
||||
func (g *rateLimitedHTTPGetter) Get(URL string) (*http.Response, error) {
|
||||
g.tokens <- true
|
||||
defer func() {
|
||||
go func() {
|
||||
time.Sleep(g.window)
|
||||
<-g.tokens
|
||||
}()
|
||||
}()
|
||||
return http.Get(URL)
|
||||
}
|
||||
Reference in New Issue
Block a user