Add HTTP GET rate limiting to the cache
We do not want to consume a lot of bandwidth on bedetheque.com, so the pace at which we perform GET requests is capped at a maximum number of requests over a time window (for example, no more than 10 requests over 10 seconds). If more requests are needed, the extra requests are simply paused until we are back within the limit ;). Go rulez!
This commit is contained in:
@@ -4,7 +4,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -21,17 +20,17 @@ import (
|
|||||||
type AlbumDescriptionCache struct {
|
type AlbumDescriptionCache struct {
|
||||||
basepath string
|
basepath string
|
||||||
lock lockfile.Lockfile
|
lock lockfile.Lockfile
|
||||||
|
getter HTTPGetter
|
||||||
// time to live of the cache, data which is older than this TTL will be automatically removed
|
// time to live of the cache, data which is older than this TTL will be automatically removed
|
||||||
TTL time.Duration
|
TTL time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAlbumDescriptionCache is creating a new album description at specified location
|
// NewAlbumDescriptionCache is creating a new album description at specified location
|
||||||
func NewAlbumDescriptionCache(path string) (*AlbumDescriptionCache, error) {
|
func NewAlbumDescriptionCache(path string, maxRequest uint, window time.Duration) (*AlbumDescriptionCache, error) {
|
||||||
res := &AlbumDescriptionCache{
|
res := &AlbumDescriptionCache{
|
||||||
basepath: path,
|
basepath: path,
|
||||||
|
getter: NewRateLimitedGetter(maxRequest, window),
|
||||||
TTL: 3 * 31 * 24 * time.Hour, // 3 Months
|
TTL: 3 * 31 * 24 * time.Hour, // 3 Months
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
|
res.lock, err = lockfile.New(filepath.Join(path, "global.lock"))
|
||||||
@@ -79,7 +78,7 @@ func linkFromSelection(s *goquery.Selection) Link {
|
|||||||
|
|
||||||
func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
|
func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, error) {
|
||||||
URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
|
URL := path.Join("www.bedetheque.com", fmt.Sprintf("BD--%d.html", ID))
|
||||||
resp, err := http.Get("http://" + URL)
|
resp, err := c.getter.Get("http://" + URL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -107,7 +106,7 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := http.Get(src)
|
resp, err := c.getter.Get(src)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errors <- err
|
errors <- err
|
||||||
return
|
return
|
||||||
@@ -224,13 +223,20 @@ func (c *AlbumDescriptionCache) fetchAndCache(ID AlbumID) (*AlbumDescription, er
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errorList) == 0 {
|
if len(errorList) != 0 {
|
||||||
return res, nil
|
return nil, fmt.Errorf("Could not parse description from http://%s:%s",
|
||||||
|
URL,
|
||||||
|
strings.Join(append([]string{""}, errorList...), "\n * "))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("Could not parse description from http://%s:%s",
|
f, err := os.Create(c.albumPath(ID))
|
||||||
URL,
|
if err != nil {
|
||||||
strings.Join(append([]string{""}, errorList...), "\n * "))
|
return res, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
enc := json.NewEncoder(f)
|
||||||
|
return res, enc.Encode(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetDescription retrieves from the cache or either from www.bedetheque.com the AlbumDescription of an album
|
// GetDescription retrieves from the cache or either from www.bedetheque.com the AlbumDescription of an album
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
. "gopkg.in/check.v1"
|
. "gopkg.in/check.v1"
|
||||||
)
|
)
|
||||||
@@ -30,7 +31,7 @@ func (s *AlbumDescriptionCacheSuite) TestCanFetchCache(c *C) {
|
|||||||
c.Assert(err, IsNil)
|
c.Assert(err, IsNil)
|
||||||
defer os.RemoveAll(tmpdir)
|
defer os.RemoveAll(tmpdir)
|
||||||
|
|
||||||
cache, err := NewAlbumDescriptionCache(tmpdir)
|
cache, err := NewAlbumDescriptionCache(tmpdir, 10, 10*time.Second)
|
||||||
c.Assert(err, IsNil)
|
c.Assert(err, IsNil)
|
||||||
|
|
||||||
for ID, expected := range data {
|
for ID, expected := range data {
|
||||||
|
|||||||
47
rate_limited_getter.go
Normal file
47
rate_limited_getter.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// HTTPGetter is implemented by anything able to perform an HTTP GET request.
type HTTPGetter interface {
	// Get issues a GET request on url and returns the resulting response,
	// or an error if the request could not be performed.
	Get(url string) (*http.Response, error)
}
|
||||||
|
|
||||||
|
type simpleHTTPGetter struct{}
|
||||||
|
|
||||||
|
// NewHTTPGetter returns the simplest object that can GET over HTTP
|
||||||
|
func NewHTTPGetter() HTTPGetter {
|
||||||
|
return &simpleHTTPGetter{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *simpleHTTPGetter) Get(URL string) (*http.Response, error) {
|
||||||
|
return http.Get(URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
type rateLimitedHTTPGetter struct {
|
||||||
|
tokens chan bool
|
||||||
|
window time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRateLimitedGetter returns an HTTPGetter that is limited by a
|
||||||
|
// maximal amount of request over a time window
|
||||||
|
func NewRateLimitedGetter(maxRequest uint, window time.Duration) HTTPGetter {
|
||||||
|
return &rateLimitedHTTPGetter{
|
||||||
|
tokens: make(chan bool, maxRequest),
|
||||||
|
window: window,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *rateLimitedHTTPGetter) Get(URL string) (*http.Response, error) {
|
||||||
|
g.tokens <- true
|
||||||
|
defer func() {
|
||||||
|
go func() {
|
||||||
|
time.Sleep(g.window)
|
||||||
|
<-g.tokens
|
||||||
|
}()
|
||||||
|
}()
|
||||||
|
return http.Get(URL)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user