Implements a stupid crawler

This commit is contained in:
2016-01-14 15:41:13 +01:00
parent f394ff0069
commit 3d636d4cca

90
main.go
View File

@@ -1,11 +1,95 @@
package main
import (
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
)
func main() {
fmt.Fprintf(os.Stderr, "Not Yet Implemented")
os.Exit(1)
// Execute reads the semicolon-separated CSV file named by the single
// command-line argument, skips its first (header) row, and for every
// remaining record builds an Album and issues an HTTP GET for the matching
// page on www.bedetheque.com. Records whose response status is 400 or above
// are written, followed by that status, as one line each to errors.csv in
// the current directory.
//
// Requests are paced on a fixed 2-second grid measured from the start of the
// crawl: the k-th record finishes no earlier than 2*k seconds after start.
//
// It returns nil once the input is exhausted. Otherwise it returns the first
// error encountered: wrong argument count, a file or CSV read error, a record
// with too few fields, an unparsable ID or number, a failed HTTP request, or
// a failed write to errors.csv.
func Execute() error {
	// Number of columns the loop body indexes (data[0] through data[5]).
	const minFields = 6

	if len(os.Args) != 2 {
		return fmt.Errorf("Missing mandatory .csv parameter")
	}

	csvFile, err := os.Open(os.Args[1])
	if err != nil {
		return err
	}
	defer csvFile.Close()

	csvReader := csv.NewReader(csvFile)
	csvReader.Comma = ';'

	// Discard the header row.
	if _, err = csvReader.Read(); err != nil {
		return err
	}

	errorFile, err := os.Create("errors.csv")
	if err != nil {
		return err
	}
	defer errorFile.Close()

	waitUntil := time.Now()
	for {
		waitUntil = waitUntil.Add(2 * time.Second)

		data, err := csvReader.Read()
		if err == io.EOF {
			// Running out of records is the normal way for the crawl to end.
			return nil
		}
		if err != nil {
			return err
		}
		if len(data) < minFields {
			return fmt.Errorf("record has %d fields, expected at least %d", len(data), minFields)
		}

		a := Album{}

		// ID
		id, err := strconv.ParseInt(data[0], 0, 64)
		if err != nil {
			return err
		}
		a.ID = uint64(id)

		// ISBN
		a.ISBN = data[1]

		// Series
		a.Series = data[2]

		// Num: an empty column is recorded as -1.
		if len(data[3]) == 0 {
			a.Num = -1
		} else {
			num, err := strconv.ParseInt(data[3], 0, 64)
			if err != nil {
				return err
			}
			a.Num = int(num)
		}

		// NumA
		a.NumA = data[4]

		// Title
		a.Title = data[5]

		url := "http://www.bedetheque.com/" + a.GetBedethequeComURI()
		log.Printf("Contacting %s", url)
		resp, err := http.Get(url)
		if err != nil {
			return err
		}
		// Only the status code is needed. Close the body right away rather
		// than deferring, since a defer inside this loop would keep every
		// response open until Execute returns.
		status := resp.StatusCode
		resp.Body.Close()

		log.Printf("Got %d", status)
		if status >= 400 {
			// One failed record per line.
			if _, err := fmt.Fprintf(errorFile, "%s;%d\n", strings.Join(data, ";"), status); err != nil {
				return err
			}
		}

		// A non-positive duration makes Sleep return immediately, so a slow
		// request simply skips the wait.
		time.Sleep(waitUntil.Sub(time.Now()))
	}
}
// main runs Execute and, if it reports an error, logs that error and
// terminates the process with exit status 1.
func main() {
	err := Execute()
	if err == nil {
		return
	}
	log.Printf("got unhandled error: %s", err)
	os.Exit(1)
}