Add crawler skeleton and add handler

James Mills
2021-02-01 21:23:58 +10:00
parent 8c84ee8b3d
commit b69b27eeed
6 changed files with 148 additions and 13 deletions

internal/crawler.go (new file, +34)

@@ -0,0 +1,34 @@
package internal

import (
    log "github.com/sirupsen/logrus"
)

type Crawler interface {
    Start()
    Crawl(url string) error
}

type crawler struct {
    q chan string
}

func NewCrawler() (Crawler, error) {
    return &crawler{q: make(chan string)}, nil
}

func (c *crawler) loop() {
    for {
        url := <-c.q
        log.Debugf("crawling %s", url)
    }
}

func (c *crawler) Crawl(url string) error {
    c.q <- url
    return nil
}

func (c *crawler) Start() {
    go c.loop()
}
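The crawler is only a skeleton at this point: loop() drains an unbuffered channel and logs each URL rather than fetching it, so Crawl() blocks until Start() has spun up the consumer goroutine. A minimal usage sketch from elsewhere in the same module (the main package, module path, and sleep are hypothetical, not part of this commit):

package main

import (
    "time"

    log "github.com/sirupsen/logrus"

    "example.com/search/internal" // hypothetical module path; not shown in the diff
)

func main() {
    log.SetLevel(log.DebugLevel) // the crawler logs at debug level

    crawler, err := internal.NewCrawler()
    if err != nil {
        log.Fatal(err)
    }

    // Start the consumer first: q is unbuffered, so Crawl blocks
    // until loop() is ready to receive.
    crawler.Start()
    crawler.Crawl("https://example.com")

    time.Sleep(100 * time.Millisecond) // give the goroutine a moment to log
}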

@@ -103,7 +103,30 @@ func (s *Server) IndexHandler() httprouter.Handle {
func (s *Server) AddHandler() httprouter.Handle {
    return func(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
        ctx := NewContext(s.config, s.db, r)

        if r.Method == http.MethodGet {
            s.render("add", w, ctx)
            return
        }

        url := NormalizeURL(strings.TrimSpace(r.FormValue("url")))
        if url == "" {
            ctx.Error = true
            ctx.Message = "Invalid URL"
            s.render("error", w, ctx)
            return
        }

        if err := s.crawler.Crawl(url); err != nil {
            ctx.Error = true
            ctx.Message = fmt.Sprintf("Error adding URL: %s", err)
            s.render("error", w, ctx)
            return
        }

        ctx.Error = false
        ctx.Message = "Successfully added URL"
        s.render("error", w, ctx)
    }
}
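On GET the handler renders the submission form; on any other method it normalizes the posted url field and hands it to the crawler queue (note that even the success message goes through the "error" template). Assuming the handler is mounted at /add — the route registration isn't shown in this diff — exercising it is a plain form POST; a hedged client sketch:

package main

import (
    "fmt"
    "net/http"
    "net/url"
)

func main() {
    // Assumes a locally running server with AddHandler mounted at /add;
    // both the bind address and the route are assumptions, not shown above.
    resp, err := http.PostForm("http://localhost:8000/add",
        url.Values{"url": {"https://example.com"}})
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status) // the body is the rendered template either way
}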

@@ -8,6 +8,10 @@ import (
    log "github.com/sirupsen/logrus"
)

const (
    URLHashLength = 15
)

// User ...
type User struct {
    Username string
@@ -28,6 +32,15 @@ type Token struct {
    ExpiresAt time.Time
}

// URL ...
type URL struct {
    URL       string
    CrawledAt time.Time
    ExpiresAt time.Time

    hash string
}

func LoadToken(data []byte) (token *Token, err error) {
    token = &Token{}
    if err := defaults.Set(token); err != nil {
@@ -99,3 +112,48 @@ func (u *User) Bytes() ([]byte, error) {
    }
    return data, nil
}

// NewURL ...
func NewURL() *URL {
    u := &URL{}
    if err := defaults.Set(u); err != nil {
        log.WithError(err).Error("error creating new URL object")
    }
    return u
}

// LoadURL ...
func LoadURL(data []byte) (u *URL, err error) {
    u = &URL{}
    if err := defaults.Set(u); err != nil {
        return nil, err
    }
    if err = json.Unmarshal(data, &u); err != nil {
        return nil, err
    }
    return
}

// Hash ...
func (u *URL) Hash() string {
    if u.hash != "" {
        return u.hash
    }
    hash := FastHash(u.String())
    u.hash = hash[len(hash)-URLHashLength:]
    return u.hash
}

// String ...
func (u *URL) String() string {
    return u.URL
}

// Bytes ...
func (u *URL) Bytes() ([]byte, error) {
    data, err := json.Marshal(u)
    if err != nil {
        return nil, err
    }
    return data, nil
}
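Hash() memoizes a short identifier for the URL: the first call runs FastHash over the full string and keeps only the trailing URLHashLength (15) characters in the unexported hash field, which json.Marshal skips, so LoadURL recomputes it from the URL on demand. A sketch of the intended round trip as a test in the same package (hypothetical, and assuming FastHash returns at least 15 characters):

package internal

import "testing"

// TestURLRoundTrip is a hypothetical sketch, not part of this commit.
func TestURLRoundTrip(t *testing.T) {
    u := NewURL()
    u.URL = "https://example.com/some/page"

    if got := len(u.Hash()); got != URLHashLength {
        t.Fatalf("want a %d-char hash, got %d", URLHashLength, got)
    }

    data, err := u.Bytes()
    if err != nil {
        t.Fatal(err)
    }

    // The unexported hash field is not serialized; LoadURL recomputes it.
    v, err := LoadURL(data)
    if err != nil {
        t.Fatal(err)
    }
    if v.Hash() != u.Hash() {
        t.Fatalf("hash mismatch after round trip: %q != %q", v.Hash(), u.Hash())
    }
}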

@@ -41,6 +41,9 @@ type Server struct {
    router *Router
    server *http.Server

    // Crawler
    crawler Crawler

    // Data Store
    db Store
@@ -345,6 +348,12 @@ func NewServer(bind string, options ...Option) (*Server, error) {
        return nil, fmt.Errorf("error validating config: %w", err)
    }

    crawler, err := NewCrawler()
    if err != nil {
        log.WithError(err).Error("error creating crawler")
        return nil, err
    }

    db, err := NewStore(config.Store)
    if err != nil {
        log.WithError(err).Error("error creating store")
@@ -406,6 +415,9 @@ func NewServer(bind string, options ...Option) (*Server, error) {
        // API
        api: api,

        // Crawler
        crawler: crawler,

        // Data Store
        db: db,
@@ -430,6 +442,9 @@ func NewServer(bind string, options ...Option) (*Server, error) {
    server.cron.Start()
    log.Info("started background jobs")

    server.crawler.Start()
    log.Infof("started crawler")

    server.setupMetrics()
    log.Infof("serving metrics endpoint at %s/metrics", server.config.BaseURL)