Add working Crawler, Scraper and Indexer

This commit is contained in:
James Mills
2021-02-01 22:38:17 +10:00
parent 753aff61a1
commit b398a3a709
8 changed files with 328 additions and 21 deletions

View File

@@ -10,17 +10,41 @@ type Crawler interface {
}
// crawler is the default Crawler implementation. It pulls URLs off a
// work queue, scrapes them using the shared configuration, and feeds
// the results to an Indexer.
type crawler struct {
	conf    *Config     // scraping configuration passed to Scrape
	q       chan string // unbuffered queue of URLs to crawl
	indexer Indexer     // destination for scraped entries
}
func NewCrawler() (Crawler, error) {
return &crawler{q: make(chan string)}, nil
func NewCrawler(conf *Config, indexer Indexer) (Crawler, error) {
return &crawler{
conf: conf,
q: make(chan string),
indexer: indexer,
}, nil
}
// loop is the crawler's worker: it blocks on the work queue, extracts
// the outbound links of each received URL, scrapes every link, and
// hands the scraped entry to the indexer. Errors at any stage are
// logged and the crawler moves on to the next item.
//
// NOTE(review): there is no shutdown path — the loop blocks on c.q
// forever. Consider adding a quit channel or context so the goroutine
// can be stopped and waited for.
func (c *crawler) loop() {
	for {
		url := <-c.q

		log.Debugf("crawling %s", url)
		links, err := GetLinks(url)
		if err != nil {
			// Errorf, not Error: the message contains a %s verb, so the
			// plain Error call would log the verb literally instead of
			// interpolating the URL.
			log.WithError(err).Errorf("error crawling %s", url)
			continue
		}

		// NOTE(review): assumes ranging over links yields the link
		// values themselves (channel or map keys), not slice indices —
		// confirm against GetLinks.
		for link := range links {
			log.Debugf("found %s", link)

			entry, err := Scrape(c.conf, link)
			if err != nil {
				log.WithError(err).Errorf("error scraping %s", link)
				continue
			}

			if err := c.indexer.Index(entry); err != nil {
				log.WithError(err).Errorf("error indexing %s", link)
			}
		}
	}
}