Record and skip URLs the crawler has seen before
@@ -10,6 +10,10 @@ import (
 	readability "github.com/go-shiori/go-readability"
 )
 
+const (
+	cacheDir = "cache"
+)
+
 func Scrape(conf *Config, url string) (*Entry, error) {
 	if url == "" {
 		return nil, ErrInvalidURL
@@ -48,7 +52,7 @@ func Scrape(conf *Config, url string) (*Entry, error) {
 		HTMLContent: article.Content,
 	}
 
-	fn := filepath.Join(conf.Data, fmt.Sprintf("%s.json", entry.Hash()))
+	fn := filepath.Join(conf.Data, cacheDir, fmt.Sprintf("%s.json", entry.Hash()))
 	data, err := entry.Bytes()
 	if err != nil {
 		log.WithError(err).Error("error serializing entry")
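With entries now written as hash-named JSON files under conf.Data/cache, the "skip URLs seen before" behavior from the commit title presumably reduces to an existence check on that cache file before scraping again. A minimal sketch of such a check, assuming the cache is keyed by a hash of the URL (the hashOf helper is hypothetical; the actual code derives the filename from entry.Hash()):

package scraper

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"os"
	"path/filepath"
)

const cacheDir = "cache"

// hashOf is a hypothetical stand-in for however the crawler keys its
// cache; here it simply hashes the URL itself.
func hashOf(url string) string {
	sum := sha256.Sum256([]byte(url))
	return hex.EncodeToString(sum[:])
}

// Seen reports whether a cached entry already exists for url, letting
// the crawler skip re-scraping it.
func Seen(dataDir, url string) bool {
	fn := filepath.Join(dataDir, cacheDir, fmt.Sprintf("%s.json", hashOf(url)))
	_, err := os.Stat(fn)
	return err == nil
}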