Record and skip URLs the crawler has seen before
@@ -8,10 +8,6 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
-const (
-	URLHashLength = 15
-)
-
 // User ...
 type User struct {
 	Username string
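The four removed lines are the URLHashLength block; with the hash truncation moving into HashURL (third hunk below), the constant presumably moves with it. The commit title promises recording and skipping of previously seen URLs; that bookkeeping is not visible in these hunks, so the following is only a sketch of the idea, keyed on the hash from the final hunk, and every name in it is hypothetical rather than the repository's own:

package crawler

import "sync"

// seenURLs is a hypothetical sketch of the commit title's record-and-skip
// behaviour: a concurrency-safe set keyed on the truncated URL hash.
type seenURLs struct {
	mu   sync.Mutex
	seen map[string]struct{}
}

func newSeenURLs() *seenURLs {
	return &seenURLs{seen: make(map[string]struct{})}
}

// Seen records the URL's hash and reports whether it was already present,
// so callers can skip URLs the crawler has visited before.
func (s *seenURLs) Seen(u *URL) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	key := u.Hash()
	if _, ok := s.seen[key]; ok {
		return true
	}
	s.seen[key] = struct{}{}
	return false
}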
@@ -114,8 +110,8 @@ func (u *User) Bytes() ([]byte, error) {
 }
 
 // NewURL ...
-func NewURL() *URL {
-	u := &URL{}
+func NewURL(url string) *URL {
+	u := &URL{URL: url}
 	if err := defaults.Set(u); err != nil {
 		log.WithError(err).Error("error creating new URI object")
 	}
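The constructor now takes the URL at construction time instead of starting from an empty struct. A minimal sketch of the surrounding type, assuming github.com/creasty/defaults for the defaults.Set call, and with Weight as an invented example of a tagged field (only the URL field and the cached hash are implied by the diff):

package crawler

import (
	"github.com/creasty/defaults"
	log "github.com/sirupsen/logrus"
)

// URL is an illustrative reconstruction; only the URL field and the
// unexported hash cache are implied by the diff. Weight and its
// default tag are assumptions to show what defaults.Set populates.
type URL struct {
	URL    string
	Weight int `default:"1"`

	hash string
}

// NewURL mirrors the new constructor: the URL is supplied up front and
// defaults.Set fills in any tagged fields before the value is used.
func NewURL(url string) *URL {
	u := &URL{URL: url}
	if err := defaults.Set(u); err != nil {
		log.WithError(err).Error("error creating new URI object")
	}
	return u
}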
@@ -140,8 +136,7 @@ func (u *URL) Hash() string {
 		return u.hash
 	}
 
-	hash := FastHash(u.String())
-	u.hash = hash[len(hash)-URLHashLength:]
+	u.hash = HashURL(u.String())
 
 	return u.hash
 }
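Previously the method hashed with FastHash and kept the last URLHashLength characters inline; the new HashURL presumably folds that truncation into a single helper. Its definition is not part of this diff, so the following is only a sketch under the assumption of a SHA-256 hex digest and the original 15-character suffix:

package crawler

import (
	"crypto/sha256"
	"encoding/hex"
)

// urlHashLength mirrors the URLHashLength constant removed in the first
// hunk; the value 15 is taken from the original const block.
const urlHashLength = 15

// HashURL is a guess at the new helper: hash the URL and keep only the
// last urlHashLength hex characters, matching the truncation the old
// inline code performed on FastHash's output.
func HashURL(url string) string {
	sum := sha256.Sum256([]byte(url))
	hash := hex.EncodeToString(sum[:])
	return hash[len(hash)-urlHashLength:]
}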