Record and skip URLs the crawler has seen before
This commit is contained in:
@@ -14,6 +14,7 @@ const (
|
||||
feedsKeyPrefix = "/feeds"
|
||||
sessionsKeyPrefix = "/sessions"
|
||||
usersKeyPrefix = "/users"
|
||||
urlsKeyPrefix = "/urls"
|
||||
tokensKeyPrefix = "/tokens"
|
||||
)
|
||||
|
||||
@@ -67,16 +68,6 @@ func (bs *BitcaskStore) Merge() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) HasFeed(name string) bool {
|
||||
key := []byte(fmt.Sprintf("%s/%s", feedsKeyPrefix, name))
|
||||
return bs.db.Has(key)
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) DelFeed(name string) error {
|
||||
key := []byte(fmt.Sprintf("%s/%s", feedsKeyPrefix, name))
|
||||
return bs.db.Delete(key)
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) HasUser(username string) bool {
|
||||
key := []byte(fmt.Sprintf("%s/%s", usersKeyPrefix, username))
|
||||
return bs.db.Has(key)
|
||||
@@ -160,6 +151,72 @@ func (bs *BitcaskStore) GetAllUsers() ([]*User, error) {
|
||||
return users, nil
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) HasURL(hash string) bool {
|
||||
key := []byte(fmt.Sprintf("%s/%s", urlsKeyPrefix, hash))
|
||||
return bs.db.Has(key)
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) DelURL(hash string) error {
|
||||
key := []byte(fmt.Sprintf("%s/%s", urlsKeyPrefix, hash))
|
||||
return bs.db.Delete(key)
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) GetURL(hash string) (*URL, error) {
|
||||
key := []byte(fmt.Sprintf("%s/%s", urlsKeyPrefix, hash))
|
||||
data, err := bs.db.Get(key)
|
||||
if err == bitcask.ErrKeyNotFound {
|
||||
return nil, ErrURLNotFound
|
||||
}
|
||||
return LoadURL(data)
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) SetURL(hash string, url *URL) error {
|
||||
data, err := url.Bytes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
key := []byte(fmt.Sprintf("%s/%s", urlsKeyPrefix, hash))
|
||||
if err := bs.db.Put(key, data); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) URLCount() int64 {
|
||||
var count int64
|
||||
|
||||
if err := bs.db.Scan([]byte(urlsKeyPrefix), func(_ []byte) error {
|
||||
count++
|
||||
return nil
|
||||
}); err != nil {
|
||||
log.WithError(err).Error("error scanning")
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) ForEachURL(f func(url *URL) error) error {
|
||||
err := bs.db.Scan([]byte(urlsKeyPrefix), func(key []byte) error {
|
||||
data, err := bs.db.Get(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
url, err := LoadURL(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return f(url)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (bs *BitcaskStore) GetSession(sid string) (*session.Session, error) {
|
||||
key := []byte(fmt.Sprintf("%s/%s", sessionsKeyPrefix, sid))
|
||||
data, err := bs.db.Get(key)
|
||||
|
||||
Reference in New Issue
Block a user