Record and skip URLs the crawler has seen before
This commit is contained in:
@@ -42,6 +42,8 @@ const (
|
||||
WeekAgo = DayAgo * 7
|
||||
MonthAgo = DayAgo * 30
|
||||
YearAgo = MonthAgo * 12
|
||||
|
||||
URLHashLength = 15
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -73,6 +75,11 @@ func FastHash(s string) string {
|
||||
return hash
|
||||
}
|
||||
|
||||
func HashURL(url string) string {
|
||||
hash := FastHash(url)
|
||||
return hash[len(hash)-URLHashLength:]
|
||||
}
|
||||
|
||||
// IntPow returns x raised to the power y as an int.
//
// Non-negative exponents are evaluated with integer exponentiation by
// squaring, so any result that fits in an int is exact. Going through float64
// (as math.Pow does) rounds values above 2^53. Results that do not fit in an
// int wrap around like any other Go integer overflow.
//
// Negative exponents keep the float-based behavior: the value of math.Pow is
// converted to int, which truncates fractional results toward zero.
func IntPow(x, y int) int {
	if y < 0 {
		return int(math.Pow(float64(x), float64(y)))
	}
	result := 1
	for base, exp := x, y; exp > 0; exp >>= 1 {
		if exp&1 == 1 {
			result *= base
		}
		base *= base
	}
	return result
}
|
||||
|
||||
Reference in New Issue
Block a user