Record and skip URLs the crawler has seen before

This commit is contained in:
James Mills
2021-02-01 23:46:26 +10:00
parent b398a3a709
commit 8a1161cf77
7 changed files with 126 additions and 36 deletions

View File

@@ -42,6 +42,8 @@ const (
WeekAgo = DayAgo * 7
MonthAgo = DayAgo * 30
YearAgo = MonthAgo * 12
URLHashLength = 15
)
var (
@@ -73,6 +75,11 @@ func FastHash(s string) string {
return hash
}
// HashURL returns a short identifier for url: the trailing URLHashLength
// bytes of FastHash(url).
//
// If the digest is not longer than URLHashLength, the whole digest is
// returned; slicing from a negative offset would otherwise panic.
func HashURL(url string) string {
	hash := FastHash(url)
	if len(hash) <= URLHashLength {
		return hash
	}
	return hash[len(hash)-URLHashLength:]
}
// IntPow returns x raised to the power y.
//
// Non-negative exponents are computed exactly with integer arithmetic
// (exponentiation by squaring), so large results are not rounded the way a
// round-trip through float64 would round anything above 2^53. Results that do
// not fit in an int wrap around.
//
// Negative exponents go through math.Pow and the float result is converted to
// int, which discards the fractional part (e.g. IntPow(2, -1) == 0).
func IntPow(x, y int) int {
	if y < 0 {
		return int(math.Pow(float64(x), float64(y)))
	}

	result := 1
	for base := x; y > 0; y >>= 1 {
		// Multiply in the current power of x whenever its bit is set in y.
		if y&1 == 1 {
			result *= base
		}
		base *= base
	}
	return result
}