package internal

import (
	"context"
	"fmt"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"git.mills.io/prologic/observe"
	"github.com/NYTimes/gziphandler"
	"github.com/justinas/nosurf"
	"github.com/robfig/cron"
	log "github.com/sirupsen/logrus"
	"github.com/unrolled/logger"

	"git.mills.io/prologic/spyda"
	"git.mills.io/prologic/spyda/internal/session"
	"git.mills.io/prologic/spyda/internal/static"
)

var (
	// metrics is the package-level metrics registry for the server.
	metrics *observe.Metrics
)

func init() {
	metrics = observe.NewMetrics("spyda")
}

// Server is the spyda application server. It wires together the HTTP router,
// templates, data store, indexer, crawler, cron scheduler, task dispatcher
// and session manager.
type Server struct {
	bind    string
	config  *Config
	tmplman *TemplateManager
	router  *Router
	server  *http.Server

	// Indexer
	indexer Indexer

	// Crawler
	crawler Crawler

	// Data Store
	db Store

	// Scheduler
	cron *cron.Cron

	// Dispatcher
	tasks *Dispatcher

	// Sessions
	sc session.Store
	sm *session.Manager
}

// render executes the named template with ctx and writes the result to w,
// responding with a 500 Internal Server Error if rendering fails.
func (s *Server) render(name string, w http.ResponseWriter, ctx *Context) {
	buf, err := s.tmplman.Exec(name, ctx)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	_, err = buf.WriteTo(w)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

// AddRoute registers an http.Handler on the server's router for the given
// HTTP method and path.
func (s *Server) AddRoute(method, path string, handler http.Handler) {
	s.router.Handler(method, path, handler)
}

// AddShutdownHook registers a function to be called when the underlying
// http.Server is shut down.
func (s *Server) AddShutdownHook(f func()) {
	s.server.RegisterOnShutdown(f)
}

// Shutdown gracefully stops the server: it stops the cron scheduler, task
// dispatcher and crawler, shuts down the HTTP server and closes the store.
func (s *Server) Shutdown(ctx context.Context) error {
	s.cron.Stop()
	s.tasks.Stop()
	s.crawler.Stop()

	if err := s.server.Shutdown(ctx); err != nil {
		log.WithError(err).Error("error shutting down server")
		return err
	}

	if err := s.db.Close(); err != nil {
		log.WithError(err).Error("error closing store")
		return err
	}

	return nil
}

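// A hedged sketch of invoking Shutdown directly with a deadline; the 30s
// timeout and the svr variable are illustrative, not taken from this file:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//	defer cancel()
//	if err := svr.Shutdown(ctx); err != nil {
//		log.WithError(err).Error("graceful shutdown failed")
//	}
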
// Run starts the HTTP server, blocks until a SIGINT or SIGTERM signal is
// received, and then shuts the server down gracefully.
func (s *Server) Run() (err error) {
	idleConnsClosed := make(chan struct{})

	go func() {
		if err = s.ListenAndServe(); err != http.ErrServerClosed {
			// Error starting or closing listener:
			log.WithError(err).Fatal("HTTP server ListenAndServe")
		}
	}()

	sigch := make(chan os.Signal, 1)
	signal.Notify(sigch, syscall.SIGINT, syscall.SIGTERM)
	sig := <-sigch
	log.Infof("Received signal %s", sig)

	log.Info("Shutting down...")

	// We received an interrupt signal, shut down.
	if err = s.Shutdown(context.Background()); err != nil {
		// Error from closing listeners, or context timeout:
		log.WithError(err).Fatal("Error shutting down HTTP server")
	}
	close(idleConnsClosed)

	<-idleConnsClosed

	return
}

// ListenAndServe starts the underlying HTTP server and blocks until it
// returns.
func (s *Server) ListenAndServe() error {
	return s.server.ListenAndServe()
}

// AddCronJob schedules a cron.Job to run according to the given cron spec.
func (s *Server) AddCronJob(spec string, job cron.Job) error {
	return s.cron.AddJob(spec, job)
}

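// A hedged usage sketch for the extension points above; svr, the handler and
// the job value are illustrative and not defined in this file:
//
//	svr.AddRoute("GET", "/ping", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
//		fmt.Fprintln(w, "pong")
//	}))
//
//	if err := svr.AddCronJob("@hourly", job); err != nil {
//		log.WithError(err).Error("error adding cron job")
//	}
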
func (s *Server) setupMetrics() {
	ctime := time.Now()

	// server uptime counter
	metrics.NewCounterFunc(
		"server", "uptime",
		"Number of nanoseconds the server has been running",
		func() float64 {
			return float64(time.Since(ctime).Nanoseconds())
		},
	)

	metrics.NewGaugeFunc(
		"db", "urls",
		"Number of database /urls keys",
		func() float64 {
			return float64(s.db.URLCount())
		},
	)

	// Crawler stats
	metrics.NewCounter(
		"crawler", "crawled",
		"Number of links crawled by the crawler",
	)
	metrics.NewCounter(
		"crawler", "scraped",
		"Number of links scraped by the crawler",
	)
	metrics.NewGauge(
		"crawler", "duration",
		"Duration of crawler tasks",
	)

	// Index stats
	metrics.NewGaugeFunc(
		"index", "size",
		"Size of index (number of indexed entries)",
		func() float64 {
			return float64(s.indexer.Size())
		},
	)

	// server info
	metrics.NewGaugeVec(
		"server", "info",
		"Server information",
		[]string{"full_version", "version", "commit"},
	)
	metrics.GaugeVec("server", "info").
		With(map[string]string{
			"full_version": spyda.FullVersion(),
			"version":      spyda.Version,
			"commit":       spyda.Commit,
		}).Set(1)
	metrics.NewCounter(
		"server", "queries",
		"Number of queries processed",
	)

	s.AddRoute("GET", "/metrics", metrics.Handler())
}

// setupCronJobs registers every job in Jobs that has a non-empty schedule
// with the cron scheduler.
func (s *Server) setupCronJobs() error {
	for name, jobSpec := range Jobs {
		if jobSpec.Schedule == "" {
			continue
		}

		job := jobSpec.Factory(s.config, s.db)
		if err := s.cron.AddJob(jobSpec.Schedule, job); err != nil {
			return err
		}
		log.Infof("Started background job %s (%s)", name, jobSpec.Schedule)
	}

	return nil
}

// runStartupJobs waits briefly for the server to come up, runs each job in
// StartupJobs once, and then merges the store.
func (s *Server) runStartupJobs() {
	time.Sleep(time.Second * 5)

	log.Info("running startup jobs")
	for name, jobSpec := range StartupJobs {
		job := jobSpec.Factory(s.config, s.db)
		log.Infof("running %s now...", name)
		job.Run()
	}

	// Merge store
	if err := s.db.Merge(); err != nil {
		log.WithError(err).Error("error merging store")
	}
}

// initRoutes registers the static asset handlers and all of the server's
// HTTP routes.
func (s *Server) initRoutes() {
	if s.config.Debug {
		s.router.ServeFiles("/css/*filepath", http.Dir("./internal/static/css"))
		s.router.ServeFiles("/img/*filepath", http.Dir("./internal/static/img"))
		s.router.ServeFiles("/js/*filepath", http.Dir("./internal/static/js"))
	} else {
		cssFS := static.GetSubFilesystem("css")
		imgFS := static.GetSubFilesystem("img")
		jsFS := static.GetSubFilesystem("js")
		s.router.ServeFilesWithCacheControl("/css/:commit/*filepath", cssFS)
		s.router.ServeFilesWithCacheControl("/img/:commit/*filepath", imgFS)
		s.router.ServeFilesWithCacheControl("/js/:commit/*filepath", jsFS)
	}

	s.router.NotFound = http.HandlerFunc(s.NotFoundHandler)

	s.router.GET("/about", s.PageHandler("about"))
	s.router.GET("/help", s.PageHandler("help"))
	s.router.GET("/privacy", s.PageHandler("privacy"))

	s.router.GET("/", s.IndexHandler())
	s.router.HEAD("/", s.IndexHandler())

	s.router.GET("/opensearch.xml", s.OpenSearchHandler())

	s.router.GET("/robots.txt", s.RobotsHandler())
	s.router.HEAD("/robots.txt", s.RobotsHandler())

	s.router.GET("/search", s.SearchHandler())

	s.router.GET("/cache/:hash", s.CacheHandler())
	s.router.HEAD("/cache/:hash", s.CacheHandler())

	// Task State
	s.router.GET("/tasks", s.TasksHandler())
	s.router.GET("/task/:uuid", s.TaskHandler())

	s.router.GET("/add", s.AddHandler())
	s.router.POST("/add", s.AddHandler())

	// Support
	s.router.GET("/support", s.SupportHandler())
	s.router.POST("/support", s.SupportHandler())
	s.router.GET("/_captcha", s.CaptchaHandler())
}

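// A hedged sketch of constructing and running the server; the bind address
// is illustrative and no functional options are passed:
//
//	svr, err := NewServer(":8000")
//	if err != nil {
//		log.WithError(err).Fatal("error creating server")
//	}
//	if err := svr.Run(); err != nil {
//		log.WithError(err).Fatal("error running server")
//	}
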
// NewServer constructs a new Server bound to the given address, applying any
// functional options, and starts its background jobs, task dispatcher and
// crawler.
func NewServer(bind string, options ...Option) (*Server, error) {
	config := NewConfig()

	for _, opt := range options {
		if err := opt(config); err != nil {
			return nil, err
		}
	}

	if err := config.Validate(); err != nil {
		log.WithError(err).Error("error validating config")
		return nil, fmt.Errorf("error validating config: %w", err)
	}

	db, err := NewStore(config.Store)
	if err != nil {
		log.WithError(err).Error("error creating store")
		return nil, err
	}

	if err := db.Merge(); err != nil {
		log.WithError(err).Error("error merging store")
		return nil, err
	}

	tmplman, err := NewTemplateManager(config)
	if err != nil {
		log.WithError(err).Error("error creating template manager")
		return nil, err
	}

	router := NewRouter()

	tasks := NewDispatcher(2, 10) // TODO: Make this configurable?

	sc := session.NewMemoryStore(config.SessionExpiry)
	sm := session.NewManager(
		session.NewOptions(
			config.Name,
			config.CookieSecret,
			config.LocalURL().Scheme == "https",
			config.SessionExpiry,
		),
		sc,
	)

	indexer, err := NewIndexer(config)
	if err != nil {
		log.WithError(err).Error("error creating indexer")
		return nil, err
	}

	crawler, err := NewCrawler(config, tasks, db, indexer)
	if err != nil {
		log.WithError(err).Error("error creating crawler")
		return nil, err
	}

	csrfHandler := nosurf.New(router)
	csrfHandler.ExemptGlob("/api/v1/*")

	server := &Server{
		bind:    bind,
		config:  config,
		router:  router,
		tmplman: tmplman,

		server: &http.Server{
			Addr: bind,
			Handler: logger.New(logger.Options{
				Prefix:               "spyda",
				RemoteAddressHeaders: []string{"X-Forwarded-For"},
			}).Handler(
				gziphandler.GzipHandler(
					sm.Handler(csrfHandler),
				),
			),
		},

		// Indexer
		indexer: indexer,

		// Crawler
		crawler: crawler,

		// Data Store
		db: db,

		// Scheduler
		cron: cron.New(),

		// Dispatcher
		tasks: tasks,

		// Session Manager
		sc: sc,
		sm: sm,
	}

	if err := server.setupCronJobs(); err != nil {
		log.WithError(err).Error("error setting up background jobs")
		return nil, err
	}
	server.cron.Start()
	log.Info("started background jobs")

	server.tasks.Start()
	log.Info("started task dispatcher")

	server.crawler.Start()
	log.Info("started crawler")

	server.setupMetrics()
	log.Infof("serving metrics endpoint at %s/metrics", server.config.BaseURL)

	// Log interesting configuration options
	log.Infof("Instance Name: %s", server.config.Name)
	log.Infof("Base URL: %s", server.config.BaseURL)
	log.Infof("Admin Name: %s", server.config.AdminName)
	log.Infof("Admin Email: %s", server.config.AdminEmail)
	log.Infof("SMTP Host: %s", server.config.SMTPHost)
	log.Infof("SMTP Port: %d", server.config.SMTPPort)
	log.Infof("SMTP User: %s", server.config.SMTPUser)
	log.Infof("SMTP From: %s", server.config.SMTPFrom)

	server.initRoutes()

	go server.runStartupJobs()

	return server, nil
}