// Package fetcher retrieves Reddit search results via the public
// search.json endpoint, with rate limiting and Retry-After handling.
package fetcher
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
// PageResult contains raw page data and the after token.
type PageResult struct {
	// Posts holds the raw post payloads copied from the page's children.
	Posts []JSONPost
	// After is the pagination token copied from the API response; pass it
	// to the next FetchPage call. Presumably empty when there are no
	// further pages — confirm against the Reddit listing API.
	After string
	// Latency is the time from issuing the request until response
	// headers were received (it does not include reading the body).
	Latency time.Duration
}
|
|
|
|
// buildSearchURL constructs the Reddit search.json URL.
//
// The query is always site-wide ("restrict_sr=false"), sorted newest-first,
// restricted to link posts. The after pagination token is included only when
// non-empty. Returns an error only if the base URL fails to parse.
func buildSearchURL(query string, after string, limit int) (string, error) {
	u, err := url.Parse("https://www.reddit.com/search.json")
	if err != nil {
		return "", err
	}
	q := u.Query()
	q.Set("q", query)
	q.Set("sort", "new")
	q.Set("type", "link")
	// strconv.Itoa is the idiomatic int-to-string conversion; fmt.Sprintf
	// boxes the argument and goes through reflection for no benefit here.
	q.Set("limit", strconv.Itoa(limit))
	q.Set("restrict_sr", "false")
	if after != "" {
		q.Set("after", after)
	}
	u.RawQuery = q.Encode()
	return u.String(), nil
}
|
|
|
|
// FetchPage fetches a single search.json page and returns PageResult.
|
|
func (c *Client) FetchPage(ctx context.Context, query string, after string, limit int) (PageResult, error) {
|
|
var res PageResult
|
|
urlStr, err := buildSearchURL(query, after, limit)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
// wait for rate limiter
|
|
if c.limiter != nil {
|
|
if err := c.limiter.Wait(ctx); err != nil {
|
|
return res, err
|
|
}
|
|
}
|
|
start := time.Now()
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
req.Header.Set("User-Agent", c.userAgent)
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := c.httpClient.Do(req)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
defer resp.Body.Close()
|
|
res.Latency = time.Since(start)
|
|
|
|
if resp.StatusCode == http.StatusTooManyRequests {
|
|
// honor Retry-After if given (seconds or HTTP-date)
|
|
if ra := resp.Header.Get("Retry-After"); ra != "" {
|
|
// Try seconds first
|
|
if secs, perr := strconv.Atoi(ra); perr == nil {
|
|
time.Sleep(time.Duration(secs) * time.Second)
|
|
} else if d, derr := http.ParseTime(ra); derr == nil {
|
|
// If HTTP-date, compute duration until then
|
|
dur := time.Until(d)
|
|
if dur > 0 {
|
|
time.Sleep(dur)
|
|
}
|
|
}
|
|
}
|
|
return res, fmt.Errorf("rate limited: 429")
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return res, fmt.Errorf("http %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var api APIResponse
|
|
dec := json.NewDecoder(resp.Body)
|
|
if err := dec.Decode(&api); err != nil {
|
|
return res, err
|
|
}
|
|
for _, child := range api.Data.Children {
|
|
res.Posts = append(res.Posts, child.Data)
|
|
}
|
|
res.After = api.Data.After
|
|
return res, nil
|
|
}
|