chore: bootstrap project skeleton with README, .env.example, .gitignore and minimal main
This commit is contained in:
commit
a2acaaadc8
10
.env.example
Normal file
10
.env.example
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Example env for reddit-scraper
|
||||||
|
KEYWORD=golang
|
||||||
|
LIMIT=1000
|
||||||
|
CONCURRENCY=4
|
||||||
|
RETRY_LIMIT=5
|
||||||
|
RATE_LIMIT_DELAY=2s
|
||||||
|
OUTPUT_DIR=./data
|
||||||
|
DEDUP_CACHE_PATH=./data/dedup_ids.txt
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
USER_AGENT=reddit-scraper-mvp/0.1 (+contact: you@example.com)
|
||||||
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
/bin/
|
||||||
|
/data/
|
||||||
|
*.log
|
||||||
|
*.db
|
||||||
|
*.env
|
||||||
|
.DS_Store
|
||||||
|
/tmp/
|
||||||
|
*.exe
|
||||||
|
*.exe~
|
||||||
|
*.out
|
||||||
11
README.md
Normal file
11
README.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Reddit Scraper MVP
|
||||||
|
|
||||||
|
Lightweight Reddit post scraper.
|
||||||
|
|
||||||
|
Usage example:
|
||||||
|
|
||||||
|
go run ./cmd/reddit-scraper -keyword "golang" -limit 1000
|
||||||
|
|
||||||
|
Configuration is provided via command-line flags or a `.env` file.
|
||||||
|
|
||||||
|
Output is written to `data/{keyword}_{timestamp}.jsonl`.
|
||||||
35
cmd/reddit-scraper/main.go
Normal file
35
cmd/reddit-scraper/main.go
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// main parses the -keyword and -limit flags, validates them, and runs a
// placeholder crawl that finishes after one second or stops early when
// SIGINT or SIGTERM is received.
//
// Invalid arguments are reported on stderr and the process exits with
// status 2.
func main() {
	keyword := flag.String("keyword", "", "Search keyword (required)")
	limit := flag.Int("limit", 100, "Max posts to fetch")
	flag.Parse()

	// Validate before installing the signal handler so that os.Exit never
	// bypasses a pending deferred cancel.
	if err := validateArgs(*keyword, *limit); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(2)
	}

	// ctx is cancelled on the first SIGINT/SIGTERM; cancel restores the
	// default signal behaviour when main returns.
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	// placeholder: in future wire up controller
	fmt.Printf("Starting crawl for keyword=%s limit=%d\n", *keyword, *limit)

	// Stand-in for the real crawl: whichever happens first wins.
	select {
	case <-time.After(1 * time.Second):
		fmt.Println("Done (placeholder)")
	case <-ctx.Done():
		fmt.Println("Cancelled")
	}
}

// validateArgs checks the parsed command-line values before any work starts.
// It returns an error when keyword is empty or when limit is not a positive
// number, and nil otherwise.
func validateArgs(keyword string, limit int) error {
	if keyword == "" {
		return fmt.Errorf("-keyword is required")
	}
	if limit <= 0 {
		return fmt.Errorf("-limit must be a positive integer, got %d", limit)
	}
	return nil
}
|
||||||
Loading…
Reference in New Issue
Block a user