commit a2acaaadc8417a32d664e03a95fc4fe84274e43a
Author: Sirin Puenggun
Date:   Mon Oct 13 23:23:52 2025 +0700

    chore: bootstrap project skeleton with README, .env.example, .gitignore and minimal main

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..eab35ab
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,10 @@
+# Example env for reddit-scraper
+KEYWORD=golang
+LIMIT=1000
+CONCURRENCY=4
+RETRY_LIMIT=5
+RATE_LIMIT_DELAY=2s
+OUTPUT_DIR=./data
+DEDUP_CACHE_PATH=./data/dedup_ids.txt
+LOG_LEVEL=INFO
+USER_AGENT=reddit-scraper-mvp/0.1 (+contact: you@example.com)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..55846fd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+/bin/
+/data/
+*.log
+*.db
+*.env
+.DS_Store
+/tmp/
+*.exe
+*.exe~
+*.out
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..114de84
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Reddit Scraper MVP
+
+Lightweight Reddit post scraper.
+
+Usage example:
+
+    go run ./cmd/reddit-scraper -keyword "golang" -limit 1000
+
+Configuration is via flags or a `.env` file.
+
+Outputs are written to `data/{keyword}_{timestamp}.jsonl`.
diff --git a/cmd/reddit-scraper/main.go b/cmd/reddit-scraper/main.go
new file mode 100644
index 0000000..57c221f
--- /dev/null
+++ b/cmd/reddit-scraper/main.go
@@ -0,0 +1,48 @@
+// Command reddit-scraper is the entry point for the Reddit post scraper.
+// It currently validates its flags and runs a placeholder "crawl" that
+// either finishes after one second or stops early on SIGINT/SIGTERM.
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+)
+
+// main parses the command-line flags, rejects invalid values with exit
+// status 2, and then waits for either the placeholder work to finish or a
+// termination signal to arrive.
+func main() {
+	keyword := flag.String("keyword", "", "Search keyword (required)")
+	limit := flag.Int("limit", 100, "Max posts to fetch")
+	flag.Parse()
+
+	if *keyword == "" {
+		fmt.Fprintln(os.Stderr, "-keyword is required")
+		os.Exit(2)
+	}
+	// A non-positive value makes no sense for "Max posts to fetch"; fail
+	// fast with the same exit status used for the missing-keyword error.
+	if *limit <= 0 {
+		fmt.Fprintln(os.Stderr, "-limit must be greater than 0")
+		os.Exit(2)
+	}
+
+	// ctx is cancelled on SIGINT or SIGTERM so the wait below can stop early.
+	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer cancel()
+
+	// placeholder: in future wire up controller
+	fmt.Printf("Starting crawl for keyword=%s limit=%d\n", *keyword, *limit)
+
+	select {
+	case <-time.After(1 * time.Second):
+		fmt.Println("Done (placeholder)")
+	case <-ctx.Done():
+		fmt.Println("Cancelled")
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..538cd33
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/yourname/reddit-scraper
+
+go 1.25.2