chore: bootstrap project skeleton with README, .env.example, .gitignore and minimal main

This commit is contained in:
Sirin Puenggun 2025-10-13 23:23:52 +07:00
commit a2acaaadc8
5 changed files with 69 additions and 0 deletions

10
.env.example Normal file
View File

@ -0,0 +1,10 @@
# Example env for reddit-scraper
KEYWORD=golang
LIMIT=1000
CONCURRENCY=4
RETRY_LIMIT=5
RATE_LIMIT_DELAY=2s
OUTPUT_DIR=./data
DEDUP_CACHE_PATH=./data/dedup_ids.txt
LOG_LEVEL=INFO
USER_AGENT=reddit-scraper-mvp/0.1 (+contact: you@example.com)

10
.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
/bin/
/data/
*.log
*.db
*.env
.DS_Store
/tmp/
*.exe
*.exe~
*.out

11
README.md Normal file
View File

@ -0,0 +1,11 @@
# Reddit Scraper MVP
Lightweight Reddit post scraper.
Usage example:
go run ./cmd/reddit-scraper -keyword "golang" -limit 1000
Configuration is supplied via command-line flags or a `.env` file (see `.env.example`).
Output is written to `data/{keyword}_{timestamp}.jsonl`.

View File

@ -0,0 +1,35 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"time"
)
// main is the command-line entry point for the scraper skeleton.
//
// It parses the -keyword (required) and -limit flags, exits with status 2 when
// either is invalid, and then runs a one-second placeholder "crawl" that can be
// interrupted by SIGINT or SIGTERM.
func main() {
	keyword := flag.String("keyword", "", "Search keyword (required)")
	limit := flag.Int("limit", 100, "Max posts to fetch")
	flag.Parse()

	// Argument validation happens before any defer is registered, because
	// os.Exit terminates the process without running deferred calls.
	if *keyword == "" {
		fmt.Fprintln(os.Stderr, "-keyword is required")
		os.Exit(2)
	}
	// A zero or negative cap can never be satisfied by a crawl, so reject it
	// up front instead of starting with a meaningless limit.
	if *limit < 1 {
		fmt.Fprintf(os.Stderr, "-limit must be a positive integer, got %d\n", *limit)
		os.Exit(2)
	}

	// ctx is cancelled when SIGINT or SIGTERM arrives; cancel unregisters the
	// signal handling again on the way out.
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	// Placeholder: the real crawl controller will be wired in here later.
	fmt.Printf("Starting crawl for keyword=%s limit=%d\n", *keyword, *limit)

	// Stand-in for the crawl: finish after one second unless a signal
	// cancels the context first.
	select {
	case <-time.After(1 * time.Second):
		fmt.Println("Done (placeholder)")
	case <-ctx.Done():
		fmt.Println("Cancelled")
	}
}

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module github.com/yourname/reddit-scraper
go 1.25.2