diff --git a/cmd/reddit-scraper/main.go b/cmd/reddit-scraper/main.go
index 57c221f..c4a70f2 100644
--- a/cmd/reddit-scraper/main.go
+++ b/cmd/reddit-scraper/main.go
@@ -2,34 +2,34 @@ package main
 
 import (
 	"context"
-	"flag"
 	"fmt"
 	"os"
 	"os/signal"
 	"syscall"
 	"time"
+
+	"github.com/yourname/reddit-scraper/internal/config"
+	"github.com/yourname/reddit-scraper/internal/logging"
 )
 
 func main() {
-	keyword := flag.String("keyword", "", "Search keyword (required)")
-	limit := flag.Int("limit", 100, "Max posts to fetch")
-	flag.Parse()
-
-	if *keyword == "" {
-		fmt.Fprintln(os.Stderr, "-keyword is required")
+	cfg, err := config.Load()
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "config load:", err)
 		os.Exit(2)
 	}
 
+	logger := logging.Init(cfg.LogLevel)
+	logger.Info("starting", "keyword", cfg.Keyword, "limit", cfg.Limit, "concurrency", cfg.Concurrency)
+
 	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
 	defer cancel()
 
 	// placeholder: in future wire up controller
-	fmt.Printf("Starting crawl for keyword=%s limit=%d\n", *keyword, *limit)
-
 	select {
 	case <-time.After(1 * time.Second):
-		fmt.Println("Done (placeholder)")
+		logger.Info("done (placeholder)")
 	case <-ctx.Done():
-		fmt.Println("Cancelled")
+		logger.Info("cancelled")
 	}
 }
diff --git a/internal/config/config.go b/internal/config/config.go
index 6637577..499d813 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -62,6 +62,28 @@ func Load() (*Config, error) {
 		return nil, fmt.Errorf("creating output dir: %w", err)
 	}
 
+	// Basic validation
+	if cfg.Limit <= 0 {
+		return nil, fmt.Errorf("limit must be > 0")
+	}
+	if cfg.Concurrency <= 0 {
+		return nil, fmt.Errorf("concurrency must be > 0")
+	}
+	if cfg.RetryLimit < 0 {
+		return nil, fmt.Errorf("retry-limit must be >= 0")
+	}
+	if cfg.RateLimitDelay <= 0 {
+		return nil, fmt.Errorf("rate-limit delay must be > 0")
+	}
+
+	// Ensure dedup cache parent dir exists
+	dedupDir := filepath.Dir(cfg.DedupCachePath)
+	if dedupDir != "" && dedupDir != "." {
+		if err := os.MkdirAll(filepath.Clean(dedupDir), 0o755); err != nil {
+			return nil, fmt.Errorf("creating dedup cache dir: %w", err)
+		}
+	}
+
 	return cfg, nil
 }
 
diff --git a/internal/logging/logger.go b/internal/logging/logger.go
index 35a50b0..9284cc4 100644
--- a/internal/logging/logger.go
+++ b/internal/logging/logger.go
@@ -3,13 +3,30 @@ package logging
 import (
 	"log/slog"
 	"os"
+	"strings"
 )
 
+func parseLevel(s string) slog.Level {
+	s = strings.ToUpper(strings.TrimSpace(s))
+	switch s {
+	case "TRACE":
+		return slog.LevelDebug
+	case "DEBUG":
+		return slog.LevelDebug
+	case "INFO":
+		return slog.LevelInfo
+	case "WARN", "WARNING":
+		return slog.LevelWarn
+	case "ERROR":
+		return slog.LevelError
+	default:
+		return slog.LevelInfo
+	}
+}
+
 // Init initializes a global logger and returns it. Level is a string like "INFO" or "DEBUG".
 func Init(level string) *slog.Logger {
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{AddSource: false})
-	logger := slog.New(handler)
-	// Note: slog's level filtering is configured per Handler via HandlerOptions in Go 1.25;
-	// here we rely on consumer code to not spam at debug when level is INFO.
-	return logger
+	lvl := parseLevel(level)
+	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{AddSource: false, Level: lvl})
+	return slog.New(handler)
 }