reddit-scrapper/internal/storage/jsonl_writer.go

82 lines
1.6 KiB
Go

package storage
import (
"bufio"
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
)
// JSONLWriter writes records as line-delimited JSON and rotates files by size.
// Each part is a file in outDir named "<keyword>_<UTC timestamp>_part<N>.jsonl".
type JSONLWriter struct {
// keyword is the prefix of every part file name.
keyword string
// outDir is the directory in which part files are created.
outDir string
// file is the part file currently open for writing.
file *os.File
// writer buffers writes to file.
writer *bufio.Writer
// maxBytes is the size at which Write rotates to a new part; values <= 0 disable rotation.
maxBytes int64
// written counts the bytes handed to writer for the current part; rotation resets it to 0.
written int64
// part is the number of the current part file, starting at 1.
part int
}
// NewJSONLWriter builds a JSONLWriter for keyword that stores its part files
// in outDir, and opens the first part immediately. maxBytes is the size at
// which a part is considered full; Write only rotates when it is positive.
// If the first part cannot be opened, the error is returned with a nil writer.
func NewJSONLWriter(keyword, outDir string, maxBytes int64) (*JSONLWriter, error) {
	jw := new(JSONLWriter)
	jw.keyword = keyword
	jw.outDir = outDir
	jw.maxBytes = maxBytes

	// rotate doubles as the initial open: on a fresh writer it creates part 1.
	err := jw.rotate()
	if err != nil {
		return nil, err
	}
	return jw, nil
}
// rotate finishes the current part file, if one is open, and opens the next.
//
// Buffered data is flushed and the old file closed before the new part is
// created. A failure of either step is returned rather than discarded, because
// an ignored flush or close error means records silently missing from the
// finished part. On a flush error the current file is left open and no new
// part is created.
//
// Part files are named "<keyword>_<UTC timestamp>_part<N>.jsonl" inside
// outDir. The part counter advances only after the new file has been created,
// so a failed rotation does not leave a gap in the numbering.
func (w *JSONLWriter) rotate() error {
	if w.file != nil {
		if w.writer != nil {
			if err := w.writer.Flush(); err != nil {
				return fmt.Errorf("flushing part %d: %w", w.part, err)
			}
		}
		err := w.file.Close()
		// The handle is unusable after Close whether or not it reported an
		// error, so drop it to avoid closing it again on a later call.
		w.file = nil
		if err != nil {
			return fmt.Errorf("closing part %d: %w", w.part, err)
		}
	}

	next := w.part + 1
	ts := time.Now().UTC().Format("20060102T150405Z")
	name := fmt.Sprintf("%s_%s_part%d.jsonl", w.keyword, ts, next)
	f, err := os.Create(filepath.Join(w.outDir, name))
	if err != nil {
		return err
	}

	w.part = next
	w.file = f
	w.writer = bufio.NewWriterSize(f, 16*1024)
	w.written = 0
	return nil
}
// Write encodes v as JSON and appends it to the current part as a single
// newline-terminated line. When maxBytes is positive and the bytes written to
// the current part have reached it, Write rotates to a new part before
// returning. Errors from encoding, writing, or rotating are returned as-is.
func (w *JSONLWriter) Write(v interface{}) error {
	line, err := json.Marshal(v)
	if err != nil {
		return err
	}
	line = append(line, '\n')

	count, err := w.writer.Write(line)
	if err != nil {
		return err
	}
	w.written += int64(count)

	// A non-positive maxBytes disables size-based rotation.
	if w.maxBytes <= 0 || w.written < w.maxBytes {
		return nil
	}
	return w.rotate()
}
// Close flushes buffered records and closes the current part file.
//
// The file is closed even when the flush fails, so a write error cannot leak
// the open descriptor. The flush error takes precedence in the return value
// because it is the one that signals lost records; otherwise the error from
// closing the file, if any, is returned. After Close the file handle is
// dropped, so a second call does not try to close it again.
func (w *JSONLWriter) Close() error {
	var flushErr error
	if w.writer != nil {
		flushErr = w.writer.Flush()
	}
	if w.file == nil {
		return flushErr
	}

	closeErr := w.file.Close()
	w.file = nil
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}