-
Notifications
You must be signed in to change notification settings - Fork 1
fix: stream file rewrites with bounded memory #88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| package main | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "io" | ||
| ) | ||
|
|
||
| const streamBufferSize = 256 * 1024 | ||
|
|
||
| // streamReplace copies from r to w, replacing every occurrence of find with replace. | ||
| // It returns whether any replacement was made. Memory use is bounded by streamBufferSize | ||
| // plus len(find) bytes of carry-over between reads. | ||
| func streamReplace(r io.Reader, w io.Writer, find, replace []byte) (bool, error) { | ||
| if len(find) == 0 { | ||
| return false, nil | ||
| } | ||
|
|
||
| buf := make([]byte, streamBufferSize) | ||
| var pending []byte | ||
| var changed bool | ||
|
|
||
| for { | ||
| n, readErr := r.Read(buf) | ||
| if n > 0 { | ||
| data := append(pending, buf[:n]...) | ||
| isFinal := readErr == io.EOF | ||
| if !isFinal && len(data) < streamBufferSize { | ||
| pending = data | ||
| continue | ||
| } | ||
| out, rest, chunkChanged := replaceChunk(data, find, replace, isFinal) | ||
| if chunkChanged { | ||
| changed = true | ||
| } | ||
| if len(out) > 0 { | ||
| if _, err := w.Write(out); err != nil { | ||
| return changed, err | ||
| } | ||
| } | ||
| pending = rest | ||
| } | ||
| if readErr == io.EOF { | ||
| break | ||
| } | ||
| if readErr != nil { | ||
| return changed, readErr | ||
| } | ||
| } | ||
|
|
||
| if len(pending) > 0 { | ||
| out, _, chunkChanged := replaceChunk(pending, find, replace, true) | ||
| if chunkChanged { | ||
| changed = true | ||
| } | ||
| if len(out) > 0 { | ||
| if _, err := w.Write(out); err != nil { | ||
| return changed, err | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return changed, nil | ||
| } | ||
|
|
||
// replaceChunk replaces every complete occurrence of find in data with replace.
//
// out is the data that is safe to emit now. rest is a copy of the trailing
// bytes that must be prepended to the next chunk because they could be the
// beginning of a match that continues past the end of data. changed reports
// whether out differs from the input bytes it was produced from.
//
// When final is true nothing is held back and rest is nil. Otherwise at most
// len(find)-1 bytes are held back, and only bytes that come after the last
// match: the whole of data is searched, so a match that ends inside the
// would-be carry-over region is replaced here rather than being split between
// out and rest. Matches are taken leftmost-first and do not overlap, so feeding
// consecutive chunks through replaceChunk gives the same result as one
// bytes.Replace over the concatenated input.
//
// An empty find is treated as "no match": data is passed through unchanged.
func replaceChunk(data, find, replace []byte, final bool) (out []byte, rest []byte, changed bool) {
	if len(data) == 0 {
		return nil, nil, false
	}
	if len(find) == 0 {
		// bytes.Index would match at every offset without advancing.
		return append([]byte(nil), data...), nil, false
	}

	out = make([]byte, 0, len(data))
	matched := false
	pos := 0 // start of the part of data not yet copied to out
	for {
		i := bytes.Index(data[pos:], find)
		if i < 0 {
			break
		}
		out = append(out, data[pos:pos+i]...)
		out = append(out, replace...)
		pos += i + len(find)
		matched = true
	}

	// data[pos:] holds no complete match. Unless this is the last chunk, its
	// final len(find)-1 bytes may still start a match, so keep them back.
	keep := 0
	if !final {
		keep = len(find) - 1
		if tail := len(data) - pos; keep > tail {
			keep = tail
		}
	}
	split := len(data) - keep
	out = append(out, data[pos:split]...)
	if keep > 0 {
		// Copy so the carry-over does not alias the caller's buffer.
		rest = append([]byte(nil), data[split:]...)
	}

	// Substituting find with an identical replace leaves the bytes unchanged.
	return out, rest, matched && !bytes.Equal(find, replace)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| package main | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "io" | ||
| "strings" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestStreamReplace(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| input string | ||
| find string | ||
| replace string | ||
| want string | ||
| }{ | ||
| {name: "no match", input: "hello", find: "z", replace: "q", want: "hello"}, | ||
| {name: "simple", input: "foo bar foo", find: "foo", replace: "baz", want: "baz bar baz"}, | ||
| {name: "span boundary", input: "xxababc", find: "ab", replace: "X", want: "xxXXc"}, | ||
| {name: "empty input", input: "", find: "a", replace: "b", want: ""}, | ||
| } | ||
|
|
||
| for _, tc := range tests { | ||
| tc := tc | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| t.Parallel() | ||
| var out bytes.Buffer | ||
| changed, err := streamReplace(strings.NewReader(tc.input), &out, []byte(tc.find), []byte(tc.replace)) | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| if tc.input != tc.want && !changed { | ||
| t.Fatal("expected changed=true") | ||
| } | ||
| if tc.input == tc.want && changed { | ||
| t.Fatal("expected changed=false") | ||
| } | ||
| if out.String() != tc.want { | ||
| t.Fatalf("got %q; want %q", out.String(), tc.want) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestStreamReplaceLargeWithSmallReads(t *testing.T) { | ||
| find := "needle" | ||
| replace := "pin" | ||
| input := strings.Repeat("hay", 1000) + find + strings.Repeat("stack", 1000) | ||
| want := strings.Replace(input, find, replace, 1) | ||
|
|
||
| var out bytes.Buffer | ||
| r := &smallReader{data: []byte(input), step: 3} | ||
| changed, err := streamReplace(r, &out, []byte(find), []byte(replace)) | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| if !changed { | ||
| t.Fatal("expected replacement") | ||
| } | ||
| if out.String() != want { | ||
| t.Fatalf("output length %d; want %d", out.Len(), len(want)) | ||
| } | ||
| } | ||
|
|
||
// smallReader is a test io.Reader that serves data in pieces of at most step
// bytes per Read call.
type smallReader struct {
	data []byte // bytes to serve
	step int    // maximum bytes returned per Read; values below 1 are treated as 1
	off  int    // offset of the next unread byte in data
}

// Read copies up to step bytes (bounded by len(p) and by the data remaining)
// into p and advances the read offset. It returns 0, io.EOF once all of data
// has been served.
func (r *smallReader) Read(p []byte) (int, error) {
	if r.off >= len(r.data) {
		return 0, io.EOF
	}

	n := r.step
	if n < 1 {
		// A zero step would return (0, nil) forever and a negative one would
		// panic on the slice expression below; always make progress instead.
		n = 1
	}
	if n > len(p) {
		n = len(p)
	}
	if remaining := len(r.data) - r.off; n > remaining {
		n = remaining
	}

	copy(p, r.data[r.off:r.off+n])
	r.off += n
	return n, nil
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This opens the randomly named temp path with `O_TRUNC` before knowing whether the file contains a match, so if the generated name already exists in the target directory, a no-op replacement can truncate that unrelated file and then remove it at the `!changed` cleanup path. The previous `ReplaceContents` returned before creating any temp file when there was no match; use an exclusive temp-file primitive such as `os.CreateTemp`/`O_EXCL`, and preferably defer temp creation until a replacement is actually needed.
Useful? React with 👍 / 👎.