Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions find_replace.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ type findReplace struct {
find string
replace string

renames []renamePlan
renameMu sync.Mutex

// errs accumulates non-fatal errors that occurred during a walk. The
// walker logs each error at the point of failure (preserving the
// operator-visible UX) and appends it here so main can surface a
Expand Down Expand Up @@ -94,7 +97,7 @@ func run(args []string, stderr io.Writer) int {
fmt.Fprintln(stderr, err)
return 1
}
fr.WalkDir(root)
fr.walkAndRename(root)

if err := fr.errs.err(); err != nil {
// Each individual error has already been printed at the point of
Expand Down Expand Up @@ -170,8 +173,11 @@ func (fr *findReplace) HandleFile(f *File) error {
}
}

// Rename the file now that we're otherwise done with it.
return fr.RenameFile(f)
newBaseName := strings.ReplaceAll(f.Base(), fr.find, fr.replace)
if f.Base() != newBaseName {
fr.queueRename(f.Path, filepath.Join(f.Dir(), newBaseName))
}
return nil
}

// RenameFile renames f to its post-replacement name if (a) the name actually
Expand All @@ -182,19 +188,17 @@ func (fr *findReplace) RenameFile(f *File) error {
if f.Base() == newBaseName {
return nil
}
return fr.renamePath(f.Path, filepath.Join(f.Dir(), newBaseName))
}

newPath := filepath.Join(f.Dir(), newBaseName)
if _, err := os.Stat(newPath); err == nil {
return fmt.Errorf("refusing to rename %v to %v: %v already exists", f.Path, newBaseName, newPath)
} else if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("stat rename destination %v: %w", newPath, err)
}

log.Printf("Renaming %v to %v", f.Path, newBaseName)
if err := os.Rename(f.Path, newPath); err != nil {
return fmt.Errorf("rename %v to %v: %w", f.Path, newBaseName, err)
// walkAndRename walks the tree depth-first, then applies queued renames in a
// collision-safe order (two-phase when source and target names overlap).
func (fr *findReplace) walkAndRename(root *File) {
fr.WalkDir(root)
if err := fr.applyRenames(); err != nil {
log.Print(err)
fr.errs.add(err)
}
return nil
}

// ReplaceContents rewrites the file at f if its contents contain the find
Expand Down
39 changes: 35 additions & 4 deletions find_replace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func TestWalkDir(t *testing.T) {
defer os.Remove(f1.Path)

fr := findReplace{find: find, replace: replace}
fr.WalkDir(d)
fr.walkAndRename(d)
if err := fr.errs.err(); err != nil {
t.Fatalf("WalkDir reported errors: %v", err)
}
Expand Down Expand Up @@ -234,6 +234,9 @@ func TestHandleFileWithDir(t *testing.T) {
if err := fr.HandleFile(f); err != nil {
t.Fatalf("HandleFile(%q): %v", f.Path, err)
}
if err := fr.applyRenames(); err != nil {
t.Fatal(err)
}
assertPathExistsAfterRename(t, f, expectedPath)
}

Expand Down Expand Up @@ -278,6 +281,9 @@ func TestHandleFileWithFile(t *testing.T) {
if err := fr.HandleFile(f); err != nil {
t.Fatalf("HandleFile(%q): %v", f.Path, err)
}
if err := fr.applyRenames(); err != nil {
t.Fatal(err)
}
assertPathExistsAfterRename(t, f, expectedPath)

got := readOrFatal(t, newFileOrFatal(t, expectedPath))
Expand Down Expand Up @@ -434,7 +440,7 @@ func TestWalkDir_PermissionDeniedSubdirContinues(t *testing.T) {

rootFile := newFileOrFatal(t, root)
fr := findReplace{find: "alpha", replace: "beta"}
fr.WalkDir(rootFile)
fr.walkAndRename(rootFile)

// The sibling file should have been rewritten despite the denied subtree.
got, err := os.ReadFile(siblingFile)
Expand Down Expand Up @@ -517,7 +523,7 @@ func TestWalkDir_BadRenameTargetDoesNotAbortSiblings(t *testing.T) {

rootFile := newFileOrFatal(t, root)
fr := findReplace{find: "alpha", replace: "beta"}
fr.WalkDir(rootFile)
fr.walkAndRename(rootFile)

// The free file should have been renamed.
freeRenamed := filepath.Join(root, "free-beta")
Expand Down Expand Up @@ -675,12 +681,37 @@ func CloneRepoToTestDir(b *testing.B, repoUrl string) *File {
return d
}

func TestApplyRenamesSwap(t *testing.T) {
root := newTestDir(t, "", "*")
defer os.Remove(root.Path)

a := newTestFile(t, root.Path, "foo", "a")
defer os.Remove(a.Path)
b := newTestFile(t, root.Path, "bar", "b")
defer os.Remove(b.Path)

fr := findReplace{find: "foo", replace: "bar"}
fr.queueRename(a.Path, filepath.Join(root.Path, "bar"))
fr.queueRename(b.Path, filepath.Join(root.Path, "foo"))

if err := fr.applyRenames(); err != nil {
t.Fatal(err)
}

if _, err := os.Stat(filepath.Join(root.Path, "bar")); err != nil {
t.Fatalf("expected former foo file at bar: %v", err)
}
if _, err := os.Stat(filepath.Join(root.Path, "foo")); err != nil {
t.Fatalf("expected former bar file at foo: %v", err)
}
}

func BenchmarkNova(b *testing.B) {
for n := 0; n < b.N; n++ {
b.StopTimer()
d := CloneRepoToTestDir(b, "git@github.com:openstack/nova.git")
fr := findReplace{find: RandomString(2), replace: RandomString(2)}
b.StartTimer()
fr.WalkDir(d)
fr.walkAndRename(d)
}
}
129 changes: 129 additions & 0 deletions rename_plan.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package main

import (
"errors"
"fmt"
"log"
"os"
"path/filepath"
"sort"
)

type renamePlan struct {
from string
to string
temp string
}

func (fr *findReplace) queueRename(from, to string) {
fr.renameMu.Lock()
fr.renames = append(fr.renames, renamePlan{from: from, to: to})
fr.renameMu.Unlock()
}

func (fr *findReplace) applyRenames() error {
if len(fr.renames) == 0 {
return nil
}

active := make([]renamePlan, 0, len(fr.renames))
sources := make(map[string]bool, len(fr.renames))
targets := make(map[string]bool, len(fr.renames))
for _, r := range fr.renames {
if r.from == r.to {
continue
}
if targets[r.to] {
return fmt.Errorf("duplicate rename target %q", r.to)
}
active = append(active, r)
sources[r.from] = true
targets[r.to] = true
}

if len(active) == 0 {
return nil
}

needsTwoPhase := false
for _, r := range active {
if sources[r.to] {
needsTwoPhase = true
break
}
}

sort.Slice(active, func(i, j int) bool {
if len(active[i].from) != len(active[j].from) {
return len(active[i].from) > len(active[j].from)
}
return active[i].from < active[j].from
})

if needsTwoPhase {
return fr.applyRenamesTwoPhase(active)
}
return fr.applyRenamesDirect(active)
}

func (fr *findReplace) applyRenamesDirect(plans []renamePlan) error {
var errs []error
for _, r := range plans {
if err := fr.renamePath(r.from, r.to); err != nil {
errs = append(errs, err)
}
}
return errors.Join(errs...)
}

func (fr *findReplace) applyRenamesTwoPhase(plans []renamePlan) error {
rollback := func() {
for _, r := range plans {
if r.temp != "" {
_ = os.Remove(r.temp)
}
}
}

seq := 0
for i := range plans {
r := &plans[i]
r.temp = filepath.Join(filepath.Dir(r.from), fmt.Sprintf(".find-replace.tmp.%d.%d", os.Getpid(), seq))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep temp paths out of directories being renamed

The phase-1 temp is created inside the source's parent directory, so a descendant rename can be invalidated when an ancestor directory is also renamed later in the same two-phase batch. For example, find-replace a aa with a directory a/ containing file a and a sibling aa/ queues both a/a -> a/aa and a -> aa; phase 1 moves a/a to a/.find-replace.tmp..., then moves a itself, so phase 2 later looks for the child temp at the old a/... path and fails, leaving data stranded in the directory temp.

Useful? React with 👍 / 👎.

seq++
log.Printf("Renaming %v to %v (phase 1)", r.from, filepath.Base(r.temp))
if err := os.Rename(r.from, r.temp); err != nil {
rollback()
return fmt.Errorf("phase 1 rename %q: %w", r.from, err)
}
}

phase2 := append([]renamePlan(nil), plans...)
sort.Slice(phase2, func(i, j int) bool {
if len(phase2[i].to) != len(phase2[j].to) {
return len(phase2[i].to) > len(phase2[j].to)
}
return phase2[i].to < phase2[j].to
})

for _, r := range phase2 {
log.Printf("Renaming %v to %v (phase 2)", filepath.Base(r.temp), filepath.Base(r.to))
if err := os.Rename(r.temp, r.to); err != nil {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Refuse occupied non-source targets in two-phase renames

When any source/target chain triggers the two-phase path, phase 2 uses raw os.Rename, which overwrites an existing destination that was not one of the queued sources. For example, with files a, aa, and aaaa, running find-replace a aa queues a -> aa and aa -> aaaa; because aa is a source this takes the two-phase branch, and the phase-2 rename to aaaa silently clobbers the untouched aaaa file instead of reporting the same collision that renamePath would in the direct path.

Useful? React with 👍 / 👎.

return fmt.Errorf("phase 2 rename %q -> %q: %w", r.temp, r.to, err)
}
}
return nil
}

func (fr *findReplace) renamePath(from, to string) error {
if _, err := os.Stat(to); err == nil {
return fmt.Errorf("refusing to rename %v to %v: %v already exists", from, filepath.Base(to), to)
} else if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("stat rename destination %v: %w", to, err)
}

log.Printf("Renaming %v to %v", from, filepath.Base(to))
if err := os.Rename(from, to); err != nil {
return fmt.Errorf("rename %v to %v: %w", from, filepath.Base(to), err)
}
return nil
}