diff --git a/find_replace.go b/find_replace.go index 9bdbe79..e5aa5e9 100644 --- a/find_replace.go +++ b/find_replace.go @@ -17,6 +17,9 @@ type findReplace struct { find string replace string + renames []renamePlan + renameMu sync.Mutex + // errs accumulates non-fatal errors that occurred during a walk. The // walker logs each error at the point of failure (preserving the // operator-visible UX) and appends it here so main can surface a @@ -94,7 +97,7 @@ func run(args []string, stderr io.Writer) int { fmt.Fprintln(stderr, err) return 1 } - fr.WalkDir(root) + fr.walkAndRename(root) if err := fr.errs.err(); err != nil { // Each individual error has already been printed at the point of @@ -170,8 +173,11 @@ func (fr *findReplace) HandleFile(f *File) error { } } - // Rename the file now that we're otherwise done with it. - return fr.RenameFile(f) + newBaseName := strings.ReplaceAll(f.Base(), fr.find, fr.replace) + if f.Base() != newBaseName { + fr.queueRename(f.Path, filepath.Join(f.Dir(), newBaseName)) + } + return nil } // RenameFile renames f to its post-replacement name if (a) the name actually @@ -182,19 +188,17 @@ func (fr *findReplace) RenameFile(f *File) error { if f.Base() == newBaseName { return nil } + return fr.renamePath(f.Path, filepath.Join(f.Dir(), newBaseName)) +} - newPath := filepath.Join(f.Dir(), newBaseName) - if _, err := os.Stat(newPath); err == nil { - return fmt.Errorf("refusing to rename %v to %v: %v already exists", f.Path, newBaseName, newPath) - } else if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("stat rename destination %v: %w", newPath, err) - } - - log.Printf("Renaming %v to %v", f.Path, newBaseName) - if err := os.Rename(f.Path, newPath); err != nil { - return fmt.Errorf("rename %v to %v: %w", f.Path, newBaseName, err) +// walkAndRename walks the tree depth-first, then applies queued renames in a +// collision-safe order (two-phase when source and target names overlap). +func (fr *findReplace) walkAndRename(root *File) { + fr.WalkDir(root) + if err := fr.applyRenames(); err != nil { + log.Print(err) + fr.errs.add(err) } - return nil } // ReplaceContents rewrites the file at f if its contents contain the find diff --git a/find_replace_test.go b/find_replace_test.go index 9a54564..4661789 100644 --- a/find_replace_test.go +++ b/find_replace_test.go @@ -170,7 +170,7 @@ func TestWalkDir(t *testing.T) { defer os.Remove(f1.Path) fr := findReplace{find: find, replace: replace} - fr.WalkDir(d) + fr.walkAndRename(d) if err := fr.errs.err(); err != nil { t.Fatalf("WalkDir reported errors: %v", err) } @@ -234,6 +234,9 @@ func TestHandleFileWithDir(t *testing.T) { if err := fr.HandleFile(f); err != nil { t.Fatalf("HandleFile(%q): %v", f.Path, err) } + if err := fr.applyRenames(); err != nil { + t.Fatal(err) + } assertPathExistsAfterRename(t, f, expectedPath) } @@ -278,6 +281,9 @@ func TestHandleFileWithFile(t *testing.T) { if err := fr.HandleFile(f); err != nil { t.Fatalf("HandleFile(%q): %v", f.Path, err) } + if err := fr.applyRenames(); err != nil { + t.Fatal(err) + } assertPathExistsAfterRename(t, f, expectedPath) got := readOrFatal(t, newFileOrFatal(t, expectedPath)) @@ -434,7 +440,7 @@ func TestWalkDir_PermissionDeniedSubdirContinues(t *testing.T) { rootFile := newFileOrFatal(t, root) fr := findReplace{find: "alpha", replace: "beta"} - fr.WalkDir(rootFile) + fr.walkAndRename(rootFile) // The sibling file should have been rewritten despite the denied subtree. got, err := os.ReadFile(siblingFile) @@ -517,7 +523,7 @@ func TestWalkDir_BadRenameTargetDoesNotAbortSiblings(t *testing.T) { rootFile := newFileOrFatal(t, root) fr := findReplace{find: "alpha", replace: "beta"} - fr.WalkDir(rootFile) + fr.walkAndRename(rootFile) // The free file should have been renamed. freeRenamed := filepath.Join(root, "free-beta") @@ -675,12 +681,37 @@ func CloneRepoToTestDir(b *testing.B, repoUrl string) *File { return d } +func TestApplyRenamesSwap(t *testing.T) { + root := newTestDir(t, "", "*") + defer os.Remove(root.Path) + + a := newTestFile(t, root.Path, "foo", "a") + defer os.Remove(a.Path) + b := newTestFile(t, root.Path, "bar", "b") + defer os.Remove(b.Path) + + fr := findReplace{find: "foo", replace: "bar"} + fr.queueRename(a.Path, filepath.Join(root.Path, "bar")) + fr.queueRename(b.Path, filepath.Join(root.Path, "foo")) + + if err := fr.applyRenames(); err != nil { + t.Fatal(err) + } + + if _, err := os.Stat(filepath.Join(root.Path, "bar")); err != nil { + t.Fatalf("expected former foo file at bar: %v", err) + } + if _, err := os.Stat(filepath.Join(root.Path, "foo")); err != nil { + t.Fatalf("expected former bar file at foo: %v", err) + } +} + func BenchmarkNova(b *testing.B) { for n := 0; n < b.N; n++ { b.StopTimer() d := CloneRepoToTestDir(b, "git@github.com:openstack/nova.git") fr := findReplace{find: RandomString(2), replace: RandomString(2)} b.StartTimer() - fr.WalkDir(d) + fr.walkAndRename(d) } } diff --git a/rename_plan.go b/rename_plan.go new file mode 100644 index 0000000..30b2c18 --- /dev/null +++ b/rename_plan.go @@ -0,0 +1,129 @@ +package main + +import ( + "errors" + "fmt" + "log" + "os" + "path/filepath" + "sort" +) + +type renamePlan struct { + from string + to string + temp string +} + +func (fr *findReplace) queueRename(from, to string) { + fr.renameMu.Lock() + fr.renames = append(fr.renames, renamePlan{from: from, to: to}) + fr.renameMu.Unlock() +} + +func (fr *findReplace) applyRenames() error { + if len(fr.renames) == 0 { + return nil + } + + active := make([]renamePlan, 0, len(fr.renames)) + sources := make(map[string]bool, len(fr.renames)) + targets := make(map[string]bool, len(fr.renames)) + for _, r := range fr.renames { + if r.from == r.to { + continue + } + if targets[r.to] { + return fmt.Errorf("duplicate rename target %q", r.to) + } + active = append(active, r) + sources[r.from] = true + targets[r.to] = true + } + + if len(active) == 0 { + return nil + } + + needsTwoPhase := false + for _, r := range active { + if sources[r.to] { + needsTwoPhase = true + break + } + } + + sort.Slice(active, func(i, j int) bool { + if len(active[i].from) != len(active[j].from) { + return len(active[i].from) > len(active[j].from) + } + return active[i].from < active[j].from + }) + + if needsTwoPhase { + return fr.applyRenamesTwoPhase(active) + } + return fr.applyRenamesDirect(active) +} + +func (fr *findReplace) applyRenamesDirect(plans []renamePlan) error { + var errs []error + for _, r := range plans { + if err := fr.renamePath(r.from, r.to); err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) +} + +func (fr *findReplace) applyRenamesTwoPhase(plans []renamePlan) error { + rollback := func() { + for _, r := range plans { + if r.temp != "" { + _ = os.Remove(r.temp) + } + } + } + + seq := 0 + for i := range plans { + r := &plans[i] + r.temp = filepath.Join(filepath.Dir(r.from), fmt.Sprintf(".find-replace.tmp.%d.%d", os.Getpid(), seq)) + seq++ + log.Printf("Renaming %v to %v (phase 1)", r.from, filepath.Base(r.temp)) + if err := os.Rename(r.from, r.temp); err != nil { + rollback() + return fmt.Errorf("phase 1 rename %q: %w", r.from, err) + } + } + + phase2 := append([]renamePlan(nil), plans...) + sort.Slice(phase2, func(i, j int) bool { + if len(phase2[i].to) != len(phase2[j].to) { + return len(phase2[i].to) > len(phase2[j].to) + } + return phase2[i].to < phase2[j].to + }) + + for _, r := range phase2 { + log.Printf("Renaming %v to %v (phase 2)", filepath.Base(r.temp), filepath.Base(r.to)) + if err := os.Rename(r.temp, r.to); err != nil { + return fmt.Errorf("phase 2 rename %q -> %q: %w", r.temp, r.to, err) + } + } + return nil +} + +func (fr *findReplace) renamePath(from, to string) error { + if _, err := os.Stat(to); err == nil { + return fmt.Errorf("refusing to rename %v to %v: %v already exists", from, filepath.Base(to), to) + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("stat rename destination %v: %w", to, err) + } + + log.Printf("Renaming %v to %v", from, filepath.Base(to)) + if err := os.Rename(from, to); err != nil { + return fmt.Errorf("rename %v to %v: %w", from, filepath.Base(to), err) + } + return nil +}