Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 38 additions & 13 deletions file_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"os"
"path/filepath"
"strings"

"golang.org/x/tools/godoc/util"
"unicode/utf8"
"bytes"
)

type File struct {
Expand Down Expand Up @@ -60,30 +60,55 @@ func (f *File) Mode() (os.FileMode, error) {
}

// Read reads the file into a string, or returns the empty string for binary
// files. An error indicates the file could not be opened or fully read; the
// caller should log-and-skip rather than abort.
// files (NUL bytes or invalid UTF-8). An error indicates the file could not be
// opened or fully read; the caller should log-and-skip rather than abort.
func (f *File) Read() (string, error) {
handle, err := os.Open(f.Path)
if err != nil {
return "", fmt.Errorf("open %v: %w", f.Path, err)
}
defer handle.Close()

// Check if the file looks like text before reading the entire file.
var buf [1024]byte
n, err := handle.Read(buf[0:])
if err != nil || !util.IsText(buf[0:n]) {
n, err := handle.Read(buf[:])
if err != nil && err != io.EOF {
return "", fmt.Errorf("read %v: %w", f.Path, err)
}
if n == 0 {
return "", nil
}

// Reset file handle so we can read the entire file.
if _, err := handle.Seek(0, io.SeekStart); err != nil {
return "", fmt.Errorf("seek to start of %v: %w", f.Path, err)
if !isTextBytes(buf[:n]) {
return "", nil
}
if err == io.EOF {
return string(buf[:n]), nil
}

builder := new(strings.Builder)
if _, err := io.Copy(builder, handle); err != nil {
return "", fmt.Errorf("read %v: %w", f.Path, err)
if _, wErr := builder.Write(buf[:n]); wErr != nil {
return "", fmt.Errorf("read %v: %w", f.Path, wErr)
}

chunk := make([]byte, 32*1024)
for {
readN, readErr := handle.Read(chunk)
if readN > 0 {
if bytes.IndexByte(chunk[:readN], 0) >= 0 {
return "", nil
}
if !utf8.Valid(chunk[:readN]) {
return "", nil
Comment on lines +99 to +100
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve UTF-8 state across read chunks

When a valid UTF-8 file has a multi-byte rune split across a 32 KiB read boundary after the initial 1 KiB probe, this per-chunk utf8.Valid check returns false because neither half is valid alone, causing Read to return empty and ReplaceContents to silently skip the text file. For example, a file with 1024 ASCII bytes, then 32767 ASCII bytes, then é followed by the search string will no longer be rewritten even though it is valid UTF-8; validate across chunk boundaries or after accumulating the bytes instead.

Useful? React with 👍 / 👎.

}
if _, wErr := builder.Write(chunk[:readN]); wErr != nil {
return "", fmt.Errorf("read %v: %w", f.Path, wErr)
}
}
if readErr == io.EOF {
break
}
if readErr != nil {
return "", fmt.Errorf("read %v: %w", f.Path, readErr)
}
}
return builder.String(), nil
}
Expand Down
42 changes: 42 additions & 0 deletions file_handling_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"os"
"path/filepath"
"testing"
)
Expand Down Expand Up @@ -77,3 +78,44 @@ func TestNewFile(t *testing.T) {
})
}
}

// TestReadSkipsBinaryWithNUL writes a file whose content contains a NUL byte
// and checks that Read reports it as an empty string without an error.
func TestReadSkipsBinaryWithNUL(t *testing.T) {
	target := filepath.Join(t.TempDir(), "mixed.txt")
	writeErr := os.WriteFile(target, []byte("text prefix\x00binary suffix"), 0o644)
	if writeErr != nil {
		t.Fatal(writeErr)
	}

	file, newErr := NewFile(target)
	if newErr != nil {
		t.Fatal(newErr)
	}

	contents, readErr := file.Read()
	switch {
	case readErr != nil:
		t.Fatal(readErr)
	case contents != "":
		t.Fatalf("Read() = %q; want empty for NUL-containing file", contents)
	}
}

// TestReadReturnsShortTextFile writes a five-byte text file and checks that
// Read returns its content unchanged and without an error.
func TestReadReturnsShortTextFile(t *testing.T) {
	target := filepath.Join(t.TempDir(), "short.txt")
	writeErr := os.WriteFile(target, []byte("hello"), 0o644)
	if writeErr != nil {
		t.Fatal(writeErr)
	}

	file, newErr := NewFile(target)
	if newErr != nil {
		t.Fatal(newErr)
	}

	contents, readErr := file.Read()
	switch {
	case readErr != nil:
		t.Fatal(readErr)
	case contents != "hello":
		t.Fatalf("Read() = %q; want %q", contents, "hello")
	}
}

2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
module github.com/dolph/find-replace

go 1.20

require golang.org/x/tools v0.7.0
4 changes: 0 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
golang.org/x/tools v0.1.9 h1:j9KsMiaP1c3B0OTQGth0/k+miLGTgLsAFUCrF2vLcF8=
golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
17 changes: 17 additions & 0 deletions text_detect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package main

import (
"bytes"
"unicode/utf8"
)

// isTextBytes reports whether b can be treated as text: it must hold no NUL
// byte and must be well-formed UTF-8. An empty or nil slice counts as text.
func isTextBytes(b []byte) bool {
	// IndexByte returns -1 when no NUL is present, which also covers the
	// empty slice; utf8.Valid likewise accepts an empty input.
	containsNUL := bytes.IndexByte(b, 0) != -1
	return !containsNUL && utf8.Valid(b)
}
24 changes: 24 additions & 0 deletions text_detect_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package main

import "testing"

// TestIsTextBytes runs isTextBytes over a table of inputs (empty, ASCII,
// multi-byte UTF-8, NUL-containing, and invalid UTF-8) as named subtests and
// compares each result with the expected verdict.
func TestIsTextBytes(t *testing.T) {
	type scenario struct {
		name string
		in   []byte
		want bool
	}
	table := []scenario{
		{"empty", nil, true},
		{"ascii", []byte("hello"), true},
		{"utf8", []byte("héllo"), true},
		{"nul", []byte("a\x00b"), false},
		{"invalid utf8", []byte{0xff, 0xfe}, false},
	}
	for idx := range table {
		entry := table[idx]
		t.Run(entry.name, func(t *testing.T) {
			got := isTextBytes(entry.in)
			if got == entry.want {
				return
			}
			t.Fatalf("isTextBytes(%q) = %v; want %v", entry.in, got, entry.want)
		})
	}
}