Use a smarter result collection datatype

Avoids unnecessary copying of result structs on realloc
This commit is contained in:
Kovid Goyal
2025-06-27 14:32:08 +05:30
parent 75f10d140f
commit 41346f2e5b
3 changed files with 106 additions and 38 deletions

View File

@@ -0,0 +1,81 @@
package choose_files
import (
"fmt"
"io/fs"
)
// Blank assignment that references the fmt package so its import counts as
// used (presumably kept so debug prints can be added without editing the
// import list).
var _ = fmt.Print
// CollectionIndex addresses one element of a ResultCollection: Slice selects
// the slab and Pos selects the element inside that slab.
type CollectionIndex struct {
	Slice int
	Pos   int
}

// Compare orders two indices by slab first and by position second. The result
// is negative when c comes before o, zero when both are equal and positive
// when c comes after o.
func (c CollectionIndex) Compare(o CollectionIndex) int {
	if delta := c.Slice - o.Slice; delta != 0 {
		return delta
	}
	return c.Pos - o.Pos
}
// ResultCollection stores ResultItem values in a list of separately allocated
// slabs instead of a single contiguous slice.
type ResultCollection struct {
// slices holds the slabs; NewResultCollection allocates the first one.
slices [][]ResultItem
// append_idx is the slab and position that NextAppendPointer hands out next.
append_idx CollectionIndex
// batch_size is the length of the first slab and the per-slab size that Len
// multiplies by.
batch_size int
}
// NewResultCollection builds a collection whose first slab is already
// allocated with batch_size zero-valued items. A batch_size below 1 is raised
// to 1 so that the first slab is never empty.
func NewResultCollection(batch_size int) (ans *ResultCollection) {
	if batch_size < 1 {
		batch_size = 1
	}
	first := make([]ResultItem, batch_size)
	ans = &ResultCollection{
		slices:     [][]ResultItem{first},
		batch_size: batch_size,
	}
	return ans
}
// Len reports the number of items handed out so far. It counts batch_size
// items for every slab before the last one and adds the append position, so it
// assumes every earlier slab holds exactly batch_size items and that the
// append index sits in the last slab.
func (c *ResultCollection) Len() int {
	full := len(c.slices) - 1
	return full*c.batch_size + c.append_idx.Pos
}
// NextAppendPointer returns a pointer to the next free item and advances the
// append index past it. The caller fills the item in place through the
// returned pointer. Existing slabs are never reallocated, so pointers returned
// earlier stay valid.
//
// When the current slab is used up, the index moves to the following slab,
// allocating it first if it does not exist yet. New slabs are sized with
// batch_size (not a fixed constant) so that every slab has the same length,
// which is what Len relies on when it multiplies the slab count by batch_size.
func (c *ResultCollection) NextAppendPointer() (ans *ResultItem) {
	s := c.slices[c.append_idx.Slice]
	ans = &s[c.append_idx.Pos]
	if c.append_idx.Pos+1 < len(s) {
		c.append_idx.Pos++
		return ans
	}
	// The current slab is now full: make sure a following slab exists.
	if c.append_idx.Slice+1 >= len(c.slices) {
		c.slices = append(c.slices, make([]ResultItem, c.batch_size))
	}
	c.append_idx.Slice++
	c.append_idx.Pos = 0
	return ans
}
// Batch returns the next run of already appended items starting at offset and
// moves offset past them. A single call never crosses a slab boundary: when
// offset is in an earlier slab than the append index, the remainder of that
// slab is returned and offset moves to the start of the following slab. When
// offset has caught up with (or is beyond) the append index the result is nil
// and offset is left untouched.
func (c *ResultCollection) Batch(offset *CollectionIndex) (ans []ResultItem) {
	end := c.append_idx
	switch {
	case offset.Slice < end.Slice:
		ans = c.slices[offset.Slice][offset.Pos:]
		offset.Slice, offset.Pos = offset.Slice+1, 0
	case offset.Slice == end.Slice && offset.Pos < end.Pos:
		ans = c.slices[end.Slice][offset.Pos:end.Pos]
		offset.Pos = end.Pos
	}
	return ans
}
// NextDir scans forward from offset, up to but excluding the append index, and
// returns the text of the first item whose ftype has fs.ModeDir set. offset is
// advanced past every item inspected, including the one returned, so repeated
// calls walk the directories in order. The empty string is returned once no
// further directory is found; a directory item whose text is empty is skipped
// like any other non-match.
func (c *ResultCollection) NextDir(offset *CollectionIndex) (ans string) {
	for offset.Compare(c.append_idx) < 0 {
		slab := c.slices[offset.Slice]
		item := &slab[offset.Pos]
		// Step past the inspected item before deciding whether to stop, so the
		// next call resumes after it.
		if offset.Pos++; offset.Pos >= len(slab) {
			offset.Slice++
			offset.Pos = 0
		}
		if item.ftype&fs.ModeDir != 0 && item.text != "" {
			return item.text
		}
	}
	return ""
}

View File

@@ -61,13 +61,13 @@ type FileSystemScanner struct {
in_progress, keep_going atomic.Bool
root_dir string
mutex sync.Mutex
results []ResultItem
collection *ResultCollection
dir_reader func(path string) ([]fs.DirEntry, error)
err error
}
func NewFileSystemScanner(root_dir string, notify chan bool) (fss *FileSystemScanner) {
ans := &FileSystemScanner{root_dir: root_dir, listeners: []chan bool{notify}, results: make([]ResultItem, 0, 1024)}
ans := &FileSystemScanner{root_dir: root_dir, listeners: []chan bool{notify}, collection: NewResultCollection(4096)}
ans.in_progress.Store(true)
ans.keep_going.Store(true)
ans.dir_reader = os.ReadDir
@@ -79,7 +79,7 @@ type Scanner interface {
Cancel()
AddListener(chan bool)
Len() int
Batch(offset int) []ResultItem
Batch(offset *CollectionIndex) []ResultItem
Finished() bool
Error() error
}
@@ -114,17 +114,13 @@ func (fss *FileSystemScanner) AddListener(x chan bool) {
func (fss *FileSystemScanner) Len() int {
fss.lock()
defer fss.unlock()
return len(fss.results)
return fss.collection.Len()
}
func (fss *FileSystemScanner) Batch(offset int) []ResultItem {
func (fss *FileSystemScanner) Batch(offset *CollectionIndex) []ResultItem {
fss.lock()
defer fss.unlock()
if offset >= len(fss.results) {
return nil
}
limit := min(len(fss.results), offset+4096)
return fss.results[offset:limit]
return fss.collection.Batch(offset)
}
func (fss *FileSystemScanner) Finished() bool {
@@ -202,12 +198,12 @@ func (fss *FileSystemScanner) worker() {
}
}()
root_dir, _ := filepath.Abs(fss.root_dir)
dir := root_dir
if !strings.HasSuffix(dir, string(os.PathSeparator)) {
dir += string(os.PathSeparator)
if !strings.HasSuffix(root_dir, string(os.PathSeparator)) {
root_dir += string(os.PathSeparator)
}
dir := root_dir
base := ""
pos := 0
pos := &CollectionIndex{}
var arena []sortable_dir_entry
var sortable []*sortable_dir_entry
var idx uint32
@@ -252,22 +248,14 @@ func (fss *FileSystemScanner) worker() {
sortable[i] = &arena[i]
}
slices.SortFunc(sortable, func(a, b *sortable_dir_entry) int { return bytes.Compare(a.sort_key, b.sort_key) })
ns := fss.results
new_sz := len(ns) + len(entries)
if cap(ns) < new_sz {
ns = make([]ResultItem, len(ns), max(1024, new_sz, cap(ns)*2))
copy(ns, fss.results)
}
new_items := ns[len(ns):new_sz]
for i, e := range sortable {
new_items[i].ftype = e.ftype
new_items[i].text = base + e.name
new_items[i].score.Set_index(idx)
fss.lock()
for _, e := range sortable {
i := fss.collection.NextAppendPointer()
i.ftype = e.ftype
i.text = base + e.name
i.score.Set_index(idx)
idx++
}
ns = ns[0:new_sz]
fss.lock()
fss.results = ns
listeners := fss.listeners
fss.unlock()
for _, l := range listeners {
@@ -276,13 +264,11 @@ func (fss *FileSystemScanner) worker() {
default:
}
}
dir = ""
for pos < len(fss.results) && dir == "" {
if fss.results[pos].ftype&fs.ModeDir != 0 {
base = fss.results[pos].text + string(os.PathSeparator)
dir = root_dir + string(os.PathSeparator) + base
}
pos++
if relpath := fss.collection.NextDir(pos); relpath != "" {
base = relpath + string(os.PathSeparator)
dir = root_dir + base
} else {
dir = ""
}
is_root = false
}
@@ -418,14 +404,13 @@ func (fss *FileSystemScorer) worker(on_results chan bool, worker_wait *sync.Wait
return
}
offset := 0
offset := &CollectionIndex{}
for range on_results {
if !fss.keep_going.Load() {
break
}
results := fss.scanner.Batch(offset)
if len(results) > 0 || fss.scanner.Error() != nil {
offset += len(results)
fss.on_results(handle_batch(results), false)
}
}
@@ -434,7 +419,6 @@ func (fss *FileSystemScorer) worker(on_results chan bool, worker_wait *sync.Wait
if len(b) == 0 {
break
}
offset += len(b)
fss.on_results(handle_batch(b), false)
}
}

View File

@@ -8,6 +8,7 @@ import (
"strings"
"sync"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/kovidgoyal/kitty/tools/utils"
@@ -177,6 +178,8 @@ func run_scoring(b *testing.B, depth, breadth int, query string) {
s.Start()
wg.Wait()
}
fmt.Println("\nnumber of iterations: ", b.N)
fmt.Println("time per iteration:", b.Elapsed()/time.Duration(b.N))
}
// To run this benchmark with profiling use: