disk-cache: Improve hole management

Make coalescing of neighboring holes robust. Speed up hole finding.
Cost is we replace a single array tracking holes with three hashmaps
tracking size->[pos] pos->size and endpos->size.
This commit is contained in:
Kovid Goyal
2024-07-15 19:42:07 +05:30
parent 2058cac203
commit 24e6dda0bc
2 changed files with 137 additions and 51 deletions

View File

@@ -53,9 +53,22 @@ typedef struct Hole {
off_t pos, size;
} Hole;
#define NAME hole_pos_map
#define KEY_TY off_t
#define VAL_TY off_t
#include "kitty-verstable.h"
typedef struct PosList { size_t count, capacity; off_t *positions; } PosList;
#define NAME hole_size_map
#define KEY_TY off_t
#define VAL_TY PosList
static void free_pos_list(PosList p) { free(p.positions); }
#define VAL_DTOR_FN free_pos_list
#include "kitty-verstable.h"
typedef struct Holes {
Hole *items;
size_t capacity, count;
hole_pos_map pos_map, end_pos_map;
hole_size_map size_map;
off_t largest_hole_size;
} Holes;
@@ -150,6 +163,14 @@ keydup(CacheKey k) {
return ans;
}
static void
cleanup_holes(Holes *holes) {
vt_cleanup(&holes->size_map);
vt_cleanup(&holes->pos_map);
vt_cleanup(&holes->end_pos_map);
holes->largest_hole_size = 0;
}
static void
defrag(DiskCache *self) {
int new_cache_file = -1;
@@ -198,8 +219,7 @@ defrag(DiskCache *self) {
e->new_offset = current_pos;
current_pos += e->data_sz;
}
self->holes.count = 0;
self->holes.largest_hole_size = 0;
cleanup_holes(&self->holes);
ok = true;
cleanup:
@@ -217,32 +237,68 @@ cleanup:
if (new_cache_file > -1) safe_close(new_cache_file, __FILE__, __LINE__);
}
static void
append_position(PosList *p, off_t pos) {
ensure_space_for(p, positions, off_t, p->count + 1, capacity, 8, false);
p->positions[p->count++] = pos;
}
static void
add_hole_to_maps(Holes *holes, Hole h) {
if (vt_is_end(vt_insert(&holes->pos_map, h.pos, h.size))) fatal("Out of memory");
if (vt_is_end(vt_insert(&holes->end_pos_map, h.pos + h.size, h.size))) fatal("Out of memory");
hole_size_map_itr i = vt_get_or_insert(&holes->size_map, h.size, (PosList){0});
if (vt_is_end(i)) fatal("Out of memory");
append_position(&(i.data->val), h.pos);
holes->largest_hole_size = MAX(h.size, holes->largest_hole_size);
}
static void
update_largest_hole_size(Holes *holes) {
holes->largest_hole_size = 0;
for (hole_size_map_itr i = vt_first(&holes->size_map); !vt_is_end(i); i = vt_next(i)) {
if (i.data->key > holes->largest_hole_size) holes->largest_hole_size = i.data->key;
}
}
static void
remove_hole_from_maps_itr(Holes *holes, Hole h, hole_size_map_itr i, size_t pos_in_sizes_array) {
vt_erase(&holes->pos_map, h.pos); vt_erase(&holes->end_pos_map, h.pos + h.size);
if (i.data->val.count <= 1) {
vt_erase_itr(&holes->size_map, i);
if (h.size > holes->largest_hole_size) update_largest_hole_size(holes);
} else remove_i_from_array(i.data->val.positions, pos_in_sizes_array, i.data->val.count);
}
static void
remove_hole_from_maps(Holes *holes, Hole h) {
hole_size_map_itr i = vt_get(&holes->size_map, h.size);
for (size_t x = 0; x < i.data->val.count; x++) {
if (i.data->val.positions[x] == h.pos) {
remove_hole_from_maps_itr(holes, h, vt_get(&holes->size_map, h.size), x);
return;
}
}
}
static bool
find_hole_to_use(DiskCache *self, const off_t required_sz) {
if (self->holes.largest_hole_size < required_sz) return false;
off_t largest_hole_size = 0;
bool found = false;
ssize_t remove_at = -1;
for (size_t i = 0; i < self->holes.count; i++) {
Hole *h = self->holes.items + i;
if (!found && h->size >= required_sz) {
found = true;
self->currently_writing.val.pos_in_cache_file = h->pos;
bool was_largest_hole = h->size == self->holes.largest_hole_size;
h->size -= required_sz;
h->pos += required_sz;
if (h->size <= self->small_hole_threshold) remove_at = i;
if (!was_largest_hole) {
if (remove_at < 0) return found;
largest_hole_size = self->holes.largest_hole_size;
break;
}
hole_size_map_itr i = vt_get(&self->holes.size_map, required_sz);
if (vt_is_end(i)) {
for (i = vt_first(&self->holes.size_map); !vt_is_end(i); i = vt_next(i)) {
if (i.data->key >= required_sz) break;
}
if (h->size > largest_hole_size) largest_hole_size = h->size;
}
self->holes.largest_hole_size = largest_hole_size;
if (remove_at > -1) remove_i_from_array(self->holes.items, (size_t)remove_at, self->holes.count);
return found;
if (vt_is_end(i)) return false;
Hole h = {.pos=i.data->val.positions[i.data->val.count-1], .size=i.data->key};
remove_hole_from_maps_itr(&self->holes, h, i, i.data->val.count-1);
self->currently_writing.val.pos_in_cache_file = h.pos;
if (required_sz < h.size) {
h.pos += required_sz; h.size -= required_sz;
if (h.size > self->small_hole_threshold) add_hole_to_maps(&self->holes, h);
}
return true;
}
static inline bool
@@ -252,25 +308,46 @@ needs_defrag(DiskCache *self) {
}
static void
add_hole(DiskCache *self, off_t pos, off_t size) {
add_hole(DiskCache *self, const off_t pos, const off_t size) {
if (size <= self->small_hole_threshold) return;
Hole *h;
if (self->holes.count) {
// see if we can find a hole that ends where we start and merge
// look through the prev at most 128 holes for a match
h = &self->holes.items[self->holes.count-1];
for (size_t i = 0; i < MIN(self->holes.count, 128u); i++, h--) {
if (h->pos + h->size == pos) {
h->size += size;
self->holes.largest_hole_size = MAX(self->holes.largest_hole_size, h->size);
return;
if (vt_size(&self->holes.pos_map)) {
// See if we can find a neighboring hole to merge this hole into
// First look for a hole after us
off_t end_pos = pos + size;
hole_pos_map_itr i = vt_get(&self->holes.pos_map, end_pos);
Hole original_hole, new_hole;
bool found = false;
if (vt_is_end(i)) {
// Now look for a hole before us
i = vt_get(&self->holes.end_pos_map, pos);
if (!vt_is_end(i)) {
original_hole.pos = i.data->key - i.data->val; original_hole.size = i.data->val;
new_hole.pos = original_hole.pos; new_hole.size = original_hole.size + size;
found = true;
}
} else {
original_hole.pos = i.data->key; original_hole.size = i.data->val;
new_hole.pos = pos; new_hole.size = original_hole.size + size;
found = true;
// there could be a hole before us as well
i = vt_get(&self->holes.end_pos_map, pos);
if (!vt_is_end(i)) {
self->holes.largest_hole_size = MAX(self->holes.largest_hole_size, new_hole.size);
remove_hole_from_maps(&self->holes, original_hole);
original_hole.pos = i.data->key - i.data->val; original_hole.size = i.data->val;
new_hole.pos = original_hole.pos; new_hole.size += original_hole.size;
}
}
if (found) {
// prevent remove_hole_from_maps updating largest hole size
self->holes.largest_hole_size = MAX(self->holes.largest_hole_size, new_hole.size);
remove_hole_from_maps(&self->holes, original_hole);
add_hole_to_maps(&self->holes, new_hole);
return;
}
}
ensure_space_for(&self->holes, items, Hole, self->holes.count + 16, capacity, 64, false);
h = &self->holes.items[self->holes.count++];
h->pos = pos; h->size = size;
self->holes.largest_hole_size = MAX(self->holes.largest_hole_size, h->size);
Hole h = {.pos=pos, .size=size };
add_hole_to_maps(&self->holes, h);
}
static void
@@ -442,7 +519,7 @@ ensure_state(DiskCache *self) {
return false;
}
}
vt_init(&self->map);
vt_init(&self->map); vt_init(&self->holes.pos_map); vt_init(&self->holes.size_map); vt_init(&self->holes.end_pos_map);
self->fully_initialized = true;
return true;
}
@@ -471,12 +548,11 @@ dealloc(DiskCache* self) {
free_loop_data(&self->loop_data);
self->loop_data_inited = false;
}
vt_cleanup(&self->map);
vt_cleanup(&self->map); cleanup_holes(&self->holes);
if (self->cache_file_fd > -1) {
safe_close(self->cache_file_fd, __FILE__, __LINE__);
self->cache_file_fd = -1;
}
free(self->holes.items); zero_at_ptr(&self->holes);
if (self->currently_writing.val.data) free(self->currently_writing.val.data);
free(self->cache_dir); self->cache_dir = NULL;
Py_TYPE(self)->tp_free((PyObject*)self);
@@ -554,9 +630,8 @@ clear_disk_cache(PyObject *self_) {
if (!ensure_state(self)) return;
mutex(lock);
vt_cleanup(&self->map);
cleanup_holes(&self->holes);
self->total_size = 0;
self->holes.count = 0;
self->holes.largest_hole_size = 0;
if (self->cache_file_fd > -1) add_hole(self, 0, size_of_cache_file(self));
mutex(unlock);
wakeup_write_loop(self);
@@ -737,15 +812,15 @@ static PyObject*
holes(PyObject *self_, PyObject *args UNUSED) {
DiskCache *self = (DiskCache*)self_;
mutex(lock);
RAII_PyObject(ans, PyTuple_New(self->holes.count));
RAII_PyObject(ans, PyFrozenSet_New(NULL));
if (ans) {
for (size_t i = 0; i < self->holes.count; i++) {
PyObject *t = Py_BuildValue("LL", (long long)self->holes.items[i].pos, (long long)self->holes.items[i].size);
if (!t) return NULL;
PyTuple_SetItem(ans, i, t);
for (hole_pos_map_itr i = vt_first(&self->holes.pos_map); !vt_is_end(i); i = vt_next(i)) {
RAII_PyObject(t, Py_BuildValue("LL", (long long)i.data->key, (long long)i.data->val));
if (!t || PySet_Add(ans, t) != 0) break;
}
}
mutex(unlock);
if (PyErr_Occurred()) return NULL;
Py_INCREF(ans);
return ans;
}

View File

@@ -263,7 +263,7 @@ class TestGraphics(BaseTest):
self.assertEqual(sz, dc.size_on_disk())
self.assertEqual(holes, {x[1] for x in dc.holes()})
self.assertEqual(sz, dc.size_on_disk())
# fill holes largest first to ensure small one doesnt go into large accidentally causing fragmentation
# fill holes largest first to ensure small one doesn't go into large accidentally causing fragmentation
for i, x in enumerate(sorted(holes, reverse=True)):
x = 'ABCDEFGH'[i] * x
add(x, x)
@@ -341,6 +341,17 @@ class TestGraphics(BaseTest):
dc.wait_for_write()
self.ae(sz, dc.size_on_disk())
# test hole coalescing
reset()
for i in range(1, 6):
self.assertIsNone(add(i, str(i)*i))
dc.wait_for_write()
remove(2)
remove(4)
self.assertEqual(dc.holes(), {(1, 2), (6, 4)})
remove(3)
self.assertEqual(dc.holes(), {(1, 9)})
def test_suppressing_gr_command_responses(self):
s, g, pl, sl = load_helpers(self)
self.ae(pl('abcd', s=10, v=10, q=1), 'ENODATA:Insufficient image data: 4 < 400')