move rsync signature hashmap to verstable

This commit is contained in:
Kovid Goyal
2024-07-11 20:37:17 +05:30
parent 50ad685ed9
commit dca2663500

View File

@@ -8,7 +8,6 @@
#include "data-types.h" #include "data-types.h"
#include "binary.h" #include "binary.h"
#include "kitty-uthash.h"
#include <math.h> #include <math.h>
#include <xxhash.h> #include <xxhash.h>
@@ -433,12 +432,16 @@ PyTypeObject Patcher_Type = {
// Differ {{{ // Differ {{{
typedef struct Signature { uint64_t index, strong_hash; } Signature; typedef struct Signature { uint64_t index, strong_hash; } Signature;
typedef struct SignatureMap { typedef struct SignatureVal {
int weak_hash;
Signature sig, *weak_hash_collisions; Signature sig, *weak_hash_collisions;
size_t len, cap; size_t len, cap;
UT_hash_handle hh; } SignatureVal;
} SignatureMap; #define NAME SignatureMap
#define KEY_TY int
#define VAL_TY SignatureVal
static void free_signature_val(SignatureVal x) { free(x.weak_hash_collisions); }
#define VAL_DTOR_FN free_signature_val
#include "kitty-verstable.h"
typedef struct Differ { typedef struct Differ {
PyObject_HEAD PyObject_HEAD
@@ -447,7 +450,7 @@ typedef struct Differ {
Rsync rsync; Rsync rsync;
bool signature_header_parsed; bool signature_header_parsed;
buffer buf; buffer buf;
SignatureMap *signature_map; SignatureMap signature_map;
PyObject *read, *write; PyObject *read, *write;
bool written, finished; bool written, finished;
@@ -458,30 +461,21 @@ typedef struct Differ {
static int static int
Differ_init(PyObject *s, PyObject *args, PyObject *kwds) { Differ_init(PyObject *s, PyObject *args, PyObject *kwds) {
Patcher *self = (Patcher*)s; Differ *self = (Differ*)s;
static char *kwlist[] = {NULL}; static char *kwlist[] = {NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) return -1; if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) return -1;
const char *err = init_rsync(&self->rsync, default_block_size, 0, 0); const char *err = init_rsync(&self->rsync, default_block_size, 0, 0);
if (err != NULL) { PyErr_SetString(RsyncError, err); return -1; } if (err != NULL) { PyErr_SetString(RsyncError, err); return -1; }
vt_init(&self->signature_map);
return 0; return 0;
} }
static void
free_sigmap(SignatureMap *map) {
SignatureMap *current, *tmp;
HASH_ITER(hh, map, current, tmp) {
HASH_DEL(map, current);
free(current->weak_hash_collisions);
free(current);
}
}
static void static void
Differ_dealloc(PyObject *self) { Differ_dealloc(PyObject *self) {
Differ *p = (Differ*)self; Differ *p = (Differ*)self;
if (p->buf.data) free(p->buf.data); if (p->buf.data) free(p->buf.data);
free_rsync(&p->rsync); free_rsync(&p->rsync);
if (p->signature_map) free_sigmap(p->signature_map); vt_cleanup(&p->signature_map);
Py_TYPE(self)->tp_free(self); Py_TYPE(self)->tp_free(self);
} }
@@ -510,7 +504,7 @@ parse_signature_header(Differ *self) {
} }
static bool static bool
add_collision(SignatureMap *sm, Signature s) { add_collision(SignatureVal *sm, Signature s) {
if (sm->cap < sm->len + 1) { if (sm->cap < sm->len + 1) {
size_t new_cap = MAX(sm->cap * 2, 8u); size_t new_cap = MAX(sm->cap * 2, 8u);
sm->weak_hash_collisions = realloc(sm->weak_hash_collisions, new_cap * sizeof(sm->weak_hash_collisions[0])); sm->weak_hash_collisions = realloc(sm->weak_hash_collisions, new_cap * sizeof(sm->weak_hash_collisions[0]));
@@ -525,17 +519,14 @@ static size_t
parse_signature_block(Differ *self, uint8_t *data, size_t len) { parse_signature_block(Differ *self, uint8_t *data, size_t len) {
if (len < 20) return 0; if (len < 20) return 0;
int weak_hash = le32dec(data + 8); int weak_hash = le32dec(data + 8);
SignatureMap *sm = NULL; SignatureMap_itr i = vt_get(&self->signature_map, weak_hash);
HASH_FIND_INT(self->signature_map, &weak_hash, sm); if (vt_is_end(i)) {
if (sm == NULL) { SignatureVal s = {0};
sm = calloc(1, sizeof(SignatureMap)); s.sig.index = le64dec(data);
if (sm == NULL) { PyErr_NoMemory(); return 0; } s.sig.strong_hash = le64dec(data+12);
sm->weak_hash = weak_hash; vt_insert(&self->signature_map, weak_hash, s);
sm->sig.index = le64dec(data);
sm->sig.strong_hash = le64dec(data+12);
HASH_ADD_INT(self->signature_map, weak_hash, sm);
} else { } else {
if (!add_collision(sm, (Signature){.index=le64dec(data), .strong_hash=le64dec(data+12)})) return 0; if (!add_collision(&i.data->val, (Signature){.index=le64dec(data), .strong_hash=le64dec(data+12)})) return 0;
} }
return 20; return 20;
} }
@@ -659,7 +650,7 @@ ensure_idx_valid(Differ *self, size_t idx) {
} }
static bool static bool
find_strong_hash(SignatureMap *sm, uint64_t q, uint64_t *block_index) { find_strong_hash(const SignatureVal *sm, uint64_t q, uint64_t *block_index) {
if (sm->sig.strong_hash == q) { *block_index = sm->sig.index; return true; } if (sm->sig.strong_hash == q) { *block_index = sm->sig.index; return true; }
for (size_t i = 0; i < sm->len; i++) { for (size_t i = 0; i < sm->len; i++) {
if (sm->weak_hash_collisions[i].strong_hash == q) { *block_index = sm->weak_hash_collisions[i].index; return true; } if (sm->weak_hash_collisions[i].strong_hash == q) { *block_index = sm->weak_hash_collisions[i].index; return true; }
@@ -734,11 +725,10 @@ read_next(Differ *self) {
self->window.sz = self->rsync.block_size; self->window.sz = self->rsync.block_size;
rolling_checksum_full(&self->rc, self->buf.data + self->window.pos, self->window.sz); rolling_checksum_full(&self->rc, self->buf.data + self->window.pos, self->window.sz);
} }
SignatureMap *sm = NULL;
int weak_hash = self->rc.val; int weak_hash = self->rc.val;
uint64_t block_index = 0; uint64_t block_index = 0;
HASH_FIND_INT(self->signature_map, &weak_hash, sm); SignatureMap_itr i = vt_get(&self->signature_map, weak_hash);
if (sm != NULL && find_strong_hash(sm, self->rsync.hasher.oneshot64(self->buf.data + self->window.pos, self->window.sz), &block_index)) { if (!vt_is_end(i) && find_strong_hash(&i.data->val, self->rsync.hasher.oneshot64(self->buf.data + self->window.pos, self->window.sz), &block_index)) {
if (!send_data(self)) return false; if (!send_data(self)) return false;
if (!enqueue(self, (Operation){.type=OpBlock, .block_index=block_index})) return false; if (!enqueue(self, (Operation){.type=OpBlock, .block_index=block_index})) return false;
self->window.pos += self->window.sz; self->window.pos += self->window.sz;