Generator for loading signatures

Also don't waste memory when no output is expected
This commit is contained in:
Kovid Goyal
2021-09-18 13:34:20 +05:30
parent 77508bfe0d
commit bcd1837924
3 changed files with 33 additions and 8 deletions

View File

@@ -3,9 +3,12 @@
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
import os
from typing import Iterator
from typing import Generator, Iterator, Optional
from .rsync import IO_BUFFER_SIZE, RsyncError, begin_create_signature, iter_job
from .rsync import (
IO_BUFFER_SIZE, RsyncError, SignatureCapsule, begin_create_signature,
begin_load_signature, iter_job, make_hash_table
)
def signature_of_file(path: str) -> Iterator[bytes]:
@@ -22,9 +25,28 @@ def signature_of_file(path: str) -> Iterator[bytes]:
if prev_unused_input:
input_data = prev_unused_input + input_data
prev_unused_input = b''
output, finished, sz_of_unused_input = iter_job(job, input_data)
output, finished, sz_of_unused_input = iter_job(job, input_data, no_more_data)
if sz_of_unused_input > 0 and not finished:
if no_more_data:
raise RsyncError(f"{sz_of_unused_input} bytes of input data were not used")
prev_unused_input = input_data[-sz_of_unused_input:]
yield output
def load_signature() -> Generator[Optional[SignatureCapsule], bytes, None]:
    """Build a signature object from bytes pushed in via ``send()``.

    Each chunk of signature data is received through a ``yield None``
    expression and handed to ``iter_job``. A falsy chunk (e.g. ``b''``) is
    treated as the end of the input. Input that ``iter_job`` reports as
    unused is kept and prepended to the next chunk.

    Once the job reports that it is finished, ``make_hash_table`` is called
    on the signature and the signature is yielded as the final value.

    Raises:
        RsyncError: if the end of input was signalled but the job is not
            finished and still reports unused input bytes.
    """
    job, signature = begin_load_signature()
    leftover = b''
    done = False
    while not done:
        # The caller supplies the next chunk with send(); nothing is
        # produced until the whole signature has been loaded.
        chunk = yield None
        at_eof = not chunk
        if leftover:
            chunk, leftover = leftover + chunk, b''
        # The trailing False is expecting_output: the output value returned
        # by iter_job is discarded here.
        _, done, sz_of_unused_input = iter_job(job, chunk, at_eof, False)
        if done or sz_of_unused_input <= 0:
            continue
        if at_eof:
            raise RsyncError(f"{sz_of_unused_input} bytes of input data were not used")
        # Carry the unconsumed tail over to the next iteration.
        leftover = chunk[-sz_of_unused_input:]
    make_hash_table(signature)
    yield signature

View File

@@ -54,12 +54,15 @@ static PyObject*
iter_job(PyObject *self UNUSED, PyObject *args) {
Py_ssize_t input_data_size;
char *input_data;
int eof = -1;
int eof = -1, expecting_output = 1;
PyObject *job_capsule;
if (!PyArg_ParseTuple(args, "O!y#|p", &PyCapsule_Type, &job_capsule, &input_data, &input_data_size, &eof)) return NULL;
if (!PyArg_ParseTuple(args, "O!y#|pp", &PyCapsule_Type, &job_capsule, &input_data, &input_data_size, &eof, expecting_output)) return NULL;
GET_JOB_FROM_CAPSULE;
if (eof == -1) eof = input_data_size > 0 ? 0 : 1;
rs_buffers_t buffer = {.avail_in=input_data_size, .next_in = input_data, .eof_in=eof, .avail_out=MAX(IO_BUFFER_SIZE, 2 * (size_t)input_data_size)};
rs_buffers_t buffer = {
.avail_in=input_data_size, .next_in = input_data, .eof_in=eof,
.avail_out=expecting_output ? (MAX(IO_BUFFER_SIZE, 2 * (size_t)input_data_size)) : 64
};
PyObject *ans = PyBytes_FromStringAndSize(NULL, buffer.avail_out);
if (!ans) return NULL;
buffer.next_out = PyBytes_AS_STRING(ans);
@@ -71,7 +74,7 @@ iter_job(PyObject *self UNUSED, PyObject *args) {
output_size += before - buffer.avail_out;
if (result == RS_DONE || result == RS_BLOCKED) break;
if (buffer.avail_in) {
if (_PyBytes_Resize(&ans, PyBytes_GET_SIZE(ans) * 2) != 0) return NULL;
if (_PyBytes_Resize(&ans, MAX(IO_BUFFER_SIZE, (size_t)PyBytes_GET_SIZE(ans) * 2)) != 0) return NULL;
buffer.avail_out = PyBytes_GET_SIZE(ans) - output_size;
buffer.next_out = PyBytes_AS_STRING(ans) + output_size;
continue;

View File

@@ -27,5 +27,5 @@ def make_hash_table(sig: SignatureCapsule) -> None:
pass
def iter_job(job_capsule: JobCapsule, input_data: bytes, eof: Optional[bool] = None) -> Tuple[bytes, bool, int]:
def iter_job(job_capsule: JobCapsule, input_data: bytes, eof: Optional[bool] = None, expecting_output: bool = True) -> Tuple[bytes, bool, int]:
pass