Skip to content

gh-128213: fast path for bytes creation from list and tuple #132590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6f699b5
gh-128213: fast path for bytes creation from list and tuple
blhsing Dec 24, 2024
18c8e4a
coerce long to char after validation of integer in byte range
blhsing Dec 24, 2024
406fbdb
📜🤖 Added by blurb_it.
blurb-it[bot] Dec 24, 2024
4912a05
Update 2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst
blhsing Dec 24, 2024
56f802e
updated for thread-safety, style choices, function choices and benchm…
blhsing Dec 25, 2024
4e1e3e6
revert to PyLong_AsLongAndOverflow for easier overflow handling
blhsing Dec 25, 2024
bf96d06
fixed issue of a label at the end of a compound statment; revert to u…
blhsing Dec 26, 2024
f3a9423
Add FT test for bytes from list
eendebakpt Mar 26, 2025
8bbc021
Merge branch 'main' into fast-bytes-creation-from-list-tuple-2
eendebakpt Apr 16, 2025
8260c7a
Merge branch 'test_bytes_from_list' into fast-bytes-creation-from-lis…
eendebakpt Apr 16, 2025
bc6f8f2
refactor
eendebakpt Apr 16, 2025
970c10b
refactor
eendebakpt Apr 16, 2025
cb664fe
refactor
eendebakpt Apr 16, 2025
c357217
Update Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issu…
eendebakpt Apr 16, 2025
5d53346
Merge branch 'main' into fast-bytes-creation-from-list-tuple-2
eendebakpt Apr 16, 2025
739987e
Merge branch 'main' into fast-bytes-creation-from-list-tuple-2
eendebakpt May 23, 2025
8b7c5e6
Merge branch 'main' into fast-bytes-creation-from-list-tuple-2
eendebakpt Jun 18, 2025
ab82e24
lint
eendebakpt Jul 1, 2025
2e5c3c1
Merge branch 'main' into fast-bytes-creation-from-list-tuple-2
eendebakpt Jul 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions Lib/test/test_free_threading/test_bytes_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import unittest
from threading import Thread, Barrier
from test.support import threading_helper

threading_helper.requires_working_threading(module=True)


class BytesThreading(unittest.TestCase):
@threading_helper.reap_threads
def test_conversion_from_list(self):
number_of_threads = 10
number_of_iterations = 10
barrier = Barrier(number_of_threads)

x = [1, 2, 3, 4, 5]
extends = [(ii,) * (2 + ii) for ii in range(number_of_threads)]

def work(ii):
barrier.wait()
for _ in range(1000):
bytes(x)
x.extend(extends[ii])
if len(x) > 10:
x[:] = [0]

for it in range(number_of_iterations):
worker_threads = []
for ii in range(number_of_threads):
worker_threads.append(Thread(target=work, args=[ii]))
with threading_helper.start_threads(worker_threads):
pass

barrier.reset()


# Allow running this test file directly as a script.
if __name__ == "__main__":
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers by 27-31%.

Patch by Ben Hsing and Pieter Eendebak.
98 changes: 28 additions & 70 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_ceval.h" // _PyEval_GetBuiltin()
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST()
#include "pycore_format.h" // F_LJUST
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
#include "pycore_initconfig.h" // _PyStatus_OK()
Expand Down Expand Up @@ -2859,82 +2860,35 @@ _PyBytes_FromBuffer(PyObject *x)
}

static PyObject*
_PyBytes_FromList(PyObject *x)
_PyBytes_FromSequence_lock_held(PyObject *x)
{
Py_ssize_t i, size = PyList_GET_SIZE(x);
Py_ssize_t value;
char *str;
PyObject *item;
_PyBytesWriter writer;

_PyBytesWriter_Init(&writer);
str = _PyBytesWriter_Alloc(&writer, size);
if (str == NULL)
Py_ssize_t size = PySequence_Fast_GET_SIZE(x);
PyObject *bytes = _PyBytes_FromSize(size, 0);
if (bytes == NULL) {
return NULL;
writer.overallocate = 1;
size = writer.allocated;

for (i = 0; i < PyList_GET_SIZE(x); i++) {
item = PyList_GET_ITEM(x, i);
Py_INCREF(item);
value = PyNumber_AsSsize_t(item, NULL);
Py_DECREF(item);
if (value == -1 && PyErr_Occurred())
goto error;

if (value < 0 || value >= 256) {
PyErr_SetString(PyExc_ValueError,
"bytes must be in range(0, 256)");
goto error;
}
char *str = PyBytes_AS_STRING(bytes);
PyObject *const *items = PySequence_Fast_ITEMS(x);
for (Py_ssize_t i = 0; i < size; i++) {
if (!PyLong_Check(items[i])) {
Py_DECREF(bytes);
/* Py_None as a fallback sentinel to the slow path */
Py_RETURN_NONE;
}

if (i >= size) {
str = _PyBytesWriter_Resize(&writer, str, size+1);
if (str == NULL)
return NULL;
size = writer.allocated;
Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL);
if (value == -1 && PyErr_Occurred()) {
Py_DECREF(bytes);
return NULL;
}
*str++ = (char) value;
}
return _PyBytesWriter_Finish(&writer, str);

error:
_PyBytesWriter_Dealloc(&writer);
return NULL;
}

static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
PyObject *bytes;
Py_ssize_t i, size = PyTuple_GET_SIZE(x);
Py_ssize_t value;
char *str;
PyObject *item;

bytes = PyBytes_FromStringAndSize(NULL, size);
if (bytes == NULL)
return NULL;
str = ((PyBytesObject *)bytes)->ob_sval;

for (i = 0; i < size; i++) {
item = PyTuple_GET_ITEM(x, i);
value = PyNumber_AsSsize_t(item, NULL);
if (value == -1 && PyErr_Occurred())
goto error;

if (value < 0 || value >= 256) {
PyErr_SetString(PyExc_ValueError,
"bytes must be in range(0, 256)");
goto error;
Py_DECREF(bytes);
return NULL;
}
*str++ = (char) value;
}
return bytes;

error:
Py_DECREF(bytes);
return NULL;
}

static PyObject *
Expand Down Expand Up @@ -3017,11 +2971,15 @@ PyBytes_FromObject(PyObject *x)
if (PyObject_CheckBuffer(x))
return _PyBytes_FromBuffer(x);

if (PyList_CheckExact(x))
return _PyBytes_FromList(x);

if (PyTuple_CheckExact(x))
return _PyBytes_FromTuple(x);
if (PyList_CheckExact(x) || PyTuple_CheckExact(x)) {
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(x);
result = _PyBytes_FromSequence_lock_held(x);
Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
/* Py_None as a fallback sentinel to the slow path */
if (result != Py_None) {
return result;
}
}

if (!PyUnicode_Check(x)) {
it = PyObject_GetIter(x);
Expand Down
Loading