Mailing List Archive

gh-117431: Adapt bytes and bytearray .startswith() and .endswith() to Argument Clinic (#117495)
https://github.com/python/cpython/commit/595bb496b0504429cf01a76fd1ada718d9dd25ca
commit: 595bb496b0504429cf01a76fd1ada718d9dd25ca
branch: main
author: Erlend E. Aasland <erlend@python.org>
committer: erlend-aasland <erlend.aasland@protonmail.com>
date: 2024-04-03T13:11:14+02:00
summary:

gh-117431: Adapt bytes and bytearray .startswith() and .endswith() to Argument Clinic (#117495)

This change gives a significant speedup, as the METH_FASTCALL calling
convention is now used.

files:
A Misc/NEWS.d/next/Core and Builtins/2024-04-03-09-49-15.gh-issue-117431.WAqRgc.rst
M Include/internal/pycore_bytes_methods.h
M Objects/bytearrayobject.c
M Objects/bytes_methods.c
M Objects/bytesobject.c
M Objects/clinic/bytearrayobject.c.h
M Objects/clinic/bytesobject.c.h

diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h
index 11e8ab20e91367..b9c0a4e2b2f77d 100644
--- a/Include/internal/pycore_bytes_methods.h
+++ b/Include/internal/pycore_bytes_methods.h
@@ -32,8 +32,12 @@ extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args
extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args);
extern int _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg);
-extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args);
-extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args);
+extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len,
+ PyObject *subobj, Py_ssize_t start,
+ Py_ssize_t end);
+extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len,
+ PyObject *subobj, Py_ssize_t start,
+ Py_ssize_t end);

/* The maketrans() static method. */
extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to);
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-03-09-49-15.gh-issue-117431.WAqRgc.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-09-49-15.gh-issue-117431.WAqRgc.rst
new file mode 100644
index 00000000000000..17374d0d5c575b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-09-49-15.gh-issue-117431.WAqRgc.rst
@@ -0,0 +1,6 @@
+Improve the performance of the following :class:`bytes` and
+:class:`bytearray` methods by adapting them to the :c:macro:`METH_FASTCALL`
+calling convention:
+
+* :meth:`!endswith`
+* :meth:`!startswith`
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 5e3b3affbc76c5..8639496727536a 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1186,16 +1186,52 @@ bytearray_contains(PyObject *self, PyObject *arg)
return _Py_bytes_contains(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), arg);
}

+/*[clinic input]
+@text_signature "($self, prefix[, start[, end]], /)"
+bytearray.startswith
+
+ prefix as subobj: object
+ A bytes or a tuple of bytes to try.
+ start: slice_index(accept={int, NoneType}, c_default='0') = None
+ Optional start position. Default: start of the bytearray.
+ end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
+ Optional stop position. Default: end of the bytearray.
+ /
+
+Return True if the bytearray starts with the specified prefix, False otherwise.
+[clinic start generated code]*/
+
static PyObject *
-bytearray_startswith(PyByteArrayObject *self, PyObject *args)
+bytearray_startswith_impl(PyByteArrayObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end)
+/*[clinic end generated code: output=a3d9b6d44d3662a6 input=76385e0b376b45c1]*/
{
- return _Py_bytes_startswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
+ return _Py_bytes_startswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
+ subobj, start, end);
}

+/*[clinic input]
+@text_signature "($self, suffix[, start[, end]], /)"
+bytearray.endswith
+
+ suffix as subobj: object
+ A bytes or a tuple of bytes to try.
+ start: slice_index(accept={int, NoneType}, c_default='0') = None
+ Optional start position. Default: start of the bytearray.
+ end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
+ Optional stop position. Default: end of the bytearray.
+ /
+
+Return True if the bytearray ends with the specified suffix, False otherwise.
+[clinic start generated code]*/
+
static PyObject *
-bytearray_endswith(PyByteArrayObject *self, PyObject *args)
+bytearray_endswith_impl(PyByteArrayObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end)
+/*[clinic end generated code: output=e75ea8c227954caa input=9b8baa879aa3d74b]*/
{
- return _Py_bytes_endswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
+ return _Py_bytes_endswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
+ subobj, start, end);
}

/*[clinic input]
@@ -2203,8 +2239,7 @@ bytearray_methods[] = {
{"count", (PyCFunction)bytearray_count, METH_VARARGS,
_Py_count__doc__},
BYTEARRAY_DECODE_METHODDEF
- {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS,
- _Py_endswith__doc__},
+ BYTEARRAY_ENDSWITH_METHODDEF
STRINGLIB_EXPANDTABS_METHODDEF
BYTEARRAY_EXTEND_METHODDEF
{"find", (PyCFunction)bytearray_find, METH_VARARGS,
@@ -2249,8 +2284,7 @@ bytearray_methods[] = {
BYTEARRAY_RSTRIP_METHODDEF
BYTEARRAY_SPLIT_METHODDEF
BYTEARRAY_SPLITLINES_METHODDEF
- {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
- _Py_startswith__doc__},
+ BYTEARRAY_STARTSWITH_METHODDEF
BYTEARRAY_STRIP_METHODDEF
{"swapcase", stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index c1bc6383df30ce..21b6668171bf61 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -771,66 +771,47 @@ tailmatch(const char *str, Py_ssize_t len, PyObject *substr,

static PyObject *
_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
- const char *function_name, PyObject *args,
+ const char *function_name, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end,
int direction)
{
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *subobj = NULL;
- int result;
-
- if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
- return NULL;
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
- start, end, direction);
- if (result == -1)
+ PyObject *item = PyTuple_GET_ITEM(subobj, i);
+ int result = tailmatch(str, len, item, start, end, direction);
+ if (result < 0) {
return NULL;
+ }
else if (result) {
Py_RETURN_TRUE;
}
}
Py_RETURN_FALSE;
}
- result = tailmatch(str, len, subobj, start, end, direction);
+ int result = tailmatch(str, len, subobj, start, end, direction);
if (result == -1) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
PyErr_Format(PyExc_TypeError,
"%s first arg must be bytes or a tuple of bytes, "
"not %s",
function_name, Py_TYPE(subobj)->tp_name);
+ }
return NULL;
}
- else
- return PyBool_FromLong(result);
+ return PyBool_FromLong(result);
}

-PyDoc_STRVAR_shared(_Py_startswith__doc__,
-"B.startswith(prefix[, start[, end]]) -> bool\n\
-\n\
-Return True if B starts with the specified prefix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-prefix can also be a tuple of bytes to try.");
-
PyObject *
-_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
+_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end)
{
- return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
+ return _Py_bytes_tailmatch(str, len, "startswith", subobj, start, end, -1);
}

-PyDoc_STRVAR_shared(_Py_endswith__doc__,
-"B.endswith(suffix[, start[, end]]) -> bool\n\
-\n\
-Return True if B ends with the specified suffix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-suffix can also be a tuple of bytes to try.");
-
PyObject *
-_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
+_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end)
{
- return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
+ return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1);
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 256e01f54f0782..d7b0c6b7b01aa9 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -2285,16 +2285,52 @@ bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
return PyBytes_FromStringAndSize(self_start, self_len);
}

+/*[clinic input]
+@text_signature "($self, prefix[, start[, end]], /)"
+bytes.startswith
+
+ prefix as subobj: object
+ A bytes or a tuple of bytes to try.
+ start: slice_index(accept={int, NoneType}, c_default='0') = None
+ Optional start position. Default: start of the bytes.
+ end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
+ Optional stop position. Default: end of the bytes.
+ /
+
+Return True if the bytes starts with the specified prefix, False otherwise.
+[clinic start generated code]*/
+
static PyObject *
-bytes_startswith(PyBytesObject *self, PyObject *args)
+bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end)
+/*[clinic end generated code: output=b1e8da1cbd528e8c input=8a4165df8adfa6c9]*/
{
- return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
+ return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
+ subobj, start, end);
}

+/*[clinic input]
+@text_signature "($self, suffix[, start[, end]], /)"
+bytes.endswith
+
+ suffix as subobj: object
+ A bytes or a tuple of bytes to try.
+ start: slice_index(accept={int, NoneType}, c_default='0') = None
+ Optional start position. Default: start of the bytes.
+ end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
+ Optional stop position. Default: end of the bytes.
+ /
+
+Return True if the bytes ends with the specified suffix, False otherwise.
+[clinic start generated code]*/
+
static PyObject *
-bytes_endswith(PyBytesObject *self, PyObject *args)
+bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
+ Py_ssize_t end)
+/*[clinic end generated code: output=038b633111f3629d input=b5c3407a2a5c9aac]*/
{
- return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
+ return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
+ subobj, start, end);
}


@@ -2491,8 +2527,7 @@ bytes_methods[] = {
{"count", (PyCFunction)bytes_count, METH_VARARGS,
_Py_count__doc__},
BYTES_DECODE_METHODDEF
- {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
- _Py_endswith__doc__},
+ BYTES_ENDSWITH_METHODDEF
STRINGLIB_EXPANDTABS_METHODDEF
{"find", (PyCFunction)bytes_find, METH_VARARGS,
_Py_find__doc__},
@@ -2532,8 +2567,7 @@ bytes_methods[] = {
BYTES_RSTRIP_METHODDEF
BYTES_SPLIT_METHODDEF
BYTES_SPLITLINES_METHODDEF
- {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
- _Py_startswith__doc__},
+ BYTES_STARTSWITH_METHODDEF
BYTES_STRIP_METHODDEF
{"swapcase", stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h
index d95245067e2608..dabc2b16c94fce 100644
--- a/Objects/clinic/bytearrayobject.c.h
+++ b/Objects/clinic/bytearrayobject.c.h
@@ -137,6 +137,108 @@ bytearray_copy(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
return bytearray_copy_impl(self);
}

+PyDoc_STRVAR(bytearray_startswith__doc__,
+"startswith($self, prefix[, start[, end]], /)\n"
+"--\n"
+"\n"
+"Return True if the bytearray starts with the specified prefix, False otherwise.\n"
+"\n"
+" prefix\n"
+" A bytes or a tuple of bytes to try.\n"
+" start\n"
+" Optional start position. Default: start of the bytearray.\n"
+" end\n"
+" Optional stop position. Default: end of the bytearray.");
+
+#define BYTEARRAY_STARTSWITH_METHODDEF \
+ {"startswith", _PyCFunction_CAST(bytearray_startswith), METH_FASTCALL, bytearray_startswith__doc__},
+
+static PyObject *
+bytearray_startswith_impl(PyByteArrayObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end);
+
+static PyObject *
+bytearray_startswith(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyObject *subobj;
+ Py_ssize_t start = 0;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
+
+ if (!_PyArg_CheckPositional("startswith", nargs, 1, 3)) {
+ goto exit;
+ }
+ subobj = args[0];
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[1], &start)) {
+ goto exit;
+ }
+ if (nargs < 3) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[2], &end)) {
+ goto exit;
+ }
+skip_optional:
+ return_value = bytearray_startswith_impl(self, subobj, start, end);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(bytearray_endswith__doc__,
+"endswith($self, suffix[, start[, end]], /)\n"
+"--\n"
+"\n"
+"Return True if the bytearray ends with the specified suffix, False otherwise.\n"
+"\n"
+" suffix\n"
+" A bytes or a tuple of bytes to try.\n"
+" start\n"
+" Optional start position. Default: start of the bytearray.\n"
+" end\n"
+" Optional stop position. Default: end of the bytearray.");
+
+#define BYTEARRAY_ENDSWITH_METHODDEF \
+ {"endswith", _PyCFunction_CAST(bytearray_endswith), METH_FASTCALL, bytearray_endswith__doc__},
+
+static PyObject *
+bytearray_endswith_impl(PyByteArrayObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end);
+
+static PyObject *
+bytearray_endswith(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyObject *subobj;
+ Py_ssize_t start = 0;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
+
+ if (!_PyArg_CheckPositional("endswith", nargs, 1, 3)) {
+ goto exit;
+ }
+ subobj = args[0];
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[1], &start)) {
+ goto exit;
+ }
+ if (nargs < 3) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[2], &end)) {
+ goto exit;
+ }
+skip_optional:
+ return_value = bytearray_endswith_impl(self, subobj, start, end);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(bytearray_removeprefix__doc__,
"removeprefix($self, prefix, /)\n"
"--\n"
@@ -1261,4 +1363,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
{
return bytearray_sizeof_impl(self);
}
-/*[clinic end generated code: output=0797a5e03cda2a16 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0147908e97ebe882 input=a9049054013a1b77]*/
diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h
index 1e45be3e7aefb3..05e182778aece1 100644
--- a/Objects/clinic/bytesobject.c.h
+++ b/Objects/clinic/bytesobject.c.h
@@ -652,6 +652,108 @@ bytes_removesuffix(PyBytesObject *self, PyObject *arg)
return return_value;
}

+PyDoc_STRVAR(bytes_startswith__doc__,
+"startswith($self, prefix[, start[, end]], /)\n"
+"--\n"
+"\n"
+"Return True if the bytes starts with the specified prefix, False otherwise.\n"
+"\n"
+" prefix\n"
+" A bytes or a tuple of bytes to try.\n"
+" start\n"
+" Optional start position. Default: start of the bytes.\n"
+" end\n"
+" Optional stop position. Default: end of the bytes.");
+
+#define BYTES_STARTSWITH_METHODDEF \
+ {"startswith", _PyCFunction_CAST(bytes_startswith), METH_FASTCALL, bytes_startswith__doc__},
+
+static PyObject *
+bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
+ Py_ssize_t start, Py_ssize_t end);
+
+static PyObject *
+bytes_startswith(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyObject *subobj;
+ Py_ssize_t start = 0;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
+
+ if (!_PyArg_CheckPositional("startswith", nargs, 1, 3)) {
+ goto exit;
+ }
+ subobj = args[0];
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[1], &start)) {
+ goto exit;
+ }
+ if (nargs < 3) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[2], &end)) {
+ goto exit;
+ }
+skip_optional:
+ return_value = bytes_startswith_impl(self, subobj, start, end);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(bytes_endswith__doc__,
+"endswith($self, suffix[, start[, end]], /)\n"
+"--\n"
+"\n"
+"Return True if the bytes ends with the specified suffix, False otherwise.\n"
+"\n"
+" suffix\n"
+" A bytes or a tuple of bytes to try.\n"
+" start\n"
+" Optional start position. Default: start of the bytes.\n"
+" end\n"
+" Optional stop position. Default: end of the bytes.");
+
+#define BYTES_ENDSWITH_METHODDEF \
+ {"endswith", _PyCFunction_CAST(bytes_endswith), METH_FASTCALL, bytes_endswith__doc__},
+
+static PyObject *
+bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
+ Py_ssize_t end);
+
+static PyObject *
+bytes_endswith(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyObject *subobj;
+ Py_ssize_t start = 0;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
+
+ if (!_PyArg_CheckPositional("endswith", nargs, 1, 3)) {
+ goto exit;
+ }
+ subobj = args[0];
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[1], &start)) {
+ goto exit;
+ }
+ if (nargs < 3) {
+ goto skip_optional;
+ }
+ if (!_PyEval_SliceIndex(args[2], &end)) {
+ goto exit;
+ }
+skip_optional:
+ return_value = bytes_endswith_impl(self, subobj, start, end);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(bytes_decode__doc__,
"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
"--\n"
@@ -1029,4 +1131,4 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
exit:
return return_value;
}
-/*[clinic end generated code: output=8a49dbbd78914a6f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=f2b10ccd2e3155c3 input=a9049054013a1b77]*/

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com