gh-129173: Use `_PyUnicodeError_GetParams` in `PyCodec_SurrogateEscapeErrors` (GH-129175)

This commit is contained in:
Bénédikt Tran 2025-02-20 14:18:47 +01:00 committed by GitHub
parent 519c2c6740
commit e24a1ac17c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 83 additions and 66 deletions

View File

@ -1359,77 +1359,92 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
} }
static PyObject * // --- handler: 'surrogateescape' ---------------------------------------------
PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { static PyObject *
char *outp; _PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc)
if (PyUnicodeEncodeError_GetStart(exc, &start)) {
return NULL; PyObject *obj;
if (PyUnicodeEncodeError_GetEnd(exc, &end)) Py_ssize_t start, end, slen;
return NULL; if (_PyUnicodeError_GetParams(exc,
if (!(object = PyUnicodeEncodeError_GetObject(exc))) &obj, NULL,
return NULL; &start, &end, &slen, false) < 0)
res = PyBytes_FromStringAndSize(NULL, end-start); {
if (!res) {
Py_DECREF(object);
return NULL; return NULL;
} }
outp = PyBytes_AsString(res);
for (i = start; i < end; i++) { PyObject *res = PyBytes_FromStringAndSize(NULL, slen);
/* object is guaranteed to be "ready" */ if (res == NULL) {
Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); Py_DECREF(obj);
return NULL;
}
char *outp = PyBytes_AsString(res);
for (Py_ssize_t i = start; i < end; i++) {
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
if (ch < 0xdc80 || ch > 0xdcff) { if (ch < 0xdc80 || ch > 0xdcff) {
/* Not a UTF-8b surrogate, fail with original exception */ /* Not a UTF-8b surrogate, fail with original exception. */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc); Py_DECREF(obj);
Py_DECREF(res); Py_DECREF(res);
Py_DECREF(object); PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL; return NULL;
} }
*outp++ = ch - 0xdc00; *outp++ = ch - 0xdc00;
} }
restuple = Py_BuildValue("(On)", res, end); Py_DECREF(obj);
Py_DECREF(res);
Py_DECREF(object); return Py_BuildValue("(Nn)", res, end);
return restuple;
} }
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
PyObject *str;
const unsigned char *p; static PyObject *
_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc)
{
PyObject *obj;
Py_ssize_t start, end, slen;
if (_PyUnicodeError_GetParams(exc,
&obj, NULL,
&start, &end, &slen, true) < 0)
{
return NULL;
}
Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
int consumed = 0; int consumed = 0;
if (PyUnicodeDecodeError_GetStart(exc, &start)) const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
return NULL; while (consumed < 4 && consumed < slen) {
if (PyUnicodeDecodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeDecodeError_GetObject(exc)))
return NULL;
p = (const unsigned char*)PyBytes_AS_STRING(object);
while (consumed < 4 && consumed < end-start) {
/* Refuse to escape ASCII bytes. */ /* Refuse to escape ASCII bytes. */
if (p[start+consumed] < 128) if (p[start + consumed] < 128) {
break; break;
}
ch[consumed] = 0xdc00 + p[start + consumed]; ch[consumed] = 0xdc00 + p[start + consumed];
consumed++; consumed++;
} }
Py_DECREF(object); Py_DECREF(obj);
if (!consumed) {
/* codec complained about ASCII byte. */ if (consumed == 0) {
/* Codec complained about ASCII byte. */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc); PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL; return NULL;
} }
str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
if (str == NULL) PyObject *str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
if (str == NULL) {
return NULL; return NULL;
}
return Py_BuildValue("(Nn)", str, start + consumed); return Py_BuildValue("(Nn)", str, start + consumed);
} }
static PyObject *
PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
if (_PyIsUnicodeEncodeError(exc)) {
return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc);
}
else if (_PyIsUnicodeDecodeError(exc)) {
return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc);
}
else { else {
wrong_exception_type(exc); wrong_exception_type(exc);
return NULL; return NULL;
@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
} }
static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) static inline PyObject *
surrogateescape_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{ {
return PyCodec_SurrogateEscapeErrors(exc); return PyCodec_SurrogateEscapeErrors(exc);
} }
PyStatus PyStatus
_PyCodec_InitRegistry(PyInterpreterState *interp) _PyCodec_InitRegistry(PyInterpreterState *interp)
{ {