gh-129173: Use `_PyUnicodeError_GetParams` in `PyCodec_SurrogateEscapeErrors` (GH-129175)

This commit is contained in:
Bénédikt Tran 2025-02-20 14:18:47 +01:00 committed by GitHub
parent 519c2c6740
commit e24a1ac17c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 83 additions and 66 deletions

View File

@ -1359,76 +1359,91 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
} }
static PyObject * // --- handler: 'surrogateescape' ---------------------------------------------
PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { static PyObject *
char *outp; _PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc)
if (PyUnicodeEncodeError_GetStart(exc, &start)) {
return NULL; PyObject *obj;
if (PyUnicodeEncodeError_GetEnd(exc, &end)) Py_ssize_t start, end, slen;
return NULL; if (_PyUnicodeError_GetParams(exc,
if (!(object = PyUnicodeEncodeError_GetObject(exc))) &obj, NULL,
return NULL; &start, &end, &slen, false) < 0)
res = PyBytes_FromStringAndSize(NULL, end-start); {
if (!res) { return NULL;
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
for (i = start; i < end; i++) {
/* object is guaranteed to be "ready" */
Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xdc80 || ch > 0xdcff) {
/* Not a UTF-8b surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Py_DECREF(res);
Py_DECREF(object);
return NULL;
}
*outp++ = ch - 0xdc00;
}
restuple = Py_BuildValue("(On)", res, end);
Py_DECREF(res);
Py_DECREF(object);
return restuple;
} }
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
PyObject *str; PyObject *res = PyBytes_FromStringAndSize(NULL, slen);
const unsigned char *p; if (res == NULL) {
Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ Py_DECREF(obj);
int consumed = 0; return NULL;
if (PyUnicodeDecodeError_GetStart(exc, &start)) }
return NULL;
if (PyUnicodeDecodeError_GetEnd(exc, &end)) char *outp = PyBytes_AsString(res);
return NULL; for (Py_ssize_t i = start; i < end; i++) {
if (!(object = PyUnicodeDecodeError_GetObject(exc))) Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
return NULL; if (ch < 0xdc80 || ch > 0xdcff) {
p = (const unsigned char*)PyBytes_AS_STRING(object); /* Not a UTF-8b surrogate, fail with original exception. */
while (consumed < 4 && consumed < end-start) { Py_DECREF(obj);
/* Refuse to escape ASCII bytes. */ Py_DECREF(res);
if (p[start+consumed] < 128)
break;
ch[consumed] = 0xdc00 + p[start+consumed];
consumed++;
}
Py_DECREF(object);
if (!consumed) {
/* codec complained about ASCII byte. */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc); PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL; return NULL;
} }
str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); *outp++ = ch - 0xdc00;
if (str == NULL) }
return NULL; Py_DECREF(obj);
return Py_BuildValue("(Nn)", str, start+consumed);
return Py_BuildValue("(Nn)", res, end);
}
/*
 * Decoding half of the 'surrogateescape' error handler.
 *
 * Fetches the offending input object and the error range from `exc`, then
 * maps up to four consecutive non-ASCII bytes, beginning at `start`, to the
 * lone surrogates U+DC80..U+DCFF (0xdc00 + byte value).
 *
 * Returns a new `(replacement, position)` tuple where `position` is `start`
 * plus the number of bytes that were escaped.  Returns NULL with an exception
 * set on failure; when not even one byte can be escaped (the first byte is
 * ASCII, or the range is empty), `exc` itself is raised again.
 */
static PyObject *
_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc)
{
    PyObject *obj;
    Py_ssize_t start, end, slen;
    /* NOTE(review): the trailing `true` presumably asks the helper for a
     * bytes object (it is read through PyBytes_AS_STRING below), and `slen`
     * presumably is the length of the reported error range -- confirm against
     * _PyUnicodeError_GetParams.  `end` is filled in but not needed here. */
    if (_PyUnicodeError_GetParams(exc,
                                  &obj, NULL,
                                  &start, &end, &slen, true) < 0)
    {
        return NULL;
    }

    Py_UCS2 escaped[4];  /* one code unit per bad byte, at most 4 per call */
    const unsigned char *bytes = (const unsigned char *)PyBytes_AS_STRING(obj);
    int count = 0;
    for (; count < 4 && count < slen; count++) {
        const unsigned char byte = bytes[start + count];
        if (byte < 128) {
            /* ASCII bytes are never escaped: stop at the first one. */
            break;
        }
        escaped[count] = 0xdc00 + byte;
    }
    /* The raw bytes are not read past this point; release our reference. */
    Py_DECREF(obj);

    if (count == 0) {
        /* Nothing could be escaped: fail with the original exception. */
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
        return NULL;
    }

    PyObject *replacement = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND,
                                                      escaped, count);
    /* The "N" format hands the reference to `replacement` over to the tuple. */
    return replacement == NULL
        ? NULL
        : Py_BuildValue("(Nn)", replacement, start + count);
}
static PyObject *
PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
if (_PyIsUnicodeEncodeError(exc)) {
return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc);
}
else if (_PyIsUnicodeDecodeError(exc)) {
return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc);
} }
else { else {
wrong_exception_type(exc); wrong_exception_type(exc);
@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
} }
static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) static inline PyObject *
surrogateescape_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{ {
return PyCodec_SurrogateEscapeErrors(exc); return PyCodec_SurrogateEscapeErrors(exc);
} }
PyStatus PyStatus
_PyCodec_InitRegistry(PyInterpreterState *interp) _PyCodec_InitRegistry(PyInterpreterState *interp)
{ {