mirror of https://github.com/python/cpython.git
gh-129173: Use `_PyUnicodeError_GetParams` in `PyCodec_SurrogateEscapeErrors` (GH-129175)
This commit is contained in:
parent
519c2c6740
commit
e24a1ac17c
149
Python/codecs.c
149
Python/codecs.c
|
@ -1359,76 +1359,91 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
// --- handler: 'surrogateescape' ---------------------------------------------
|
||||||
PyCodec_SurrogateEscapeErrors(PyObject *exc)
|
|
||||||
{
|
|
||||||
PyObject *restuple;
|
|
||||||
PyObject *object;
|
|
||||||
Py_ssize_t i;
|
|
||||||
Py_ssize_t start;
|
|
||||||
Py_ssize_t end;
|
|
||||||
PyObject *res;
|
|
||||||
|
|
||||||
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
|
static PyObject *
|
||||||
char *outp;
|
_PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc)
|
||||||
if (PyUnicodeEncodeError_GetStart(exc, &start))
|
{
|
||||||
return NULL;
|
PyObject *obj;
|
||||||
if (PyUnicodeEncodeError_GetEnd(exc, &end))
|
Py_ssize_t start, end, slen;
|
||||||
return NULL;
|
if (_PyUnicodeError_GetParams(exc,
|
||||||
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
|
&obj, NULL,
|
||||||
return NULL;
|
&start, &end, &slen, false) < 0)
|
||||||
res = PyBytes_FromStringAndSize(NULL, end-start);
|
{
|
||||||
if (!res) {
|
return NULL;
|
||||||
Py_DECREF(object);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
outp = PyBytes_AsString(res);
|
|
||||||
for (i = start; i < end; i++) {
|
|
||||||
/* object is guaranteed to be "ready" */
|
|
||||||
Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
|
|
||||||
if (ch < 0xdc80 || ch > 0xdcff) {
|
|
||||||
/* Not a UTF-8b surrogate, fail with original exception */
|
|
||||||
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
|
||||||
Py_DECREF(res);
|
|
||||||
Py_DECREF(object);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
*outp++ = ch - 0xdc00;
|
|
||||||
}
|
|
||||||
restuple = Py_BuildValue("(On)", res, end);
|
|
||||||
Py_DECREF(res);
|
|
||||||
Py_DECREF(object);
|
|
||||||
return restuple;
|
|
||||||
}
|
}
|
||||||
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
|
|
||||||
PyObject *str;
|
PyObject *res = PyBytes_FromStringAndSize(NULL, slen);
|
||||||
const unsigned char *p;
|
if (res == NULL) {
|
||||||
Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
|
Py_DECREF(obj);
|
||||||
int consumed = 0;
|
return NULL;
|
||||||
if (PyUnicodeDecodeError_GetStart(exc, &start))
|
}
|
||||||
return NULL;
|
|
||||||
if (PyUnicodeDecodeError_GetEnd(exc, &end))
|
char *outp = PyBytes_AsString(res);
|
||||||
return NULL;
|
for (Py_ssize_t i = start; i < end; i++) {
|
||||||
if (!(object = PyUnicodeDecodeError_GetObject(exc)))
|
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
|
||||||
return NULL;
|
if (ch < 0xdc80 || ch > 0xdcff) {
|
||||||
p = (const unsigned char*)PyBytes_AS_STRING(object);
|
/* Not a UTF-8b surrogate, fail with original exception. */
|
||||||
while (consumed < 4 && consumed < end-start) {
|
Py_DECREF(obj);
|
||||||
/* Refuse to escape ASCII bytes. */
|
Py_DECREF(res);
|
||||||
if (p[start+consumed] < 128)
|
|
||||||
break;
|
|
||||||
ch[consumed] = 0xdc00 + p[start+consumed];
|
|
||||||
consumed++;
|
|
||||||
}
|
|
||||||
Py_DECREF(object);
|
|
||||||
if (!consumed) {
|
|
||||||
/* codec complained about ASCII byte. */
|
|
||||||
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
|
*outp++ = ch - 0xdc00;
|
||||||
if (str == NULL)
|
}
|
||||||
return NULL;
|
Py_DECREF(obj);
|
||||||
return Py_BuildValue("(Nn)", str, start+consumed);
|
|
||||||
|
return Py_BuildValue("(Nn)", res, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc)
|
||||||
|
{
|
||||||
|
PyObject *obj;
|
||||||
|
Py_ssize_t start, end, slen;
|
||||||
|
if (_PyUnicodeError_GetParams(exc,
|
||||||
|
&obj, NULL,
|
||||||
|
&start, &end, &slen, true) < 0)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
|
||||||
|
int consumed = 0;
|
||||||
|
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
|
||||||
|
while (consumed < 4 && consumed < slen) {
|
||||||
|
/* Refuse to escape ASCII bytes. */
|
||||||
|
if (p[start + consumed] < 128) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ch[consumed] = 0xdc00 + p[start + consumed];
|
||||||
|
consumed++;
|
||||||
|
}
|
||||||
|
Py_DECREF(obj);
|
||||||
|
|
||||||
|
if (consumed == 0) {
|
||||||
|
/* Codec complained about ASCII byte. */
|
||||||
|
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
|
||||||
|
if (str == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return Py_BuildValue("(Nn)", str, start + consumed);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
PyCodec_SurrogateEscapeErrors(PyObject *exc)
|
||||||
|
{
|
||||||
|
if (_PyIsUnicodeEncodeError(exc)) {
|
||||||
|
return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc);
|
||||||
|
}
|
||||||
|
else if (_PyIsUnicodeDecodeError(exc)) {
|
||||||
|
return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
wrong_exception_type(exc);
|
wrong_exception_type(exc);
|
||||||
|
@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
|
static inline PyObject *
|
||||||
|
surrogateescape_errors(PyObject *Py_UNUSED(self), PyObject *exc)
|
||||||
{
|
{
|
||||||
return PyCodec_SurrogateEscapeErrors(exc);
|
return PyCodec_SurrogateEscapeErrors(exc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PyStatus
|
PyStatus
|
||||||
_PyCodec_InitRegistry(PyInterpreterState *interp)
|
_PyCodec_InitRegistry(PyInterpreterState *interp)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue