Mailing List Archive

python/dist/src/Objects unicodeobject.c,2.141,2.142
Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv22440

Modified Files:
unicodeobject.c
Log Message:
Back out 2.140.


Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.141
retrieving revision 2.142
diff -C2 -d -r2.141 -r2.142
*** unicodeobject.c 21 Apr 2002 03:26:37 -0000 2.141
--- unicodeobject.c 21 Apr 2002 09:59:45 -0000 2.142
***************
*** 1173,1176 ****
--- 1173,1182 ----
#endif

+ /* Allocation strategy: we default to Latin-1, then do one resize
+ whenever we hit an order boundary. The assumption is that
+ characters from higher orders usually occur often enough to warrant
+ this.
+ */
+
PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
***************
*** 1180,1222 ****
PyObject *v;
char *p;
! int allocated = 0;
! int i;
!
/* Short-cut for emtpy strings */
if (size == 0)
return PyString_FromStringAndSize(NULL, 0);

! for (i = 0; i < size; ) {
! Py_UCS4 ch = s[i++];
! if (ch < 0x80)
! allocated += 1;
! else if (ch < 0x0800)
! allocated += 2;
! else if (ch < 0x10000) {
! /* Check for high surrogate */
! if (0xD800 <= ch && ch <= 0xDBFF &&
! i != size &&
! 0xDC00 <= s[i] && s[i] <= 0xDFFF) {
! allocated += 1;
! i++;
! }
! allocated += 3;
! } else
! allocated += 4;
! }
!
! v = PyString_FromStringAndSize(NULL, allocated);
if (v == NULL)
return NULL;

p = PyString_AS_STRING(v);
! for (i = 0; i < size; ) {
Py_UCS4 ch = s[i++];

! if (ch < 0x80) {
*p++ = (char) ch;
- }

else if (ch < 0x0800) {
*p++ = (char)(0xc0 | (ch >> 6));
*p++ = (char)(0x80 | (ch & 0x3f));
--- 1186,1212 ----
PyObject *v;
char *p;
! int i = 0;
! int overalloc = 2;
! int len;
!
/* Short-cut for emtpy strings */
if (size == 0)
return PyString_FromStringAndSize(NULL, 0);

! v = PyString_FromStringAndSize(NULL, overalloc * size);
if (v == NULL)
return NULL;

p = PyString_AS_STRING(v);
!
! while (i < size) {
Py_UCS4 ch = s[i++];

! if (ch < 0x80)
! /* Encode ASCII */
*p++ = (char) ch;

else if (ch < 0x0800) {
+ /* Encode Latin-1 */
*p++ = (char)(0xc0 | (ch >> 6));
*p++ = (char)(0x80 | (ch & 0x3f));
***************
*** 1224,1258 ****

else {
!
if (ch < 0x10000) {
! /* Check for high surrogate */
if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
Py_UCS4 ch2 = s[i];
! /* Check for low surrogate */
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
! ch = ((ch - 0xD800)<<10 | (ch2-0xDC00))+0x10000;
! *p++ = (char)((ch >> 18) | 0xf0);
! *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
! *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
! *p++ = (char)(0x80 | (ch & 0x3f));
! i++;
! continue;
}
/* Fall through: handles isolated high surrogates */
}
*p++ = (char)(0xe0 | (ch >> 12));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
!
! } else {
! *p++ = (char)(0xf0 | (ch>>18));
! *p++ = (char)(0x80 | ((ch>>12) & 0x3f));
! *p++ = (char)(0x80 | ((ch>>6) & 0x3f));
! *p++ = (char)(0x80 | (ch & 0x3f));
}
}
}
! assert(p - PyString_AS_STRING(v) == allocated);
return v;
}

--- 1214,1270 ----

else {
! /* Encode UCS2 Unicode ordinals */
if (ch < 0x10000) {
!
! /* Special case: check for high surrogate */
if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
Py_UCS4 ch2 = s[i];
! /* Check for low surrogate and combine the two to
! form a UCS4 value */
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
! ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
! i++;
! goto encodeUCS4;
}
/* Fall through: handles isolated high surrogates */
}
+
+ if (overalloc < 3) {
+ len = (int)(p - PyString_AS_STRING(v));
+ overalloc = 3;
+ if (_PyString_Resize(&v, overalloc * size))
+ goto onError;
+ p = PyString_AS_STRING(v) + len;
+ }
*p++ = (char)(0xe0 | (ch >> 12));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
! continue;
! }
!
! /* Encode UCS4 Unicode ordinals */
! encodeUCS4:
! if (overalloc < 4) {
! len = (int)(p - PyString_AS_STRING(v));
! overalloc = 4;
! if (_PyString_Resize(&v, overalloc * size))
! goto onError;
! p = PyString_AS_STRING(v) + len;
}
+ *p++ = (char)(0xf0 | (ch >> 18));
+ *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
+ *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+ *p++ = (char)(0x80 | (ch & 0x3f));
}
}
! *p = '\0';
! assert((p - PyString_AS_STRING(v)) <= overalloc*size);
! if (_PyString_Resize(&v, (int)(p - PyString_AS_STRING(v))))
! goto onError;
return v;
+
+ onError:
+ Py_DECREF(v);
+ return NULL;
}