Mailing List Archive

SVN: zope.ucol/trunk/src/zope/ucol/ Updated API to match ICollator in
Log message for revision 40672:
Updated API to match ICollator in
http://dev.zope.org/Zope3/LocaleSpecificTextCollation

Also:

- Made the API accept strings, as long as they are ASCII.

- Added read-only attributes to query a collator's locale and whether
default collation data was used.

- Changed the key-allocation strategy to require fewer
memory-allocation retries, at least for the (western) test data
used.

- Added code to overcome an apparent buffer-overflow bug in ICU. :/


Changed:
U zope.ucol/trunk/src/zope/ucol/__init__.py
U zope.ucol/trunk/src/zope/ucol/_zope_ucol.c
U zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx
U zope.ucol/trunk/src/zope/ucol/tests.py

-=-
Modified: zope.ucol/trunk/src/zope/ucol/__init__.py
===================================================================
--- zope.ucol/trunk/src/zope/ucol/__init__.py 2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/__init__.py 2005-12-09 21:13:42 UTC (rev 40672)
@@ -18,22 +18,50 @@
It provides locale-based text collation.

To perform collation, you need to create a collator key factory for
-your locale. We'll use the "root" locale:
+your locale. We'll use the special "root" locale in this example:

>>> import zope.ucol
- >>> key = zope.ucol.KeyFactory("root")
+ >>> collator = zope.ucol.Collator("root")

-The factory is a callable for creating collation keys from unicode
-strings. The factory can be passed as the key argument to list.sort
+The collator has a key method for creating collation keys from unicode
+strings. The method can be passed as the key argument to list.sort
or to the built-in sorted function.

>>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim',
- ... u'\U00023119', u'\u62d5'], key=key)
+ ... u'\U00023119', u'\u62d5'], key=collator.key)
[u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim',
u'\u62d5', u'\U00023119']

+There is a cmp method for comparing 2 unicode strings, which can also be
+used when sorting:

+ >>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim',
+ ... u'\U00023119', u'\u62d5'], collator.cmp)
+ [u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim',
+ u'\u62d5', u'\U00023119']
+
+Note that it is almost always more efficient to pass the key method to
+sorting functions, rather than the cmp method. The cmp method is more
+efficient in the special case that strings are long and few and when
+they tend to differ at their beginnings. This is because computing
+the entire key can be much more expensive than comparison when the
+order can be determined based on analyzing a small portion of the
+original strings.
+
+You can ask a collator for its locale:
+
+ >>> collator.locale
+ 'root'
+
+and you can find out whether default collation information was used:
+
+ >>> collator.used_default_information
+ 0
+ >>> collator = zope.ucol.Collator("eek")
+ >>> collator.used_default_information
+ 1
+
$Id$
"""

-from _zope_ucol import KeyFactory
+from _zope_ucol import Collator

Modified: zope.ucol/trunk/src/zope/ucol/_zope_ucol.c
===================================================================
--- zope.ucol/trunk/src/zope/ucol/_zope_ucol.c 2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/_zope_ucol.c 2005-12-09 21:13:42 UTC (rev 40672)
@@ -1,4 +1,4 @@
-/* Generated by Pyrex 0.9.3 on Wed Dec 7 11:30:33 2005 */
+/* Generated by Pyrex 0.9.3.1 on Fri Dec 9 16:03:01 2005 */

#include "Python.h"
#include "structmember.h"
@@ -15,9 +15,7 @@
typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/
-static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/
static void __Pyx_AddTraceback(char *funcname); /*proto*/
-static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/
static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
@@ -42,33 +40,41 @@
int need_to_free;
};

-staticforward PyTypeObject __pyx_type_10_zope_ucol_KeyFactory;
+staticforward PyTypeObject __pyx_type_10_zope_ucol_Collator;

-struct __pyx_obj_10_zope_ucol_KeyFactory {
+struct __pyx_obj_10_zope_ucol_Collator {
PyObject_HEAD
UCollator (*collator);
+ PyObject *locale;
+ int used_default_information;
};

-static PyTypeObject *__pyx_ptype_10_zope_ucol_unicode = 0;
static PyTypeObject *__pyx_ptype_10_zope_ucol_UCharString = 0;
-static PyTypeObject *__pyx_ptype_10_zope_ucol_KeyFactory = 0;
+static PyTypeObject *__pyx_ptype_10_zope_ucol_Collator = 0;

/* Implementation of _zope_ucol */


static PyObject *__pyx_n_sys;

+static PyObject *__pyx_n_unicode;
+static PyObject *__pyx_n_TypeError;
+static PyObject *__pyx_n_MemoryError;
static PyObject *__pyx_n_ValueError;

static PyObject *__pyx_k2p;
+static PyObject *__pyx_k3p;

-static char (__pyx_k2[]) = "Couldn't convert Python unicode data to ICU unicode data.";
+static char (__pyx_k2[]) = "Expected unicode string";
+static char (__pyx_k3[]) = "Couldn't convert Python unicode data to ICU unicode data.";

static int __pyx_f_10_zope_ucol_11UCharString___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_f_10_zope_ucol_11UCharString___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
- PyUnicodeObject *__pyx_v_text = 0;
+ PyObject *__pyx_v_text = 0;
int32_t __pyx_v_buffsize;
- UErrorCode __pyx_v_status;
+ enum UErrorCode __pyx_v_status;
+ Py_UNICODE (*__pyx_v_str);
+ int __pyx_v_length;
int __pyx_r;
int __pyx_1;
PyObject *__pyx_2 = 0;
@@ -78,64 +84,134 @@
if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_text)) return -1;
Py_INCREF(__pyx_v_self);
Py_INCREF(__pyx_v_text);
- if (!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_text), __pyx_ptype_10_zope_ucol_unicode, 1, "text")) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; goto __pyx_L1;}

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":78 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":86 */
+ __pyx_1 = (!PyUnicode_Check(__pyx_v_text));
+ if (__pyx_1) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":87 */
+ __pyx_1 = PyString_Check(__pyx_v_text);
+ if (__pyx_1) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":88 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_unicode); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+ __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+ Py_INCREF(__pyx_v_text);
+ PyTuple_SET_ITEM(__pyx_3, 0, __pyx_v_text);
+ __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ Py_DECREF(__pyx_3); __pyx_3 = 0;
+ Py_DECREF(__pyx_v_text);
+ __pyx_v_text = __pyx_4;
+ __pyx_4 = 0;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":89 */
+ if (!PyUnicode_Check(__pyx_v_text)) {
+ PyErr_SetNone(PyExc_AssertionError);
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 89; goto __pyx_L1;}
+ }
+ goto __pyx_L3;
+ }
+ /*else*/ {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":91 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_TypeError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+ __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+ Py_INCREF(__pyx_k2p);
+ PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k2p);
+ __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ Py_DECREF(__pyx_3); __pyx_3 = 0;
+ __Pyx_Raise(__pyx_4, 0, 0);
+ Py_DECREF(__pyx_4); __pyx_4 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+ }
+ __pyx_L3:;
+ goto __pyx_L2;
+ }
+ __pyx_L2:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":93 */
+ __pyx_v_length = PyUnicode_GET_SIZE(__pyx_v_text);
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":94 */
+ __pyx_v_str = PyUnicode_AS_UNICODE(__pyx_v_text);
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":97 */
__pyx_1 = ((sizeof(Py_UNICODE )) == 2);
if (__pyx_1) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":79 */
- ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = __pyx_v_text->str;
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":98 */
+ ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = __pyx_v_str;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":80 */
- ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length = __pyx_v_text->length;
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":99 */
+ ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length = __pyx_v_length;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":81 */
- Py_INCREF(((PyObject *)__pyx_v_text));
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":100 */
+ Py_INCREF(__pyx_v_text);
Py_DECREF(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base);
- ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base = ((PyObject *)__pyx_v_text);
+ ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base = __pyx_v_text;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":82 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":101 */
((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free = 0;
- goto __pyx_L2;
+ goto __pyx_L4;
}
/*else*/ {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":84 */
- __pyx_v_buffsize = ((2 * __pyx_v_text->length) + 1);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":103 */
+ __pyx_v_buffsize = ((2 * __pyx_v_length) + 1);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":85 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":104 */
((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = ((UChar (*))PyMem_Malloc((__pyx_v_buffsize * (sizeof(UChar )))));

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":86 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":105 */
+ __pyx_1 = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data == 0);
+ if (__pyx_1) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":106 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; goto __pyx_L1;}
+ __Pyx_Raise(__pyx_2, 0, 0);
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; goto __pyx_L1;}
+ goto __pyx_L5;
+ }
+ __pyx_L5:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":107 */
__pyx_v_status = 0;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":87 */
- u_strFromUTF32(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data,__pyx_v_buffsize,(&((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length),((UChar32 (*))__pyx_v_text->str),__pyx_v_text->length,(&__pyx_v_status));
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":108 */
+ u_strFromUTF32(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data,__pyx_v_buffsize,(&((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length),((UChar32 (*))__pyx_v_str),__pyx_v_length,(&__pyx_v_status));

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":89 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":110 */
+ if (!(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length <= __pyx_v_buffsize)) {
+ PyErr_SetNone(PyExc_AssertionError);
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 110; goto __pyx_L1;}
+ }
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":111 */
((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free = 1;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":90 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":112 */
__pyx_1 = U_FAILURE(__pyx_v_status);
if (__pyx_1) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":91 */
- __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
- __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
- Py_INCREF(__pyx_k2p);
- PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k2p);
- __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
- Py_DECREF(__pyx_2); __pyx_2 = 0;
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":113 */
+ __pyx_3 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+ __pyx_4 = PyTuple_New(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+ Py_INCREF(__pyx_k3p);
+ PyTuple_SET_ITEM(__pyx_4, 0, __pyx_k3p);
+ __pyx_2 = PyObject_CallObject(__pyx_3, __pyx_4); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
Py_DECREF(__pyx_3); __pyx_3 = 0;
- __Pyx_Raise(__pyx_4, 0, 0);
Py_DECREF(__pyx_4); __pyx_4 = 0;
- {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
- goto __pyx_L3;
+ __Pyx_Raise(__pyx_2, 0, 0);
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+ goto __pyx_L6;
}
- __pyx_L3:;
+ __pyx_L6:;
}
- __pyx_L2:;
+ __pyx_L4:;

__pyx_r = 0;
goto __pyx_L0;
@@ -156,17 +232,17 @@
int __pyx_1;
Py_INCREF(__pyx_v_self);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":96 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":118 */
__pyx_1 = ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free;
if (__pyx_1) {
__pyx_1 = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data != 0);
}
if (__pyx_1) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":97 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":119 */
PyMem_Free(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":98 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":120 */
((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = 0;
goto __pyx_L2;
}
@@ -178,89 +254,131 @@
Py_DECREF(__pyx_v_self);
}

-static PyObject *__pyx_k3p;
+static PyObject *__pyx_k4p;
+static PyObject *__pyx_k5p;

-static char (__pyx_k3[]) = "Couldn't create a collator";
+static char (__pyx_k4[]) = "String locale expected";
+static char (__pyx_k5[]) = "Couldn't create a collator";

-static int __pyx_f_10_zope_ucol_10KeyFactory___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static int __pyx_f_10_zope_ucol_10KeyFactory___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
- char (*__pyx_v_locale);
+static int __pyx_f_10_zope_ucol_8Collator___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_f_10_zope_ucol_8Collator___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_locale = 0;
UCollator (*__pyx_v_collator);
- UErrorCode __pyx_v_status;
+ enum UErrorCode __pyx_v_status;
int __pyx_r;
int __pyx_1;
PyObject *__pyx_2 = 0;
PyObject *__pyx_3 = 0;
PyObject *__pyx_4 = 0;
static char *__pyx_argnames[] = {"locale",0};
- if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "s", __pyx_argnames, &__pyx_v_locale)) return -1;
+ if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_locale)) return -1;
Py_INCREF(__pyx_v_self);
+ Py_INCREF(__pyx_v_locale);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":110 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":135 */
+ __pyx_1 = (!PyString_Check(__pyx_v_locale));
+ if (__pyx_1) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":136 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_TypeError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+ __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+ Py_INCREF(__pyx_k4p);
+ PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k4p);
+ __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ Py_DECREF(__pyx_3); __pyx_3 = 0;
+ __Pyx_Raise(__pyx_4, 0, 0);
+ Py_DECREF(__pyx_4); __pyx_4 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+ goto __pyx_L2;
+ }
+ __pyx_L2:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":138 */
__pyx_v_status = U_ZERO_ERROR;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":111 */
- __pyx_v_collator = ucol_open(__pyx_v_locale,(&__pyx_v_status));
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":139 */
+ __pyx_v_collator = ucol_open(PyString_AS_STRING(__pyx_v_locale),(&__pyx_v_status));

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":112 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":140 */
__pyx_1 = U_FAILURE(__pyx_v_status);
if (__pyx_1) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":113 */
- __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
- __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
- Py_INCREF(__pyx_k3p);
- PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k3p);
- __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":141 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+ __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+ Py_INCREF(__pyx_k5p);
+ PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k5p);
+ __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
Py_DECREF(__pyx_2); __pyx_2 = 0;
Py_DECREF(__pyx_3); __pyx_3 = 0;
__Pyx_Raise(__pyx_4, 0, 0);
Py_DECREF(__pyx_4); __pyx_4 = 0;
- {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
- goto __pyx_L2;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+ goto __pyx_L3;
}
- __pyx_L2:;
+ __pyx_L3:;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":114 */
- ((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator = __pyx_v_collator;
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":142 */
+ ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator = __pyx_v_collator;

+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":143 */
+ Py_INCREF(__pyx_v_locale);
+ Py_DECREF(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->locale);
+ ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->locale = __pyx_v_locale;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":144 */
+ __pyx_1 = (__pyx_v_status == U_USING_DEFAULT_WARNING);
+ if (__pyx_1) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":145 */
+ __pyx_v_status = 1;
+ goto __pyx_L4;
+ }
+ __pyx_L4:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":146 */
+ ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->used_default_information = __pyx_v_status;
+
__pyx_r = 0;
goto __pyx_L0;
__pyx_L1:;
Py_XDECREF(__pyx_2);
Py_XDECREF(__pyx_3);
Py_XDECREF(__pyx_4);
- __Pyx_AddTraceback("_zope_ucol.KeyFactory.__new__");
+ __Pyx_AddTraceback("_zope_ucol.Collator.__new__");
__pyx_r = -1;
__pyx_L0:;
Py_DECREF(__pyx_v_self);
+ Py_DECREF(__pyx_v_locale);
return __pyx_r;
}

-static void __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(PyObject *__pyx_v_self); /*proto*/
-static void __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(PyObject *__pyx_v_self) {
+static void __pyx_f_10_zope_ucol_8Collator___dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_f_10_zope_ucol_8Collator___dealloc__(PyObject *__pyx_v_self) {
int __pyx_1;
Py_INCREF(__pyx_v_self);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":117 */
- __pyx_1 = (((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator != 0);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":149 */
+ __pyx_1 = (((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator != 0);
if (__pyx_1) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":118 */
- ucol_close(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":150 */
+ ucol_close(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator);
goto __pyx_L2;
}
__pyx_L2:;

goto __pyx_L0;
- __Pyx_AddTraceback("_zope_ucol.KeyFactory.__dealloc__");
+ __Pyx_AddTraceback("_zope_ucol.Collator.__dealloc__");
__pyx_L0:;
Py_DECREF(__pyx_v_self);
}

-static PyObject *__pyx_f_10_zope_ucol_10KeyFactory___call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static PyObject *__pyx_f_10_zope_ucol_10KeyFactory___call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
- PyUnicodeObject *__pyx_v_text = 0;
+static PyObject *__pyx_f_10_zope_ucol_8Collator_key(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_10_zope_ucol_8Collator_key[] = "Compute a collation key for the given unicode text.\n\n Of course, the key is only valid for the given locale.\n ";
+static PyObject *__pyx_f_10_zope_ucol_8Collator_key(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_text = 0;
char (*__pyx_v_buffer);
int32_t __pyx_v_bufsize;
int32_t __pyx_v_size;
@@ -276,55 +394,80 @@
Py_INCREF(__pyx_v_text);
__pyx_v_icutext = Py_None; Py_INCREF(__pyx_v_icutext);
__pyx_v_result = Py_None; Py_INCREF(__pyx_v_result);
- if (!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_text), __pyx_ptype_10_zope_ucol_unicode, 1, "text")) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":129 */
- __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; goto __pyx_L1;}
- Py_INCREF(((PyObject *)__pyx_v_text));
- PyTuple_SET_ITEM(__pyx_1, 0, ((PyObject *)__pyx_v_text));
- __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; goto __pyx_L1;}
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":161 */
+ __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; goto __pyx_L1;}
+ Py_INCREF(__pyx_v_text);
+ PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_text);
+ __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; goto __pyx_L1;}
Py_DECREF(__pyx_1); __pyx_1 = 0;
Py_DECREF(__pyx_v_icutext);
__pyx_v_icutext = __pyx_2;
__pyx_2 = 0;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":130 */
- __pyx_v_bufsize = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length * 2);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":162 */
+ __pyx_v_bufsize = ((((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length * 2) + 10);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":131 */
- __pyx_v_buffer = ((char (*))PyMem_Malloc(__pyx_v_bufsize));
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":165 */
+ __pyx_v_buffer = ((char (*))PyMem_Malloc((__pyx_v_bufsize + 1)));

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":132 */
- __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":166 */
+ __pyx_3 = (__pyx_v_buffer == 0);
+ if (__pyx_3) {

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":136 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":167 */
+ __pyx_1 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 167; goto __pyx_L1;}
+ __Pyx_Raise(__pyx_1, 0, 0);
+ Py_DECREF(__pyx_1); __pyx_1 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 167; goto __pyx_L1;}
+ goto __pyx_L2;
+ }
+ __pyx_L2:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":168 */
+ __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":172 */
while (1) {
__pyx_3 = (__pyx_v_size > __pyx_v_bufsize);
if (!__pyx_3) break;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":137 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":173 */
__pyx_v_bufsize = __pyx_v_size;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":138 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":174 */
PyMem_Free(__pyx_v_buffer);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":139 */
- __pyx_v_buffer = ((char (*))PyMem_Malloc(__pyx_v_bufsize));
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":175 */
+ __pyx_v_buffer = ((char (*))PyMem_Malloc((__pyx_v_bufsize + 1)));

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":140 */
- __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":176 */
+ __pyx_3 = (__pyx_v_buffer == 0);
+ if (__pyx_3) {
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":177 */
+ __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; goto __pyx_L1;}
+ __Pyx_Raise(__pyx_2, 0, 0);
+ Py_DECREF(__pyx_2); __pyx_2 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; goto __pyx_L1;}
+ goto __pyx_L5;
+ }
+ __pyx_L5:;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":178 */
+ __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
}

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":145 */
- __pyx_1 = PyString_FromStringAndSize(__pyx_v_buffer,__pyx_v_size); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 145; goto __pyx_L1;}
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":183 */
+ __pyx_1 = PyString_FromStringAndSize(__pyx_v_buffer,__pyx_v_size); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 183; goto __pyx_L1;}
Py_DECREF(__pyx_v_result);
__pyx_v_result = __pyx_1;
__pyx_1 = 0;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":146 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":184 */
PyMem_Free(__pyx_v_buffer);

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":147 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":185 */
Py_INCREF(__pyx_v_result);
__pyx_r = __pyx_v_result;
goto __pyx_L0;
@@ -334,7 +477,7 @@
__pyx_L1:;
Py_XDECREF(__pyx_1);
Py_XDECREF(__pyx_2);
- __Pyx_AddTraceback("_zope_ucol.KeyFactory.__call__");
+ __Pyx_AddTraceback("_zope_ucol.Collator.key");
__pyx_r = 0;
__pyx_L0:;
Py_DECREF(__pyx_v_icutext);
@@ -344,15 +487,79 @@
return __pyx_r;
}

+static PyObject *__pyx_f_10_zope_ucol_8Collator_cmp(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_f_10_zope_ucol_8Collator_cmp(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_o1 = 0;
+ PyObject *__pyx_v_o2 = 0;
+ PyObject *__pyx_v_u1;
+ PyObject *__pyx_v_u2;
+ PyObject *__pyx_r;
+ PyObject *__pyx_1 = 0;
+ PyObject *__pyx_2 = 0;
+ static char *__pyx_argnames[] = {"o1","o2",0};
+ if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OO", __pyx_argnames, &__pyx_v_o1, &__pyx_v_o2)) return 0;
+ Py_INCREF(__pyx_v_self);
+ Py_INCREF(__pyx_v_o1);
+ Py_INCREF(__pyx_v_o2);
+ __pyx_v_u1 = Py_None; Py_INCREF(__pyx_v_u1);
+ __pyx_v_u2 = Py_None; Py_INCREF(__pyx_v_u2);
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":188 */
+ __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
+ Py_INCREF(__pyx_v_o1);
+ PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_o1);
+ __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
+ Py_DECREF(__pyx_1); __pyx_1 = 0;
+ Py_DECREF(__pyx_v_u1);
+ __pyx_v_u1 = __pyx_2;
+ __pyx_2 = 0;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":189 */
+ __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
+ Py_INCREF(__pyx_v_o2);
+ PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_o2);
+ __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
+ Py_DECREF(__pyx_1); __pyx_1 = 0;
+ Py_DECREF(__pyx_v_u2);
+ __pyx_v_u2 = __pyx_2;
+ __pyx_2 = 0;
+
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":190 */
+ __pyx_1 = PyInt_FromLong(ucol_strcoll(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u1)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u1)->length,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u2)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u2)->length)); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; goto __pyx_L1;}
+ __pyx_r = __pyx_1;
+ __pyx_1 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; Py_INCREF(__pyx_r);
+ goto __pyx_L0;
+ __pyx_L1:;
+ Py_XDECREF(__pyx_1);
+ Py_XDECREF(__pyx_2);
+ __Pyx_AddTraceback("_zope_ucol.Collator.cmp");
+ __pyx_r = 0;
+ __pyx_L0:;
+ Py_DECREF(__pyx_v_u1);
+ Py_DECREF(__pyx_v_u2);
+ Py_DECREF(__pyx_v_self);
+ Py_DECREF(__pyx_v_o1);
+ Py_DECREF(__pyx_v_o2);
+ return __pyx_r;
+}
+
static __Pyx_InternTabEntry __pyx_intern_tab[] = {
+ {&__pyx_n_MemoryError, "MemoryError"},
+ {&__pyx_n_TypeError, "TypeError"},
{&__pyx_n_ValueError, "ValueError"},
{&__pyx_n_sys, "sys"},
+ {&__pyx_n_unicode, "unicode"},
{0, 0}
};

static __Pyx_StringTabEntry __pyx_string_tab[] = {
{&__pyx_k2p, __pyx_k2, sizeof(__pyx_k2)},
{&__pyx_k3p, __pyx_k3, sizeof(__pyx_k3)},
+ {&__pyx_k4p, __pyx_k4, sizeof(__pyx_k4)},
+ {&__pyx_k5p, __pyx_k5, sizeof(__pyx_k5)},
{0, 0, 0}
};

@@ -524,40 +731,60 @@
0, /*tp_weaklist*/
};

-static PyObject *__pyx_tp_new_10_zope_ucol_KeyFactory(PyTypeObject *t, PyObject *a, PyObject *k) {
+static PyObject *__pyx_tp_new_10_zope_ucol_Collator(PyTypeObject *t, PyObject *a, PyObject *k) {
PyObject *o = (*t->tp_alloc)(t, 0);
- if (__pyx_f_10_zope_ucol_10KeyFactory___new__(o, a, k) < 0) {
+ struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+ p->locale = Py_None; Py_INCREF(p->locale);
+ if (__pyx_f_10_zope_ucol_8Collator___new__(o, a, k) < 0) {
Py_DECREF(o); o = 0;
}
return o;
}

-static void __pyx_tp_dealloc_10_zope_ucol_KeyFactory(PyObject *o) {
+static void __pyx_tp_dealloc_10_zope_ucol_Collator(PyObject *o) {
+ struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
{
PyObject *etype, *eval, *etb;
PyErr_Fetch(&etype, &eval, &etb);
++o->ob_refcnt;
- __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(o);
+ __pyx_f_10_zope_ucol_8Collator___dealloc__(o);
if (PyErr_Occurred()) PyErr_WriteUnraisable(o);
--o->ob_refcnt;
PyErr_Restore(etype, eval, etb);
}
+ Py_XDECREF(p->locale);
(*o->ob_type->tp_free)(o);
}

-static int __pyx_tp_traverse_10_zope_ucol_KeyFactory(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_10_zope_ucol_Collator(PyObject *o, visitproc v, void *a) {
+ int e;
+ struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+ if (p->locale) {
+ e = (*v)(p->locale, a); if (e) return e;
+ }
return 0;
}

-static int __pyx_tp_clear_10_zope_ucol_KeyFactory(PyObject *o) {
+static int __pyx_tp_clear_10_zope_ucol_Collator(PyObject *o) {
+ struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+ Py_XDECREF(p->locale);
+ p->locale = Py_None; Py_INCREF(p->locale);
return 0;
}

-static struct PyMethodDef __pyx_methods_10_zope_ucol_KeyFactory[] = {
+static struct PyMethodDef __pyx_methods_10_zope_ucol_Collator[] = {
+ {"key", (PyCFunction)__pyx_f_10_zope_ucol_8Collator_key, METH_VARARGS|METH_KEYWORDS, __pyx_doc_10_zope_ucol_8Collator_key},
+ {"cmp", (PyCFunction)__pyx_f_10_zope_ucol_8Collator_cmp, METH_VARARGS|METH_KEYWORDS, 0},
{0, 0, 0, 0}
};

-static PyNumberMethods __pyx_tp_as_number_KeyFactory = {
+static struct PyMemberDef __pyx_members_10_zope_ucol_Collator[] = {
+ {"locale", T_OBJECT, offsetof(struct __pyx_obj_10_zope_ucol_Collator, locale), READONLY, 0},
+ {"used_default_information", T_INT, offsetof(struct __pyx_obj_10_zope_ucol_Collator, used_default_information), READONLY, 0},
+ {0, 0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number_Collator = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
@@ -598,7 +825,7 @@
0, /*nb_inplace_true_divide*/
};

-static PySequenceMethods __pyx_tp_as_sequence_KeyFactory = {
+static PySequenceMethods __pyx_tp_as_sequence_Collator = {
0, /*sq_length*/
0, /*sq_concat*/
0, /*sq_repeat*/
@@ -611,50 +838,50 @@
0, /*sq_inplace_repeat*/
};

-static PyMappingMethods __pyx_tp_as_mapping_KeyFactory = {
+static PyMappingMethods __pyx_tp_as_mapping_Collator = {
0, /*mp_length*/
0, /*mp_subscript*/
0, /*mp_ass_subscript*/
};

-static PyBufferProcs __pyx_tp_as_buffer_KeyFactory = {
+static PyBufferProcs __pyx_tp_as_buffer_Collator = {
0, /*bf_getreadbuffer*/
0, /*bf_getwritebuffer*/
0, /*bf_getsegcount*/
0, /*bf_getcharbuffer*/
};

-statichere PyTypeObject __pyx_type_10_zope_ucol_KeyFactory = {
+statichere PyTypeObject __pyx_type_10_zope_ucol_Collator = {
PyObject_HEAD_INIT(0)
0, /*ob_size*/
- "_zope_ucol.KeyFactory", /*tp_name*/
- sizeof(struct __pyx_obj_10_zope_ucol_KeyFactory), /*tp_basicsize*/
+ "_zope_ucol.Collator", /*tp_name*/
+ sizeof(struct __pyx_obj_10_zope_ucol_Collator), /*tp_basicsize*/
0, /*tp_itemsize*/
- __pyx_tp_dealloc_10_zope_ucol_KeyFactory, /*tp_dealloc*/
+ __pyx_tp_dealloc_10_zope_ucol_Collator, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
- &__pyx_tp_as_number_KeyFactory, /*tp_as_number*/
- &__pyx_tp_as_sequence_KeyFactory, /*tp_as_sequence*/
- &__pyx_tp_as_mapping_KeyFactory, /*tp_as_mapping*/
+ &__pyx_tp_as_number_Collator, /*tp_as_number*/
+ &__pyx_tp_as_sequence_Collator, /*tp_as_sequence*/
+ &__pyx_tp_as_mapping_Collator, /*tp_as_mapping*/
0, /*tp_hash*/
- __pyx_f_10_zope_ucol_10KeyFactory___call__, /*tp_call*/
+ 0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
- &__pyx_tp_as_buffer_KeyFactory, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ &__pyx_tp_as_buffer_Collator, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
"Compute a collation key for a unicode string.\n ", /*tp_doc*/
- __pyx_tp_traverse_10_zope_ucol_KeyFactory, /*tp_traverse*/
- __pyx_tp_clear_10_zope_ucol_KeyFactory, /*tp_clear*/
+ __pyx_tp_traverse_10_zope_ucol_Collator, /*tp_traverse*/
+ __pyx_tp_clear_10_zope_ucol_Collator, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
- __pyx_methods_10_zope_ucol_KeyFactory, /*tp_methods*/
- 0, /*tp_members*/
+ __pyx_methods_10_zope_ucol_Collator, /*tp_methods*/
+ __pyx_members_10_zope_ucol_Collator, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
@@ -663,7 +890,7 @@
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
- __pyx_tp_new_10_zope_ucol_KeyFactory, /*tp_new*/
+ __pyx_tp_new_10_zope_ucol_Collator, /*tp_new*/
0, /*tp_free*/
0, /*tp_is_gc*/
0, /*tp_bases*/
@@ -687,21 +914,21 @@
if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
if (__Pyx_InternStrings(__pyx_intern_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
- __pyx_ptype_10_zope_ucol_unicode = __Pyx_ImportType("__builtin__", "unicode", sizeof(PyUnicodeObject)); if (!__pyx_ptype_10_zope_ucol_unicode) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; goto __pyx_L1;}
__pyx_type_10_zope_ucol_UCharString.tp_free = _PyObject_GC_Del;
- if (PyType_Ready(&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; goto __pyx_L1;}
- if (PyObject_SetAttrString(__pyx_m, "UCharString", (PyObject *)&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; goto __pyx_L1;}
+ if (PyType_Ready(&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; goto __pyx_L1;}
+ if (PyObject_SetAttrString(__pyx_m, "UCharString", (PyObject *)&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; goto __pyx_L1;}
__pyx_ptype_10_zope_ucol_UCharString = &__pyx_type_10_zope_ucol_UCharString;
- if (PyType_Ready(&__pyx_type_10_zope_ucol_KeyFactory) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; goto __pyx_L1;}
- if (PyObject_SetAttrString(__pyx_m, "KeyFactory", (PyObject *)&__pyx_type_10_zope_ucol_KeyFactory) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; goto __pyx_L1;}
- __pyx_ptype_10_zope_ucol_KeyFactory = &__pyx_type_10_zope_ucol_KeyFactory;
+ __pyx_type_10_zope_ucol_Collator.tp_free = _PyObject_GC_Del;
+ if (PyType_Ready(&__pyx_type_10_zope_ucol_Collator) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
+ if (PyObject_SetAttrString(__pyx_m, "Collator", (PyObject *)&__pyx_type_10_zope_ucol_Collator) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
+ __pyx_ptype_10_zope_ucol_Collator = &__pyx_type_10_zope_ucol_Collator;

/* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":18 */
__pyx_1 = __Pyx_Import(__pyx_n_sys, 0); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; goto __pyx_L1;}
if (PyObject_SetAttr(__pyx_m, __pyx_n_sys, __pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; goto __pyx_L1;}
Py_DECREF(__pyx_1); __pyx_1 = 0;

- /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":120 */
+ /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":187 */
return;
__pyx_L1:;
Py_XDECREF(__pyx_1);
@@ -715,19 +942,6 @@

/* Runtime support code */

-static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name) {
- if (!type) {
- PyErr_Format(PyExc_SystemError, "Missing type object");
- return 0;
- }
- if ((none_allowed && obj == Py_None) || PyObject_TypeCheck(obj, type))
- return 1;
- PyErr_Format(PyExc_TypeError,
- "Argument '%s' has incorrect type (expected %s, got %s)",
- name, type->tp_name, obj->ob_type->tp_name);
- return 0;
-}
-
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) {
PyObject *__import__ = 0;
PyObject *empty_list = 0;
@@ -851,56 +1065,6 @@
return 0;
}

-static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name,
- long size)
-{
- PyObject *py_module_name = 0;
- PyObject *py_class_name = 0;
- PyObject *py_name_list = 0;
- PyObject *py_module = 0;
- PyObject *result = 0;
-
- py_module_name = PyString_FromString(module_name);
- if (!py_module_name)
- goto bad;
- py_class_name = PyString_FromString(class_name);
- if (!py_class_name)
- goto bad;
- py_name_list = PyList_New(1);
- if (!py_name_list)
- goto bad;
- Py_INCREF(py_class_name);
- if (PyList_SetItem(py_name_list, 0, py_class_name) < 0)
- goto bad;
- py_module = __Pyx_Import(py_module_name, py_name_list);
- if (!py_module)
- goto bad;
- result = PyObject_GetAttr(py_module, py_class_name);
- if (!result)
- goto bad;
- if (!PyType_Check(result)) {
- PyErr_Format(PyExc_TypeError,
- "%s.%s is not a type object",
- module_name, class_name);
- goto bad;
- }
- if (((PyTypeObject *)result)->tp_basicsize != size) {
- PyErr_Format(PyExc_ValueError,
- "%s.%s does not appear to be the correct type object",
- module_name, class_name);
- goto bad;
- }
- goto done;
-bad:
- Py_XDECREF(result);
- result = 0;
-done:
- Py_XDECREF(py_module_name);
- Py_XDECREF(py_class_name);
- Py_XDECREF(py_name_list);
- return (PyTypeObject *)result;
-}
-
#include "compile.h"
#include "frameobject.h"
#include "traceback.h"

Modified: zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx
===================================================================
--- zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx 2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx 2005-12-09 21:13:42 UTC (rev 40672)
@@ -19,17 +19,20 @@

cdef extern from "unicode/utypes.h":

- ctypedef int UErrorCode
+ cdef enum UErrorCode:
+ U_USING_DEFAULT_WARNING = -127
ctypedef int int32_t
ctypedef char uint8_t
int U_FAILURE(UErrorCode status)
UErrorCode U_ZERO_ERROR

cdef extern from "unicode/utf.h":
+
ctypedef int UChar
ctypedef int UChar32

cdef extern from "unicode/ustring.h":
+
UChar *u_strFromUTF32(UChar *dest, int32_t destCapacity,
int32_t *pDestLength,
UChar32 *src, int32_t srcLength,
@@ -46,22 +49,25 @@
uint8_t *result,
int32_t resultLength
)
+ int ucol_strcoll(UCollator *coll,
+ UChar *source, int32_t sourceLength,
+ UChar *target, int32_t targetLength)

cdef extern from "Python.h":

- cdef int PyUnicode_Check(ob)
- cdef int PyString_Check(ob)
+ int PyUnicode_Check(ob)
+ int PyString_Check(ob)

ctypedef int Py_UNICODE
+ Py_UNICODE *PyUnicode_AS_UNICODE(ob)
+ int PyUnicode_GET_SIZE(ob)
+ char *PyString_AS_STRING(ob)

- ctypedef class __builtin__.unicode [object PyUnicodeObject]:
- cdef int length
- cdef Py_UNICODE *str
-
- void *PyMem_Malloc(int)
+ void *PyMem_Malloc(int size)
void PyMem_Free(void *p)
object PyString_FromStringAndSize(char *v, int l)

+
cdef class UCharString:
"""Wrapper for ICU UChar arrays
"""
@@ -71,21 +77,37 @@
cdef readonly object base
cdef readonly int need_to_free

- def __new__(self, unicode text):
+ def __new__(self, text):
cdef int32_t buffsize
cdef UErrorCode status
+ cdef Py_UNICODE *str
+ cdef int length

+ if not PyUnicode_Check(text):
+ if PyString_Check(text):
+ text = unicode(text)
+ assert PyUnicode_Check(text)
+ else:
+ raise TypeError("Expected unicode string")
+
+ length = PyUnicode_GET_SIZE(text)
+ str = PyUnicode_AS_UNICODE(text)
+
+
if sizeof(Py_UNICODE) == 2:
- self.data = text.str
- self.length = text.length
+ self.data = str
+ self.length = length
self.base = text
self.need_to_free = 0
else:
- buffsize = 2*text.length + 1
+ buffsize = 2*length + 1
self.data = <UChar*>PyMem_Malloc(buffsize*sizeof(UChar))
+ if self.data == NULL:
+ raise MemoryError
status = 0
u_strFromUTF32(self.data, buffsize, &(self.length),
- <UChar32*>text.str, text.length, &status)
+ <UChar32*>str, length, &status)
+ assert self.length <= buffsize
self.need_to_free = 1
if U_FAILURE(status):
raise ValueError(
@@ -98,26 +120,36 @@
self.data = NULL


-cdef class KeyFactory:
+cdef class Collator:
"""Compute a collation key for a unicode string.
"""

cdef UCollator *collator
+ cdef readonly object locale
+ cdef readonly int used_default_information

- def __new__(self, char *locale):
+ def __new__(self, locale):
cdef UCollator *collator
cdef UErrorCode status
+
+ if not PyString_Check(locale):
+ raise TypeError("String locale expected")
+
status = U_ZERO_ERROR
- collator = ucol_open(locale, &status)
+ collator = ucol_open(PyString_AS_STRING(locale), &status)
if U_FAILURE(status):
raise ValueError("Couldn't create a collator")
self.collator = collator
+ self.locale = locale
+ if status == U_USING_DEFAULT_WARNING:
+ status = 1
+ self.used_default_information = status

def __dealloc__(self):
if self.collator != NULL:
ucol_close(self.collator)

- def __call__(self, unicode text):
+ def key(self, text):
"""Compute a collation key for the given unicode text.

Of course, the key is only valid for the given locale.
@@ -127,8 +159,12 @@
cdef int32_t size

icutext = UCharString(text)
- bufsize = (<UCharString>icutext).length*2
- buffer = <char*>PyMem_Malloc(bufsize)
+ bufsize = (<UCharString>icutext).length*2+10
+
+ # the +1 below is needed to avoid an apparent buffer overflow bug in ICU
+ buffer = <char*>PyMem_Malloc(bufsize +1)
+ if buffer == NULL:
+ raise MemoryError
size = ucol_getSortKey(self.collator,
(<UCharString>icutext).data,
(<UCharString>icutext).length,
@@ -136,7 +172,9 @@
while size > bufsize:
bufsize = size
PyMem_Free(buffer)
- buffer = <char*>PyMem_Malloc(bufsize)
+ buffer = <char*>PyMem_Malloc(bufsize +1) # See above +1
+ if buffer == NULL:
+ raise MemoryError
size = ucol_getSortKey(self.collator,
(<UCharString>icutext).data,
(<UCharString>icutext).length,
@@ -145,3 +183,14 @@
result = PyString_FromStringAndSize(buffer, size)
PyMem_Free(buffer)
return result
+
+ def cmp(self, o1, o2):
+ u1 = UCharString(o1)
+ u2 = UCharString(o2)
+ return ucol_strcoll(
+ self.collator,
+ (<UCharString>u1).data,
+ (<UCharString>u1).length,
+ (<UCharString>u2).data,
+ (<UCharString>u2).length,
+ )

Modified: zope.ucol/trunk/src/zope/ucol/tests.py
===================================================================
--- zope.ucol/trunk/src/zope/ucol/tests.py 2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/tests.py 2005-12-09 21:13:42 UTC (rev 40672)
@@ -18,8 +18,48 @@
import unittest
from zope.testing import doctest

+def type_errors():
+ """
+You can pass unicode strings, or strings:
+
+ >>> from zope.ucol import Collator
+ >>> c = Collator('root')
+ >>> c.key(u"Hello") == c.key("Hello")
+ True
+ >>> c.cmp(u"Hello", "Hello")
+ 0
+
+As long as the strings can be decoded as ASCII:
+
+ >>> c.key("Hello\xfa")
+ Traceback (most recent call last):
+ ...
+ UnicodeDecodeError: 'ascii' codec can't decode byte
+ 0xfa in position 5: ordinal not in range(128)
+
+ >>> c.cmp(u"Hello", "Hello\xfa")
+ Traceback (most recent call last):
+ ...
+ UnicodeDecodeError: 'ascii' codec can't decode byte
+ 0xfa in position 5: ordinal not in range(128)
+
+And you can't pass a non-string:
+
+ >>> c.key(0)
+ Traceback (most recent call last):
+ ...
+ TypeError: Expected unicode string
+
+ >>> c.cmp(u"Hello", 0)
+ Traceback (most recent call last):
+ ...
+ TypeError: Expected unicode string
+
+"""
+
def test_suite():
return unittest.TestSuite((
+ doctest.DocTestSuite(optionflags=doctest.NORMALIZE_WHITESPACE),
doctest.DocTestSuite('zope.ucol',
optionflags=doctest.NORMALIZE_WHITESPACE),
))

_______________________________________________
Zope-CVS maillist - Zope-CVS@zope.org
http://mail.zope.org/mailman/listinfo/zope-cvs

Zope CVS instructions: http://dev.zope.org/CVS