Mailing List Archive

python/dist/src/Lib/encodings __init__.py,1.11,1.12
Update of /cvsroot/python/python/dist/src/Lib/encodings
In directory usw-pr-cvs1:/tmp/cvs-serv20044/Lib/encodings

Modified Files:
__init__.py
Log Message:
Extending the encoding name normalization to handle more non-alphanumeric
characters.



Index: __init__.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/__init__.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** __init__.py 8 Aug 2002 20:19:18 -0000 1.11
--- __init__.py 4 Oct 2002 11:45:38 -0000 1.12
***************
*** 4,10 ****
directory.

! Codec modules must have names corresponding to standard lower-case
! encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
! implemented by the module 'utf_8.py'.

Each codec module must export the following interface:
--- 4,10 ----
directory.

! Codec modules must have names corresponding to normalized encoding
! names as defined in the normalize_encoding() function below, e.g.
! 'utf-8' must be implemented by the module 'utf_8.py'.

Each codec module must export the following interface:
***************
*** 19,25 ****
* getaliases() -> sequence of encoding name strings to use as aliases

! Alias names returned by getaliases() must be standard encoding
! names as defined above (lower-case, hyphens converted to
! underscores).

Written by Marc-Andre Lemburg (mal@lemburg.com).
--- 19,24 ----
* getaliases() -> sequence of encoding name strings to use as aliases

! Alias names returned by getaliases() must be normalized encoding
! names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).
***************
*** 29,37 ****
"""#"

! import codecs,exceptions

_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']

class CodecRegistryError(exceptions.LookupError,
--- 28,37 ----
"""#"

! import codecs, exceptions, re

_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
+ _norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')

class CodecRegistryError(exceptions.LookupError,
***************
*** 39,42 ****
--- 39,54 ----
pass

+ def normalize_encoding(encoding):
+
+ """ Normalize an encoding name.
+
+ Normalization works as follows: every non-alphanumeric
+ character except the dot used for Python package names is
+ replaced with an underscore (runs are not collapsed), e.g.
+ ' -;#' becomes '____'.
+
+ """
+ return '_'.join(_norm_encoding_RE.split(encoding))
+
def search_function(encoding):

***************
*** 52,56 ****
# default import module lookup scheme with the alias name.
#
! modname = encoding.replace('-', '_')
try:
mod = __import__('encodings.' + modname,
--- 64,68 ----
# default import module lookup scheme with the alias name.
#
! modname = normalize_encoding(encoding)
try:
mod = __import__('encodings.' + modname,