Mailing List Archive

gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file (#117802)
https://github.com/python/cpython/commit/6dc661bc9f65e9923eafbcdbf18bcc57eebbf6a4
commit: 6dc661bc9f65e9923eafbcdbf18bcc57eebbf6a4
branch: main
author: Inada Naoki <songofacandy@gmail.com>
committer: methane <songofacandy@gmail.com>
date: 2024-04-16T12:56:16+09:00
summary:

gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file (#117802)

files:
A Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
M Doc/library/site.rst
M Doc/whatsnew/3.13.rst
M Lib/site.py

diff --git a/Doc/library/site.rst b/Doc/library/site.rst
index 2dc9fb09d727e2..e52bbd32d4d493 100644
--- a/Doc/library/site.rst
+++ b/Doc/library/site.rst
@@ -74,6 +74,10 @@ with ``import`` (followed by space or tab) are executed.
Limiting a code chunk to a single line is a deliberate measure
to discourage putting anything more complex here.

+.. versionchanged:: 3.13
+ The :file:`.pth` files are now decoded as UTF-8 first, and then with the
+ :term:`locale encoding` if UTF-8 decoding fails.
+
.. index::
single: package
triple: path; configuration; file
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 83c528814c967e..f957698ecb06d8 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -630,6 +630,13 @@ re
* Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity.
:exc:`!re.error` is kept for backward compatibility.

+site
+----
+
+* :file:`.pth` files are now decoded as UTF-8 first, and then with the
+ :term:`locale encoding` if the UTF-8 decoding fails.
+ (Contributed by Inada Naoki in :gh:`117802`.)
+
sqlite3
-------

diff --git a/Lib/site.py b/Lib/site.py
index 162bbec4f8f41b..93af9c453ac7bb 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
return
_trace(f"Processing .pth file: {fullname!r}")
try:
- # locale encoding is not ideal especially on Windows. But we have used
- # it for a long time. setuptools uses the locale encoding too.
- f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
+ with io.open_code(fullname) as f:
+ pth_content = f.read()
except OSError:
return
- with f:
- for n, line in enumerate(f):
- if line.startswith("#"):
- continue
- if line.strip() == "":
+
+ try:
+ pth_content = pth_content.decode()
+ except UnicodeDecodeError:
+ # Fallback to locale encoding for backward compatibility.
+ # We will deprecate this fallback in the future.
+ import locale
+ pth_content = pth_content.decode(locale.getencoding())
+ _trace(f"Cannot read {fullname!r} as UTF-8. "
+ f"Using fallback encoding {locale.getencoding()!r}")
+
+ for n, line in enumerate(pth_content.splitlines(), 1):
+ if line.startswith("#"):
+ continue
+ if line.strip() == "":
+ continue
+ try:
+ if line.startswith(("import ", "import\t")):
+ exec(line)
continue
- try:
- if line.startswith(("import ", "import\t")):
- exec(line)
- continue
- line = line.rstrip()
- dir, dircase = makepath(sitedir, line)
- if not dircase in known_paths and os.path.exists(dir):
- sys.path.append(dir)
- known_paths.add(dircase)
- except Exception as exc:
- print("Error processing line {:d} of {}:\n".format(n+1, fullname),
- file=sys.stderr)
- import traceback
- for record in traceback.format_exception(exc):
- for line in record.splitlines():
- print(' '+line, file=sys.stderr)
- print("\nRemainder of file ignored", file=sys.stderr)
- break
+ line = line.rstrip()
+ dir, dircase = makepath(sitedir, line)
+ if dircase not in known_paths and os.path.exists(dir):
+ sys.path.append(dir)
+ known_paths.add(dircase)
+ except Exception as exc:
+ print(f"Error processing line {n:d} of {fullname}:\n",
+ file=sys.stderr)
+ import traceback
+ for record in traceback.format_exception(exc):
+ for line in record.splitlines():
+ print(' '+line, file=sys.stderr)
+ print("\nRemainder of file ignored", file=sys.stderr)
+ break
if reset:
known_paths = None
return known_paths
diff --git a/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
new file mode 100644
index 00000000000000..6f91251126dc7b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
@@ -0,0 +1,3 @@
+The :mod:`site` module now decodes ``.pth`` files as UTF-8 first, and falls
+back to the :term:`locale encoding` if a ``UnicodeDecodeError`` occurs.
+Previously, only the locale encoding was supported.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com