Mailing List Archive

gh-77102: site: try utf-8 and locale encoding when reading .pth file (GH-117802)
https://github.com/python/cpython/commit/2a58923687cbe102550b275ccf025a1b8d2b417e
commit: 2a58923687cbe102550b275ccf025a1b8d2b417e
branch: 3.12
author: Inada Naoki <songofacandy@gmail.com>
committer: methane <songofacandy@gmail.com>
date: 2024-04-16T18:51:06+09:00
summary:

gh-77102: site: try utf-8 and locale encoding when reading .pth file (GH-117802)

(cherry picked from commit 6dc661bc9f65e9923eafbcdbf18bcc57eebbf6a4)

files:
A Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
M Lib/site.py

diff --git a/Lib/site.py b/Lib/site.py
index 924b2460d96976..b3a4916161244a 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
return
_trace(f"Processing .pth file: {fullname!r}")
try:
- # locale encoding is not ideal especially on Windows. But we have used
- # it for a long time. setuptools uses the locale encoding too.
- f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
+ with io.open_code(fullname) as f:
+ pth_content = f.read()
except OSError:
return
- with f:
- for n, line in enumerate(f):
- if line.startswith("#"):
- continue
- if line.strip() == "":
+
+ try:
+ pth_content = pth_content.decode()
+ except UnicodeDecodeError:
+ # Fall back to locale encoding for backward compatibility.
+ # We will deprecate this fallback in the future.
+ import locale
+ pth_content = pth_content.decode(locale.getencoding())
+ _trace(f"Cannot read {fullname!r} as UTF-8. "
+ f"Using fallback encoding {locale.getencoding()!r}")
+
+ for n, line in enumerate(pth_content.splitlines(), 1):
+ if line.startswith("#"):
+ continue
+ if line.strip() == "":
+ continue
+ try:
+ if line.startswith(("import ", "import\t")):
+ exec(line)
continue
- try:
- if line.startswith(("import ", "import\t")):
- exec(line)
- continue
- line = line.rstrip()
- dir, dircase = makepath(sitedir, line)
- if not dircase in known_paths and os.path.exists(dir):
- sys.path.append(dir)
- known_paths.add(dircase)
- except Exception as exc:
- print("Error processing line {:d} of {}:\n".format(n+1, fullname),
- file=sys.stderr)
- import traceback
- for record in traceback.format_exception(exc):
- for line in record.splitlines():
- print(' '+line, file=sys.stderr)
- print("\nRemainder of file ignored", file=sys.stderr)
- break
+ line = line.rstrip()
+ dir, dircase = makepath(sitedir, line)
+ if dircase not in known_paths and os.path.exists(dir):
+ sys.path.append(dir)
+ known_paths.add(dircase)
+ except Exception as exc:
+ print(f"Error processing line {n:d} of {fullname}:\n",
+ file=sys.stderr)
+ import traceback
+ for record in traceback.format_exception(exc):
+ for line in record.splitlines():
+ print(' '+line, file=sys.stderr)
+ print("\nRemainder of file ignored", file=sys.stderr)
+ break
if reset:
known_paths = None
return known_paths
diff --git a/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
new file mode 100644
index 00000000000000..6f91251126dc7b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
@@ -0,0 +1,3 @@
+The :mod:`site` module now tries to decode ``.pth`` files as UTF-8 first,
+and falls back to the :term:`locale encoding` if a ``UnicodeDecodeError``
+occurs. Previously, only the locale encoding was supported.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com