Mailing List Archive

bpo-45952: Get the C analyzer tool working again. (gh-29882)
https://github.com/python/cpython/commit/ee94aa0850191712e6adfc1f4a9df08ec3240195
commit: ee94aa0850191712e6adfc1f4a9df08ec3240195
branch: main
author: Eric Snow <ericsnowcurrently@gmail.com>
committer: ericsnowcurrently <ericsnowcurrently@gmail.com>
date: 2021-12-01T11:20:20-07:00
summary:

bpo-45952: Get the C analyzer tool working again. (gh-29882)

There wasn't much that needed to be done. Mostly it was just a few new files that got added.

https://bugs.python.org/issue45952

files:
M Tools/c-analyzer/TODO
M Tools/c-analyzer/c_parser/__init__.py
M Tools/c-analyzer/c_parser/parser/__init__.py
M Tools/c-analyzer/c_parser/preprocessor/__main__.py
M Tools/c-analyzer/cpython/__main__.py
M Tools/c-analyzer/cpython/_parser.py

diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO
index 1fd8052268be0..4b9b2857e1d1e 100644
--- a/Tools/c-analyzer/TODO
+++ b/Tools/c-analyzer/TODO
@@ -1,3 +1,11 @@
+# For up-to-date results, run:
+# ./python Tools/c-analyzer/c-analyzer.py check --format summary
+# or
+# ./python Tools/c-analyzer/c-analyzer.py analyze
+
+
+#######################################
+# non-PyObject (61)

# allocator (16)
Objects/obmalloc.c:_PyMem static PyMemAllocatorEx _PyMem
@@ -32,12 +40,7 @@ Objects/dictobject.c:empty_keys_struct static PyDictKe
Python/fileutils.c:_Py_open_cloexec_works int _Py_open_cloexec_works


-# freelists
-Objects/dictobject.c:keys_free_list static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST]
-Objects/dictobject.c:numfreekeys static int numfreekeys
-
-
-# other non-object (43)
+# other non-object (40)
Modules/_tracemalloc.c:allocators static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } allocators
Modules/_tracemalloc.c:tables_lock static PyThread_type_lock tables_lock
Modules/_tracemalloc.c:tracemalloc_filenames static _Py_hashtable_t *tracemalloc_filenames
@@ -81,30 +84,7 @@ Python/pylifecycle.c:fatal_error():reentrant static int reen


#######################################
-# PyObject (960)
-
-# freelists (10 + 10)
-Modules/_collectionsmodule.c:freeblocks static block *freeblocks[MAXFREEBLOCKS]
-Modules/_collectionsmodule.c:numfreeblocks static Py_ssize_t numfreeblocks
-Objects/dictobject.c:free_list static PyDictObject *free_list[PyDict_MAXFREELIST]
-Objects/dictobject.c:numfree static int numfree
-Objects/exceptions.c:memerrors_freelist static PyBaseExceptionObject *memerrors_freelist
-Objects/exceptions.c:memerrors_numfree static int memerrors_numfree
-Objects/floatobject.c:free_list static PyFloatObject *free_list
-Objects/floatobject.c:numfree static int numfree
-Objects/frameobject.c:free_list static PyFrameObject *free_list
-Objects/frameobject.c:numfree static int numfree
-Objects/genobject.c:ag_asend_freelist static PyAsyncGenASend *ag_asend_freelist[_PyAsyncGen_MAXFREELIST]
-Objects/genobject.c:ag_asend_freelist_free static int ag_asend_freelist_free
-Objects/genobject.c:ag_value_freelist static _PyAsyncGenWrappedValue *ag_value_freelist[_PyAsyncGen_MAXFREELIST]
-Objects/genobject.c:ag_value_freelist_free static int ag_value_freelist_free
-Objects/listobject.c:free_list static PyListObject *free_list[PyList_MAXFREELIST]
-Objects/listobject.c:numfree static int numfree
-Objects/tupleobject.c:free_list static PyTupleObject *free_list[PyTuple_MAXSAVESIZE]
-Objects/tupleobject.c:numfree static int numfree[PyTuple_MAXSAVESIZE]
-Python/context.c:ctx_freelist static PyContext *ctx_freelist
-Python/context.c:ctx_freelist_len static int ctx_freelist_len
-
+# PyObject (919)

# singletons (7)
Objects/boolobject.c:_Py_FalseStruct static struct _longobject _Py_FalseStruct
@@ -116,16 +96,8 @@ Objects/object.c:_Py_NotImplementedStruct PyObject _Py_No
Objects/sliceobject.c:_Py_EllipsisObject PyObject _Py_EllipsisObject


-# module vars (9)
-Modules/_functoolsmodule.c:kwd_mark static PyObject *kwd_mark
-Modules/_localemodule.c:Error static PyObject *Error
-Modules/_threadmodule.c:ThreadError static PyObject *ThreadError
+# module vars (1)
Modules/_tracemalloc.c:unknown_filename static PyObject *unknown_filename
-Modules/signalmodule.c:DefaultHandler static PyObject *DefaultHandler
-Modules/signalmodule.c:IgnoreHandler static PyObject *IgnoreHandler
-Modules/signalmodule.c:IntHandler static PyObject *IntHandler
-Modules/signalmodule.c:ItimerError static PyObject *ItimerError
-Objects/exceptions.c:errnomap static PyObject *errnomap


# other (non-cache) (5)
@@ -136,26 +108,15 @@ Modules/signalmodule.c:Handlers static volatile
Objects/setobject.c:_dummy_struct static PyObject _dummy_struct


-# caches (5)
-Modules/posixmodule.c:posix_putenv_garbage static PyObject *posix_putenv_garbage
-Objects/sliceobject.c:slice_cache static PySliceObject *slice_cache
-Objects/typeobject.c:method_cache static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP]
-Objects/unicodeobject.c:interned static PyObject *interned
+# caches (1)
Python/import.c:extensions static PyObject *extensions


-# cached constants - non-str (15)
+# cached constants - non-str (6)
Modules/_io/_iomodule.c:_PyIO_empty_bytes PyObject *_PyIO_empty_bytes
Modules/_io/bufferedio.c:_PyIO_trap_eintr():eintr_int static PyObject *eintr_int
-Modules/posixmodule.c:billion static PyObject *billion
-Modules/posixmodule.c:wait_helper():struct_rusage static PyObject *struct_rusage
-Objects/bytesobject.c:characters static PyBytesObject *characters[UCHAR_MAX + 1]
-Objects/bytesobject.c:nullstring static PyBytesObject *nullstring
-Objects/codeobject.c:PyCode_NewEmpty():nulltuple static PyObject *nulltuple
-Objects/dictobject.c:empty_values static PyObject *empty_values[1]
+Objects/dictobject.c:empty_values_struct static PyDictValues
Objects/listobject.c:indexerr static PyObject *indexerr
-Objects/longobject.c:small_ints static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]
-Objects/setobject.c:emptyfrozenset static PyObject *emptyfrozenset
Python/context.c:_token_missing static PyObject *_token_missing
Python/hamt.c:_empty_hamt static PyHamtObject *_empty_hamt

@@ -662,15 +623,6 @@ Modules/itertoolsmodule.c:takewhile_type static PyTypeOb
Modules/itertoolsmodule.c:tee_type static PyTypeObject tee_type
Modules/itertoolsmodule.c:teedataobject_type static PyTypeObject teedataobject_type
Modules/itertoolsmodule.c:ziplongest_type static PyTypeObject ziplongest_type
-Modules/posixmodule.c:DirEntryType static PyTypeObject DirEntryType
-Modules/posixmodule.c:ScandirIteratorType static PyTypeObject ScandirIteratorType
-Modules/posixmodule.c:SchedParamType static PyTypeObject* SchedParamType
-Modules/posixmodule.c:StatResultType static PyTypeObject* StatResultType
-Modules/posixmodule.c:StatVFSResultType static PyTypeObject* StatVFSResultType
-Modules/posixmodule.c:TerminalSizeType static PyTypeObject* TerminalSizeType
-Modules/posixmodule.c:TimesResultType static PyTypeObject* TimesResultType
-Modules/posixmodule.c:UnameResultType static PyTypeObject* UnameResultType
-Modules/posixmodule.c:WaitidResultType static PyTypeObject* WaitidResultType
Modules/signalmodule.c:SiginfoType static PyTypeObject SiginfoType
Modules/timemodule.c:StructTimeType static PyTypeObject StructTimeType
Modules/xxsubtype.c:spamdict_type static PyTypeObject spamdict_type
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py
index 39455ddbf1a0c..fc10aff94505d 100644
--- a/Tools/c-analyzer/c_parser/__init__.py
+++ b/Tools/c-analyzer/c_parser/__init__.py
@@ -1,3 +1,4 @@
+from c_common.fsutil import match_glob as _match_glob
from .parser import parse as _parse
from .preprocessor import get_preprocessor as _get_preprocessor

@@ -5,23 +6,32 @@
def parse_file(filename, *,
match_kind=None,
get_file_preprocessor=None,
+ file_maxsizes=None,
):
if get_file_preprocessor is None:
get_file_preprocessor = _get_preprocessor()
- yield from _parse_file(filename, match_kind, get_file_preprocessor)
+ yield from _parse_file(
+ filename, match_kind, get_file_preprocessor, file_maxsizes)


def parse_files(filenames, *,
match_kind=None,
get_file_preprocessor=None,
+ file_maxsizes=None,
):
if get_file_preprocessor is None:
get_file_preprocessor = _get_preprocessor()
for filename in filenames:
- yield from _parse_file(filename, match_kind, get_file_preprocessor)
+ yield from _parse_file(
+ filename, match_kind, get_file_preprocessor, file_maxsizes)


-def _parse_file(filename, match_kind, get_file_preprocessor):
+def _parse_file(filename, match_kind, get_file_preprocessor, maxsizes):
+    srckwargs = {}
+    maxsize = _resolve_max_size(filename, maxsizes)
+    if maxsize:
+        srckwargs['maxtext'], srckwargs['maxlines'] = maxsize
+
# Preprocess the file.
preprocess = get_file_preprocessor(filename)
preprocessed = preprocess()
@@ -30,7 +40,7 @@ def _parse_file(filename, match_kind, get_file_preprocessor):

# Parse the lines.
srclines = ((l.file, l.data) for l in preprocessed if l.kind == 'source')
- for item in _parse(srclines):
+ for item in _parse(srclines, **srckwargs):
if match_kind is not None and not match_kind(item.kind):
continue
if not item.filename:
@@ -38,6 +48,22 @@ def _parse_file(filename, match_kind, get_file_preprocessor):
yield item


+def _resolve_max_size(filename, maxsizes):
+    for pattern, maxsize in (maxsizes.items() if maxsizes else ()):
+        if _match_glob(filename, pattern):
+            break
+    else:
+        return None
+    if not maxsize:
+        return None, None
+    maxtext, maxlines = maxsize
+    if maxtext is not None:
+        maxtext = int(maxtext)
+    if maxlines is not None:
+        maxlines = int(maxlines)
+    return maxtext, maxlines
+
+
def parse_signature(text):
raise NotImplementedError

diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py
index df70aae66b776..b5eae2ed92d0d 100644
--- a/Tools/c-analyzer/c_parser/parser/__init__.py
+++ b/Tools/c-analyzer/c_parser/parser/__init__.py
@@ -120,12 +120,12 @@
from ._info import SourceInfo


-def parse(srclines):
+def parse(srclines, **srckwargs):
if isinstance(srclines, str): # a filename
raise NotImplementedError

anon_name = anonymous_names()
- for result in _parse(srclines, anon_name):
+ for result in _parse(srclines, anon_name, **srckwargs):
yield ParsedItem.from_raw(result)


@@ -152,17 +152,19 @@ def anon_name(prefix='anon-'):
_logger = logging.getLogger(__name__)


-def _parse(srclines, anon_name):
+def _parse(srclines, anon_name, **srckwargs):
from ._global import parse_globals

- source = _iter_source(srclines)
- #source = _iter_source(srclines, showtext=True)
+ source = _iter_source(srclines, **srckwargs)
for result in parse_globals(source, anon_name):
# XXX Handle blocks here instead of in parse_globals().
yield result


-def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
+# We use defaults that cover most files. Files with bigger declarations
+# are covered elsewhere (MAX_SIZES in cpython/_parser.py).
+
+def _iter_source(lines, *, maxtext=10_000, maxlines=200, showtext=False):
maxtext = maxtext if maxtext and maxtext > 0 else None
maxlines = maxlines if maxlines and maxlines > 0 else None
filestack = []
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
index bfc61949a76e4..55aa8752dce72 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -43,7 +43,7 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
def process_args(args, *, argv):
ns = vars(args)

- process_fail_arg(args, argv)
+ process_fail_arg(args, argv=argv)
ignore_exc = ns.pop('ignore_exc')
# We later pass ignore_exc to _get_preprocessor().

diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 06ec871ba75e3..be331d50427d5 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -111,6 +111,7 @@ def cmd_parse(filenames=None, **kwargs):
c_parser.cmd_parse(
filenames,
relroot=REPO_ROOT,
+ file_maxsizes=_parser.MAX_SIZES,
**kwargs
)

@@ -127,6 +128,7 @@ def cmd_check(filenames=None, **kwargs):
relroot=REPO_ROOT,
_analyze=_analyzer.analyze,
_CHECKS=CHECKS,
+ file_maxsizes=_parser.MAX_SIZES,
**kwargs
)

@@ -141,6 +143,7 @@ def cmd_analyze(filenames=None, **kwargs):
relroot=REPO_ROOT,
_analyze=_analyzer.analyze,
formats=formats,
+ file_maxsizes=_parser.MAX_SIZES,
**kwargs
)

diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index 8526b2af15a23..90b470c8196c0 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -14,6 +14,10 @@
GLOB_ALL = '**/*'


+def _abs(relfile):
+    return os.path.join(REPO_ROOT, relfile)
+
+
def clean_lines(text):
"""Clear out comments, blank lines, and leading/trailing whitespace."""
lines = (line.strip() for line in text.splitlines())
@@ -22,7 +26,7 @@ def clean_lines(text):
if line and not line.startswith('#'))
glob_all = f'{GLOB_ALL} '
lines = (re.sub(r'^[*] ', glob_all, line) for line in lines)
- lines = (os.path.join(REPO_ROOT, line) for line in lines)
+ lines = (_abs(line) for line in lines)
return list(lines)


@@ -55,26 +59,31 @@ def clean_lines(text):

# Windows
Modules/_winapi.c # windows.h
+Modules/expat/winconfig.h
Modules/overlapped.c # winsock.h
Python/dynload_win.c # windows.h
-Modules/expat/winconfig.h
Python/thread_nt.h

# other OS-dependent
+Python/dynload_aix.c # sys/ldr.h
Python/dynload_dl.c # dl.h
Python/dynload_hpux.c # dl.h
-Python/dynload_aix.c # sys/ldr.h
Python/thread_pthread.h

# only huge constants (safe but parsing is slow)
+Modules/_blake2/impl/blake2-kat.h
Modules/_ssl_data.h
+Modules/_ssl_data_300.h
+Modules/_ssl_data_111.h
+Modules/cjkcodecs/mappings_*.h
Modules/unicodedata_db.h
Modules/unicodename_db.h
-Modules/cjkcodecs/mappings_*.h
Objects/unicodetype_db.h
-Python/importlib.h
-Python/importlib_external.h
-Python/importlib_zipimport.h
+
+# generated
+Python/frozen_modules/*.h
+Python/opcode_targets.h
+Python/stdlib_module_names.h

# @end=conf@
''')
@@ -126,35 +135,40 @@ def clean_lines(text):
Parser/**/*.c Py_BUILD_CORE 1
Objects/**/*.c Py_BUILD_CORE 1

-Modules/faulthandler.c Py_BUILD_CORE 1
+Modules/_asynciomodule.c Py_BUILD_CORE 1
+Modules/_collectionsmodule.c Py_BUILD_CORE 1
+Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1
+Modules/_ctypes/cfield.c Py_BUILD_CORE 1
+Modules/_cursesmodule.c Py_BUILD_CORE 1
+Modules/_datetimemodule.c Py_BUILD_CORE 1
Modules/_functoolsmodule.c Py_BUILD_CORE 1
-Modules/gcmodule.c Py_BUILD_CORE 1
-Modules/getpath.c Py_BUILD_CORE 1
+Modules/_heapqmodule.c Py_BUILD_CORE 1
Modules/_io/*.c Py_BUILD_CORE 1
-Modules/itertoolsmodule.c Py_BUILD_CORE 1
Modules/_localemodule.c Py_BUILD_CORE 1
-Modules/main.c Py_BUILD_CORE 1
-Modules/posixmodule.c Py_BUILD_CORE 1
-Modules/signalmodule.c Py_BUILD_CORE 1
+Modules/_operator.c Py_BUILD_CORE 1
+Modules/_posixsubprocess.c Py_BUILD_CORE 1
+Modules/_sre.c Py_BUILD_CORE 1
Modules/_threadmodule.c Py_BUILD_CORE 1
Modules/_tracemalloc.c Py_BUILD_CORE 1
-Modules/_asynciomodule.c Py_BUILD_CORE 1
-Modules/mathmodule.c Py_BUILD_CORE 1
-Modules/cmathmodule.c Py_BUILD_CORE 1
Modules/_weakref.c Py_BUILD_CORE 1
+Modules/_zoneinfo.c Py_BUILD_CORE 1
+Modules/atexitmodule.c Py_BUILD_CORE 1
+Modules/cmathmodule.c Py_BUILD_CORE 1
+Modules/faulthandler.c Py_BUILD_CORE 1
+Modules/gcmodule.c Py_BUILD_CORE 1
+Modules/getpath.c Py_BUILD_CORE 1
+Modules/itertoolsmodule.c Py_BUILD_CORE 1
+Modules/main.c Py_BUILD_CORE 1
+Modules/mathmodule.c Py_BUILD_CORE 1
+Modules/posixmodule.c Py_BUILD_CORE 1
Modules/sha256module.c Py_BUILD_CORE 1
Modules/sha512module.c Py_BUILD_CORE 1
-Modules/_datetimemodule.c Py_BUILD_CORE 1
-Modules/_ctypes/cfield.c Py_BUILD_CORE 1
-Modules/_heapqmodule.c Py_BUILD_CORE 1
-Modules/_posixsubprocess.c Py_BUILD_CORE 1
-Modules/_sre.c Py_BUILD_CORE 1
-Modules/_collectionsmodule.c Py_BUILD_CORE 1
-Modules/_zoneinfo.c Py_BUILD_CORE 1
+Modules/signalmodule.c Py_BUILD_CORE 1
+Modules/symtablemodule.c Py_BUILD_CORE 1
+Modules/timemodule.c Py_BUILD_CORE 1
Modules/unicodedata.c Py_BUILD_CORE 1
-Modules/_cursesmodule.c Py_BUILD_CORE 1
-Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1
Objects/stringlib/codecs.h Py_BUILD_CORE 1
+Objects/stringlib/unicode_format.h Py_BUILD_CORE 1
Python/ceval_gil.h Py_BUILD_CORE 1
Python/condvar.h Py_BUILD_CORE 1

@@ -244,6 +258,7 @@ def clean_lines(text):
Modules/sre_lib.h LOCAL(type) static inline type
Modules/sre_lib.h SRE(F) sre_ucs2_##F
Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1
+Include/internal/pycore_bitutils.h _Py__has_builtin(B) 0

# @end=tsv@
''')[1:]
@@ -264,6 +279,18 @@ def clean_lines(text):
'./Include/cpython/',
]

+MAX_SIZES = {
+    _abs('Include/**/*.h'): (5_000, 500),
+    _abs('Modules/_ctypes/ctypes.h'): (5_000, 500),
+    _abs('Modules/_datetimemodule.c'): (20_000, 300),
+    _abs('Modules/posixmodule.c'): (20_000, 500),
+    _abs('Modules/termios.c'): (10_000, 800),
+    _abs('Modules/_testcapimodule.c'): (20_000, 400),
+    _abs('Modules/expat/expat.h'): (10_000, 400),
+    _abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
+    _abs('Objects/typeobject.c'): (20_000, 200),
+}
+

def get_preprocessor(*,
file_macros=None,
@@ -298,6 +325,7 @@ def parse_file(filename, *,
filename,
match_kind=match_kind,
get_file_preprocessor=get_file_preprocessor,
+ file_maxsizes=MAX_SIZES,
)


@@ -317,5 +345,6 @@ def parse_files(filenames=None, *,
filenames,
match_kind=match_kind,
get_file_preprocessor=get_file_preprocessor,
+ file_maxsizes=MAX_SIZES,
**file_kwargs
)

_______________________________________________
Python-checkins mailing list
Python-checkins@python.org
https://mail.python.org/mailman/listinfo/python-checkins