Add an equivalent to re groups() method. This is useful on debug messages. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 2816bd9f90f8..19e777e2c97e 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -106,6 +106,13 @@ class KernRe: return self.last_match.group(num) + def groups(self): + """ + Returns the group results of the last match + """ + + return self.last_match.groups() + class NestedMatch: """ -- 2.52.0 The logic which checks if the line ends with ";" is currently broken: it may try to read past the buffer. Fix it by checking before trying to access line[pos]. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 19e777e2c97e..a0402c065d3a 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -265,7 +265,7 @@ class NestedMatch: out += new_sub # Drop end ';' if any - if line[pos] == ';': + if pos < len(line) and line[pos] == ';': pos += 1 cur_pos = pos -- 2.52.0 Just like functions and structs had their transform variables placed at the beginning, move variable transforms to there as well. No functional changes. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index fd57944ae907..0b68b140cd02 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -191,6 +191,18 @@ function_xforms = [ (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] +# +# Transforms for variable prototypes +# +var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + (KernRe(r"=.*"), ""), +] + # # Ancillary functions # @@ -971,15 +983,6 @@ class KernelDoc: ] OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" - sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] - # # Store the full prototype before modifying it # @@ -1003,7 +1006,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: + for r, sub in var_xforms: proto = r.sub(sub, proto) proto = proto.rstrip() -- 2.52.0 Mangling with #defines is not nice, as we may end up removing the macro names, preventing several macros from being properly documented. Also, on defines, we have something like: #define foo(a1, a2, a3, ...) \ /* some real implementation */ The prototype part (first line in this example) won't contain any macros, so there is no need to apply any regexes to it. With that, move the apply_transforms() logic to ensure that it will be called only on functions. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 0b68b140cd02..3ba2cda2487a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -163,7 +163,7 @@ struct_nested_prefixes = [ # # Transforms for function prototypes # -function_xforms = [ +function_xforms = [ (KernRe(r"^static +"), ""), (KernRe(r"^extern +"), ""), (KernRe(r"^asmlinkage +"), ""), @@ -1065,10 +1065,7 @@ class KernelDoc: found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1087,6 +1084,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) -- 2.52.0 Some annotations macros may have nested parenthesis, causing normal regex parsing to fail. Extend apply_transforms to also use NestedMatch and add support for nested functions. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 38 ++++++++++++++++++---------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 3ba2cda2487a..ae5b2ef80f75 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -152,7 +152,7 @@ struct_xforms = [ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), ] # -# Regexes here are guaranteed to have the end delimiter matching +# Struct regexes here are guaranteed to have the end delimiter matching # the start delimiter. Yet, right now, only one replace group # is allowed. # @@ -160,6 +160,13 @@ struct_nested_prefixes = [ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] +# +# Function Regexes here are guaranteed to have the end delimiter matching +# the start delimiter. +# +function_nested_prefixes = [ +] + # # Transforms for function prototypes # @@ -207,13 +214,6 @@ var_xforms = [ # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. 
- """ - for search, subst in xforms: - text = search.sub(subst, text) - return text multi_space = KernRe(r'\s\s+') def trim_whitespace(s): @@ -408,6 +408,8 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] + self.nested = NestedMatch() + # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -505,6 +507,16 @@ class KernelDoc: # State flags self.state = state.NORMAL + def apply_transforms(self, regex_xforms, nested_xforms, text): + """Apply a set of transforms to a block of text.""" + for search, subst in regex_xforms: + text = search.sub(subst, text) + + for search, sub in nested_xforms: + text = self.nested.sub(search, sub, text) + + return text.strip() + def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): """ @@ -881,11 +893,9 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.apply_transforms(struct_xforms, struct_nested_prefixes, + members) - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # @@ -1088,7 +1098,9 @@ class KernelDoc: # # Apply the initial transformations. # - prototype = apply_transforms(function_xforms, prototype) + prototype = self.apply_transforms(function_xforms, + function_nested_prefixes, + prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) -- 2.52.0 Some annotations macros may have nested parenthesis, causing normal regex parsing to fail. The __attribute__ regex is currently very complex to try to avoid that, but it doesn't catch all cases. Ensure that the parenthesis will be properly handled by using the NestedMatch() logic. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ae5b2ef80f75..64165d8df84e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -165,6 +165,7 @@ struct_nested_prefixes = [ # the start delimiter. # function_nested_prefixes = [ + (re.compile(r"__attribute__\s*\("), ""), ] # @@ -195,7 +196,6 @@ function_xforms = [ (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] # -- 2.52.0 The regular expressions meant to pick variable types are too naive: they forgot that the type word may contain underlines. 
Co-developed-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 64165d8df84e..201c4f7298d7 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1027,14 +1027,14 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- 2.52.0 The indentation is wrong for the second regex, which causes problems on variables with defaults. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 201c4f7298d7..cbfdaba39494 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1035,9 +1035,9 @@ class KernelDoc: default_val = r.group(2) else: r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return -- 2.52.0 This is a new parser that we're still fine-tuning. 
Add some extra debug messages to help addressing issues over there. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index cbfdaba39494..ccee4e0bcaab 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1033,11 +1033,19 @@ class KernelDoc: declaration_name = r.group(1) default_val = r.group(2) + + self.config.log.debug("Variable proto parser: %s from '%s'", + r.groups(), proto) + else: r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) + + if default_val: + self.config.log.debug("default: '%s'", default_val) + if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return @@ -1045,6 +1053,9 @@ class KernelDoc: if default_val: default_val = default_val.lstrip("=").strip() + self.config.log.debug("'%s' variable prototype: '%s', default: %s", + declaration_name, proto, default_val) + self.output_declaration("var", declaration_name, full_proto=full_proto, default_val=default_val, -- 2.52.0 If we do that, the defaults won't be parsed. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ccee4e0bcaab..0b6cba442d72 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -207,7 +207,6 @@ var_xforms = [ (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), ] # -- 2.52.0 The regular expression currently expects a single word for the type, but it may be something like "struct foo". Add support for it. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 0b6cba442d72..21cc4e19a1e8 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1026,7 +1026,7 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) @@ -1037,7 +1037,7 @@ class KernelDoc: r.groups(), proto) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- 2.52.0 From: Randy Dunlap Drop all context analysis and lock (tracking) attributes to avoid kernel-doc warnings. Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. 
[error at 96] int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex) ------------------------------------------------------------------------------------------------^ Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92] int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock) --------------------------------------------------------------------------------------------^ The regex is suggested by Mauro; mine was too greedy. Thanks. Updated context analysis and lock macros list provided by PeterZ. Thanks. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_parser.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 21cc4e19a1e8..92b550189988 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -81,6 +81,8 @@ struct_xforms = [ (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), (KernRe(r'\s*__packed\s*', re.S), ' '), (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*__private', re.S), ' '), @@ -165,6 +167,16 @@ struct_nested_prefixes = [ # the start delimiter. 
# function_nested_prefixes = [ + (re.compile(r"__cond_acquires\s*\("), ""), + (re.compile(r"__cond_releases\s*\("), ""), + (re.compile(r"__acquires\s*\("), ""), + (re.compile(r"__releases\s*\("), ""), + (re.compile(r"__must_hold\s*\("), ""), + (re.compile(r"__must_not_hold\s*\("), ""), + (re.compile(r"__must_hold_shared\s*\("), ""), + (re.compile(r"__cond_acquires_shared\s*\("), ""), + (re.compile(r"__acquires_shared\s*\("), ""), + (re.compile(r"__releases_shared\s*\("), ""), (re.compile(r"__attribute__\s*\("), ""), ] @@ -195,6 +207,7 @@ function_xforms = [ (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), ] @@ -204,6 +217,8 @@ function_xforms = [ var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- 2.52.0 Convert LIST_HEAD into struct list_head when handling its prototype. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 92b550189988..33710c4be145 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -219,6 +219,7 @@ var_xforms = [ (KernRe(r"__ro_after_init"), ""), (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- 2.52.0 From: Randy Dunlap Parse the macro VIRTIO_DECLARE_FEATURES(name) and expand it to its definition. This prevents one build warning: WARNING: include/linux/virtio.h:188 struct member 'VIRTIO_DECLARE_FEATURES(features' not described in 'virtio_device' Signed-off-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 33710c4be145..db140363104a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -152,6 +152,7 @@ struct_xforms = [ struct_args_pattern + r'\)', re.S), r'\1 \2[]'), (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), ] # # Struct regexes here are guaranteed to have the end delimiter matching -- 2.52.0 The logic inside NestedMatch currently doesn't consider that function arguments may contain chars and strings, which may themselves contain delimiters. 
Add logic to handle strings and escape characters on them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index a0402c065d3a..1861799f1966 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -195,6 +195,8 @@ class NestedMatch: for match_re in regex.finditer(line): start = match_re.start() offset = match_re.end() + string_char = None + escape = False d = line[offset - 1] if d not in self.DELIMITER_PAIRS: @@ -208,6 +210,22 @@ class NestedMatch: d = line[pos] + if escape: + escape = False + continue + + if string_char: + if d == '\\': + escape = True + elif d == string_char: + string_char = None + + continue + + if d in ('"', "'"): + string_char = d + continue + if d in self.DELIMITER_PAIRS: end = self.DELIMITER_PAIRS[d] -- 2.52.0 the __repr__() function is used by autodoc to document macro initialization. Add a better representation for them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 1861799f1966..3f405addcc58 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -52,7 +52,28 @@ class KernRe: return self.regex.pattern def __repr__(self): - return f're.compile("{self.regex.pattern}")' + """ + Returns a displayable version of the class init. 
+ """ + + flag_map = { + re.IGNORECASE: "re.I", + re.MULTILINE: "re.M", + re.DOTALL: "re.S", + re.VERBOSE: "re.X", + } + + flags = [] + for flag, name in flag_map.items(): + if self.regex.flags & flag: + flags.append(name) + + flags_name = " | ".join(flags) + + if flags_name: + return f'KernRe("{self.regex.pattern}", {flags_name})' + else: + return f'KernRe("{self.regex.pattern}")' def __add__(self, other): """ -- 2.52.0 Store delimiters and its regex-compiled version as const vars. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 35 ++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 3f405addcc58..7b7ddc50ac36 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -99,6 +99,13 @@ class KernRe: self.last_match = self.regex.search(string) return self.last_match + def finditer(self, string): + """ + Alias to re.finditer. + """ + + return self.regex.finditer(string) + def findall(self, string): """ Alias to re.findall. @@ -134,6 +141,16 @@ class KernRe: return self.last_match.groups() +#: Nested delimited pairs (brackets and parenthesis) +DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', +} + +#: compiled delimiters +RE_DELIM = KernRe(r'[\{\}\[\]\(\)]') + class NestedMatch: """ @@ -183,14 +200,6 @@ class NestedMatch: # # FOO(arg1, arg2, arg3) - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - def _search(self, regex, line): """ Finds paired blocks for a regex that ends with a delimiter. 
@@ -220,13 +229,13 @@ class NestedMatch: escape = False d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: + if d not in DELIMITER_PAIRS: continue - end = self.DELIMITER_PAIRS[d] + end = DELIMITER_PAIRS[d] stack.append(end) - for match in self.RE_DELIM.finditer(line[offset:]): + for match in RE_DELIM.finditer(line[offset:]): pos = match.start() + offset d = line[pos] @@ -247,8 +256,8 @@ class NestedMatch: string_char = d continue - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] + if d in DELIMITER_PAIRS: + end = DELIMITER_PAIRS[d] stack.append(end) continue -- 2.52.0 Future patches will allow parsing each argument instead of the whole set. Prepare for it by changing the placeholder that replaces all args from \1 to \0. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/kdoc_re.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index db140363104a..4d52a00acfad 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -160,7 +160,7 @@ struct_xforms = [ # is allowed. # struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), ] # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 7b7ddc50ac36..8933e1a62776 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -289,8 +289,9 @@ class NestedMatch: It matches a regex that it is followed by a delimiter, replacing occurrences only if all delimiters are paired. - if r'\1' is used, it works just like re: it places there the - matched paired data with the delimiter stripped. + if r'\0' is used, it works on a similar way of using re.group(0): + it places the entire args of the matched paired data, with the + delimiter stripped. 
If count is different than zero, it will replace at most count items. @@ -306,9 +307,9 @@ class NestedMatch: # Value, ignoring start/end delimiters value = line[end:pos - 1] - # replaces \1 at the sub string, if \1 is used there + # replaces \0 at the sub string, if \0 is used there new_sub = sub - new_sub = new_sub.replace(r'\1', value) + new_sub = new_sub.replace(r'\0', value) out += new_sub -- 2.52.0 Instead of using re_compile, let's create the class with the regex and use KernRe to keep it cached. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 55 ++++++++-------------------- tools/lib/python/kdoc/kdoc_re.py | 22 ++++++++--- 2 files changed, 32 insertions(+), 45 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4d52a00acfad..3a5614106af7 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -153,32 +153,7 @@ struct_xforms = [ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), -] -# -# Struct regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), -] - -# -# Function Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. 
-# -function_nested_prefixes = [ - (re.compile(r"__cond_acquires\s*\("), ""), - (re.compile(r"__cond_releases\s*\("), ""), - (re.compile(r"__acquires\s*\("), ""), - (re.compile(r"__releases\s*\("), ""), - (re.compile(r"__must_hold\s*\("), ""), - (re.compile(r"__must_not_hold\s*\("), ""), - (re.compile(r"__must_hold_shared\s*\("), ""), - (re.compile(r"__cond_acquires_shared\s*\("), ""), - (re.compile(r"__acquires_shared\s*\("), ""), - (re.compile(r"__releases_shared\s*\("), ""), - (re.compile(r"__attribute__\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] # @@ -210,6 +185,17 @@ function_xforms = [ (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r"__attribute__\s*\("), ""), ] # @@ -230,7 +216,6 @@ var_xforms = [ # Ancillary functions # - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -424,8 +409,6 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - self.nested = NestedMatch() - # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -523,14 +506,11 @@ class KernelDoc: # State flags self.state = state.NORMAL - def apply_transforms(self, regex_xforms, nested_xforms, text): + def apply_transforms(self, xforms, text): """Apply a set of transforms to a block of text.""" - for search, subst in regex_xforms: + for search, subst in xforms: text = search.sub(subst, text) - for search, sub in nested_xforms: - text = 
self.nested.sub(search, sub, text) - return text.strip() def push_parameter(self, ln, decl_type, param, dtype, @@ -909,8 +889,7 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - members = self.apply_transforms(struct_xforms, struct_nested_prefixes, - members) + members = self.apply_transforms(struct_xforms, members) # # Deal with embedded struct and union members, and drop enums entirely. @@ -1125,9 +1104,7 @@ class KernelDoc: # # Apply the initial transformations. # - prototype = self.apply_transforms(function_xforms, - function_nested_prefixes, - prototype) + prototype = self.apply_transforms(function_xforms, prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 8933e1a62776..e34d55c25680 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -200,7 +200,10 @@ class NestedMatch: # # FOO(arg1, arg2, arg3) - def _search(self, regex, line): + def __init__(self, regex): + self.regex = KernRe(regex) + + def _search(self, line): """ Finds paired blocks for a regex that ends with a delimiter. @@ -222,7 +225,7 @@ class NestedMatch: stack = [] - for match_re in regex.finditer(line): + for match_re in self.regex.finditer(line): start = match_re.start() offset = match_re.end() string_char = None @@ -270,7 +273,7 @@ class NestedMatch: yield start, offset, pos + 1 break - def search(self, regex, line): + def search(self, line): """ This is similar to re.search: @@ -278,11 +281,11 @@ class NestedMatch: returning occurrences only if all delimiters are paired. 
""" - for t in self._search(regex, line): + for t in self._search(line): yield line[t[0]:t[2]] - def sub(self, regex, sub, line, count=0): + def sub(self, sub, line, count=0): """ This is similar to re.sub: @@ -301,7 +304,7 @@ class NestedMatch: cur_pos = 0 n = 0 - for start, end, pos in self._search(regex, line): + for start, end, pos in self._search(line): out += line[cur_pos:start] # Value, ignoring start/end delimiters @@ -328,3 +331,10 @@ class NestedMatch: out += line[cur_pos:l] return out + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'NestedMatch("{self.regex.regex.pattern}")' -- 2.52.0 Currently, NestedMatch has very limited support for argument replacement: it is all or nothing. Add support to allow replacing individual arguments as well. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 84 ++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 25 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index e34d55c25680..858cc688a58f 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -177,29 +177,6 @@ class NestedMatch: will ignore the search string. """ - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter-aligned. - # - # The content inside parentheses is converted into a single replace - # group (e.g. r`\1'). 
- # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to: - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - def __init__(self, regex): self.regex = KernRe(regex) @@ -285,6 +262,59 @@ class NestedMatch: yield line[t[0]:t[2]] + @staticmethod + def _split_args(all_args, delim=","): + """ + Helper method to split comma-separated function arguments + or struct elements, if delim is set to ";". + + It returns a list of arguments that can be used later on by + the sub() method. + """ + args = [all_args] + stack = [] + arg_start = 0 + string_char = None + escape = False + + for idx, d in enumerate(all_args): + if escape: + escape = False + continue + + if string_char: + if d == '\\': + escape = True + elif d == string_char: + string_char = None + + continue + + if d in ('"', "'"): + string_char = d + continue + + if d in DELIMITER_PAIRS: + end = DELIMITER_PAIRS[d] + + stack.append(end) + continue + + if stack and d == stack[-1]: + stack.pop() + continue + + if d == delim and not stack: + args.append(all_args[arg_start:idx].strip()) + arg_start = idx + 1 + + # Add the last argument (if any) + last = all_args[arg_start:].strip() + if last: + args.append(last) + + return args + def sub(self, sub, line, count=0): """ This is similar to re.sub: @@ -310,9 +340,13 @@ class NestedMatch: # Value, ignoring start/end delimiters value = line[end:pos - 1] - # replaces \0 at the sub string, if \0 is used there + # replace arguments new_sub = sub - new_sub = new_sub.replace(r'\0', value) + if "\\" in sub: + args = self._split_args(value) + + new_sub = re.sub(r'\\(\d+)', + lambda m: args[int(m.group(1))], new_sub) out += new_sub -- 2.52.0 Instead of converting them on two steps, implement a single logic to parse them using the new sub functionality of NestedMatch.sub(). 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 3a5614106af7..d2eb93f9d489 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -124,10 +124,11 @@ struct_xforms = [ # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + (NestedMatch(r'\bstruct_group\s*\('), r'\2'), + (NestedMatch(r'\bstruct_group_attr\s*\('), r'\3'), + (NestedMatch(r'\bstruct_group_tagged\s*\('), r'struct \1 { \3 } \2;'), + (NestedMatch(r'\b__struct_group\s*\('), r'\4'), + # # Replace macros # @@ -153,7 +154,6 @@ struct_xforms = [ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] # -- 2.52.0 The struct page_pool_params definition has a private definition on it: struct page_pool_params { struct_group_tagged(page_pool_params_fast, fast, unsigned int order; unsigned int pool_size; int nid; struct device *dev; struct napi_struct *napi; enum dma_data_direction dma_dir; unsigned int max_len; unsigned int offset; ); struct_group_tagged(page_pool_params_slow, slow, struct net_device *netdev; unsigned int queue_idx; unsigned int flags; /* private: used by test code only */ void (*init_callback)(netmem_ref netmem, void 
*arg); void *init_arg; ); }; This makes the kernel-doc parser miss the end parenthesis of the second struct_group_tagged, causing documentation issues. Address it by ensuring that, if there is anything left on the stack, it will be placed as the last part of the argument. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 858cc688a58f..5f455ffff7b2 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -201,6 +201,9 @@ class NestedMatch: """ stack = [] + start = 0 + offset = 0 + pos = 0 for match_re in self.regex.finditer(line): start = match_re.start() @@ -250,6 +253,11 @@ class NestedMatch: yield start, offset, pos + 1 break + # When /* private */ is used, the end delimiter may be missing + if stack: + stack.pop() + yield start, offset, len(line) + 1 + def search(self, line): """ This is similar to re.search: -- 2.52.0 Add a more convenient class to match C functions and avoid issues at the beginning and ending of NestedMatch inits. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 5f455ffff7b2..a49b42e3d189 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -380,3 +380,14 @@ class NestedMatch: """ return f'NestedMatch("{self.regex.regex.pattern}")' + + +class CFunction(NestedMatch): + r""" + Variant of NestedMatch. + + It overrides the init method to ensure that the regular expression will + start with a ``\b`` and end with a C function delimiter (open parenthesis). 
+ """ + def __init__(self, regex): + self.regex = KernRe(r"\b" + regex + r"\s*\(") -- 2.52.0 The match logic for transforms becomes a lot clearer if we use CFunction convenient alias class instead of NestedMatch. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 38 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index d2eb93f9d489..50d57c6799bb 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,7 @@ import sys import re from pprint import pformat -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_re import CFunction, KernRe from kdoc.kdoc_item import KdocItem # @@ -119,22 +119,22 @@ struct_xforms = [ # # As it doesn't properly match the end parenthesis on some cases. # - # So, a better solution was crafted: there's now a NestedMatch + # So, a better solution was crafted: there's now a CFunction # class that ensures that delimiters after a search are properly # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. # - (NestedMatch(r'\bstruct_group\s*\('), r'\2'), - (NestedMatch(r'\bstruct_group_attr\s*\('), r'\3'), - (NestedMatch(r'\bstruct_group_tagged\s*\('), r'struct \1 { \3 } \2;'), - (NestedMatch(r'\b__struct_group\s*\('), r'\4'), + (CFunction('struct_group'), r'\2'), + (CFunction('struct_group_attr'), r'\3'), + (CFunction('struct_group_tagged'), r'struct \1 { \3 } \2;'), + (CFunction('__struct_group'), r'\4'), # # Replace macros # - # TODO: use NestedMatch for FOO($1, $2, ...) matches + # TODO: use CFunction on all FOO($1, $2, ...) matches # - # it is better to also move those to the NestedMatch logic, + # it is better to also move those to the CFunction logic, # to ensure that parentheses will be properly matched. 
# (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), @@ -185,17 +185,17 @@ function_xforms = [ (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r"__attribute__\s*\("), ""), + (CFunction("__cond_acquires"), ""), + (CFunction("__cond_releases"), ""), + (CFunction("__acquires"), ""), + (CFunction("__releases"), ""), + (CFunction("__must_hold"), ""), + (CFunction("__must_not_hold"), ""), + (CFunction("__must_hold_shared"), ""), + (CFunction("__cond_acquires_shared"), ""), + (CFunction("__acquires_shared"), ""), + (CFunction("__releases_shared"), ""), + (CFunction("__attribute__"), ""), ] # -- 2.52.0 While the previous version does a better job representing the actual struct, it ends losing documentation from each member. Change the replacements to minimize such changes. 
With that, the only differences before/after using NestedMatch new replacement logic are (at man page output): --- before.log 2026-01-29 06:14:20.163592584 +0100 +++ after.log 2026-01-29 06:32:04.811370234 +0100 @@ -1573701 +1573701 @@ -.BI " struct ice_health_tx_hang_buf tx_hang_buf;" +.BI " struct ice_health_tx_hang_buf tx_hang_buf;" @@ -4156451 +4156451 @@ -.BI " struct libeth_fq_fp fp;" +.BI " struct libeth_fq_fp fp;" @@ -4164041 +4164041 @@ -.BI " struct libeth_xskfq_fp fp;" +.BI " struct libeth_xskfq_fp fp;" @@ -4269434 +4269434 @@ -.BI " struct page_pool_params_fast fast;" +.BI " struct page_pool_params_fast fast;" @@ -4269452 +4269452 @@ -.BI " struct page_pool_params_slow slow;" +.BI " struct page_pool_params_slow slow;" @@ -4269454 +4269454 @@ -.BI " STRUCT_GROUP( struct net_device *netdev;" +.BI " struct net_device *netdev;" e.g. basically whitespaces, plus a fix NestedMatch to better handle /* private */ comments. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 50d57c6799bb..1e8e156e2a9e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -126,7 +126,7 @@ struct_xforms = [ # (CFunction('struct_group'), r'\2'), (CFunction('struct_group_attr'), r'\3'), - (CFunction('struct_group_tagged'), r'struct \1 { \3 } \2;'), + (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), (CFunction('__struct_group'), r'\4'), # -- 2.52.0 Over the time, most of the changes at kernel-doc are related to maintaining a list of transforms to convert macros into pure C code. Place such transforms on a separate module, to cleanup the parser module. While here, drop the now obsolete comment about the two-steps logic to handle struct_group macros. 
There is an advantage on that: QEMU also uses our own kernel-doc, but the xforms list there is different. By placing it on a separate module, we can minimize the differences and make it easier to keep QEMU in sync with Kernel upstream. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- Documentation/tools/kdoc_parser.rst | 8 ++ tools/lib/python/kdoc/kdoc_files.py | 3 +- tools/lib/python/kdoc/kdoc_parser.py | 147 ++------------------------ tools/lib/python/kdoc/xforms_lists.py | 117 ++++++++++++++++++++ 4 files changed, 133 insertions(+), 142 deletions(-) create mode 100644 tools/lib/python/kdoc/xforms_lists.py diff --git a/Documentation/tools/kdoc_parser.rst b/Documentation/tools/kdoc_parser.rst index 03ee54a1b1cc..55b202173195 100644 --- a/Documentation/tools/kdoc_parser.rst +++ b/Documentation/tools/kdoc_parser.rst @@ -4,6 +4,14 @@ Kernel-doc parser stage ======================= +C replacement rules used by the parser +====================================== + +.. 
automodule:: lib.python.kdoc.xforms_lists + :members: + :show-inheritance: + :undoc-members: + File handler classes ==================== diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 022487ea2cc6..7357c97a4b01 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -15,6 +15,7 @@ import os import re from kdoc.kdoc_parser import KernelDoc +from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat @@ -117,7 +118,7 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname) + doc = KernelDoc(self.config, fname, CTransforms) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 1e8e156e2a9e..a280fe581937 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,142 +75,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # struct_args_pattern = r'([^,)]+)' -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 
- # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a CFunction - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (CFunction('struct_group'), r'\2'), - (CFunction('struct_group_attr'), r'\3'), - (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), - (CFunction('__struct_group'), r'\4'), - - # - # Replace macros - # - # TODO: use CFunction on all FOO($1, $2, ...) matches - # - # it is better to also move those to the CFunction logic, - # to ensure that parentheses will be properly matched. 
- # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - 
(KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__no_context_analysis\s*"), ""), - (KernRe(r"__attribute_const__ +"), ""), - (CFunction("__cond_acquires"), ""), - (CFunction("__cond_releases"), ""), - (CFunction("__acquires"), ""), - (CFunction("__releases"), ""), - (CFunction("__must_hold"), ""), - (CFunction("__must_not_hold"), ""), - (CFunction("__must_hold_shared"), ""), - (CFunction("__cond_acquires_shared"), ""), - (CFunction("__acquires_shared"), ""), - (CFunction("__releases_shared"), ""), - (CFunction("__attribute__"), ""), -] - -# -# Transforms for variable prototypes -# -var_xforms = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), -] # # Ancillary functions @@ -394,11 +258,12 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms # Initial state for the state machines self.state = state.NORMAL @@ -889,7 +754,7 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - members = self.apply_transforms(struct_xforms, members) + members = self.apply_transforms(self.xforms.struct_xforms, members) # # Deal with embedded struct and union members, and drop enums entirely. 
@@ -1011,8 +876,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in var_xforms: - proto = r.sub(sub, proto) + proto = self.apply_transforms(self.xforms.var_xforms, proto) proto = proto.rstrip() @@ -1104,7 +968,8 @@ class KernelDoc: # # Apply the initial transformations. # - prototype = self.apply_transforms(function_xforms, prototype) + prototype = self.apply_transforms(self.xforms.function_xforms, + prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..88968bafdb78 --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . + +import re + +from kdoc.kdoc_re import CFunction, KernRe + +struct_args_pattern = r'([^,)]+)' + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. 
+ """ + + #: Transforms for structs and unions + struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + + (CFunction('struct_group'), r'\2'), + (CFunction('struct_group_attr'), r'\3'), + (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), + (CFunction('__struct_group'), r'\4'), + + # + # Replace macros + # + # TODO: use CFunction on all FOO($1, $2, ...) matches + # + # it is better to also move those to the CFunction logic, + # to ensure that parentheses will be properly matched. 
+ # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + ] + + #: Transforms for function prototypes + function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + 
(KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__no_context_analysis\s*"), ""), + (KernRe(r"__attribute_const__ +"), ""), + (CFunction("__cond_acquires"), ""), + (CFunction("__cond_releases"), ""), + (CFunction("__acquires"), ""), + (CFunction("__releases"), ""), + (CFunction("__must_hold"), ""), + (CFunction("__must_not_hold"), ""), + (CFunction("__must_hold_shared"), ""), + (CFunction("__cond_acquires_shared"), ""), + (CFunction("__acquires_shared"), ""), + (CFunction("__releases_shared"), ""), + (CFunction("__attribute__"), ""), + ] + + #: Transforms for variables + var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] -- 2.52.0 Removing it causes the parser to break some conversions, when NestedMatch is used on macros like __attribute__(). Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov --- tools/lib/python/kdoc/kdoc_re.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index a49b42e3d189..294051dbc050 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -358,10 +358,6 @@ class NestedMatch: out += new_sub - # Drop end ';' if any - if pos < len(line) and line[pos] == ';': - pos += 1 - cur_pos = pos n += 1 -- 2.52.0 When NestedMatch is used, blank spaces may be placed after substitutions. As such spaces are part of the C syntax, we can safely drop them, improving the quality of the output. 
Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_re.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 294051dbc050..886e33ffd2b9 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -341,8 +341,12 @@ class NestedMatch: cur_pos = 0 n = 0 + l = len(line) for start, end, pos in self._search(line): + while cur_pos < l and line[cur_pos] == ' ': + cur_pos += 1 + out += line[cur_pos:start] # Value, ignoring start/end delimiters @@ -365,7 +369,9 @@ class NestedMatch: break # Append the remaining string - l = len(line) + while cur_pos < l and line[cur_pos] == ' ': + cur_pos += 1 + out += line[cur_pos:l] return out -- 2.52.0 The new CFunction class handles macros better, as it works the same way C compilers do, handling delimiters the right way. This allows removing complex regular expressions, placing instead just a simple one with the name(s) of the functions to be replaced. Doing a before/after check using "kernel-doc -man ." shows only cosmetic changes (whitespaces, mostly). 
Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/xforms_lists.py | 71 ++++++++++++--------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 88968bafdb78..ea6520b38ea2 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -17,51 +17,38 @@ class CTransforms: #: Transforms for structs and unions struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), + (CFunction("__attribute__"), ' '), + (CFunction('__aligned'), ' '), + (CFunction('__counted_by'), ' '), + (CFunction('__counted_by_(le|be)'), ' '), + (CFunction('__guarded_by'), ' '), + (CFunction('__pt_guarded_by'), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*__private', re.S), ' '), (KernRe(r'\s*__rcu', re.S), ' '), (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + + (CFunction('__cacheline_group_(begin|end)'), ''), (CFunction('struct_group'), r'\2'), (CFunction('struct_group_attr'), r'\3'), (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), (CFunction('__struct_group'), r'\4'), - # - # Replace macros - # - # TODO: use CFunction on all FOO($1, $2, ...) matches - # - # it is better to also move those to the CFunction logic, - # to ensure that parentheses will be properly matched. 
- # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + (CFunction('__ETHTOOL_DECLARE_LINK_MODE_MASK'), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (CFunction('DECLARE_PHY_INTERFACE_MASK',), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (CFunction('DECLARE_BITMAP'), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + + (CFunction('DECLARE_HASHTABLE'), r'unsigned long \1[1 << ((\2) - 1)]'), + (CFunction('DECLARE_KFIFO'), r'\2 *\1'), + (CFunction('DECLARE_KFIFO_PTR'), r'\2 *\1'), + (CFunction('(?:__)?DECLARE_FLEX_ARRAY'), r'\1 \2[]'), + (CFunction('DEFINE_DMA_UNMAP_ADDR'), r'dma_addr_t \1'), + (CFunction('DEFINE_DMA_UNMAP_LEN'), r'__u32 \1'), + (CFunction('VIRTIO_DECLARE_FEATURES'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), ] #: Transforms for function prototypes @@ -85,12 +72,14 @@ 
class CTransforms: (KernRe(r"__sched +"), ""), (KernRe(r"_noprof"), ""), (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), + + (CFunction('__printf'), ""), + (CFunction('__(?:re)?alloc_size'), ""), + (CFunction("__diagnose_as"), ""), + (CFunction("DECL_BUCKET_PARAMS"), r"\1, \2"), + (CFunction("__cond_acquires"), ""), (CFunction("__cond_releases"), ""), (CFunction("__acquires"), ""), @@ -108,9 +97,11 @@ class CTransforms: var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + + (CFunction('__guarded_by'), ""), + (CFunction('__pt_guarded_by'), ""), + (CFunction("LIST_HEAD"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- 2.52.0 While the main goal for kernel-doc is to be used inside the Linux Kernel, other open source projects could benefit for it. That's currently the case of QEMU, which has a fork, mainly due to two reasons: - they need an extra C function transform rule; - they handle the html output a little bit different. Add an extra optional argument to make easier for the code to be shared, as, with that, QEMU can just create a new derivated class that will contain its specific rulesets, and just copy the remaining kernel-doc files as-is. 
Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_files.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 7357c97a4b01..c35e033cf123 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -118,7 +118,7 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname, CTransforms) + doc = KernelDoc(self.config, fname, self.xforms) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -154,7 +154,7 @@ class KernelFiles(): self.error(f"Cannot find file {fname}") - def __init__(self, verbose=False, out_style=None, + def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, logger=None): @@ -193,6 +193,11 @@ class KernelFiles(): self.config.wshort_desc = wshort_desc self.config.wcontents_before_sections = wcontents_before_sections + if xforms: + self.xforms = xforms + else: + self.xforms = CTransforms() + if not logger: self.config.log = logging.getLogger("kernel-doc") else: -- 2.52.0 Having a "\digit" inside a docstring with normal strings causes PDF output to break, as it will add a weird character inside the string. It should be using a raw string instead. Yet, having r"\0" won't solve, as this would be converted in Sphinx as "0". So, this has to be inside a pre formatted text. That's said, the comment itself is probably not the best one. Rewrite the entire comment to properly document each parameter and add a "delim" parameter that will be passed to the ancillary function. 
Reported-by: Akira Yokosawa Closes: https://lore.kernel.org/linux-doc/63e99049-cc72-4156-83af-414fdde34312@gmail.com/ Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_re.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 886e33ffd2b9..f67ebe86c458 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -323,19 +323,28 @@ class NestedMatch: return args - def sub(self, sub, line, count=0): - """ - This is similar to re.sub: + def sub(self, sub, line, delim=",", count=0): + r""" + Perform a regex‑based replacement on ``line`` for all matches with + the ``self.regex`` pattern. It uses the following parameters: - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. + ``sub`` + Replacement string that may contain placeholders in the form + ``\{digit}``, where ``digit`` is an integer referring to the regex + capture group number. - if r'\0' is used, it works on a similar way of using re.group(0): - it places the entire args of the matched paired data, with the - delimiter stripped. + ``\{0}`` is a special case that expands to the entire matched text. - If count is different than zero, it will replace at most count - items. + ``line`` + The string to operate on. + + ``delim`` + The delimiter used by identify the placeholder groups + (defaults to ","). + + ``count`` + Maximum number of replacements per match. If 0 or omitted, + all matches are replaced. 
""" out = "" @@ -355,7 +364,7 @@ class NestedMatch: # replace arguments new_sub = sub if "\\" in sub: - args = self._split_args(value) + args = self._split_args(value, delim=delim) new_sub = re.sub(r'\\(\d+)', lambda m: args[int(m.group(1))], new_sub) -- 2.52.0 Documentation builds were using "-q" for a long time, but sometimes it is nice to see the Sphinx progress, without increasing build verbosity - which would also turn on kernel-doc verbosity. Instead of doing that, let's parse the sphinx-build already-existing -v: each time it is used, it increases the verbosity level. With that, if the default is to use -q, a single -v will disable quiet mode. Passing more -v will keep increasing its verbosity. Signed-off-by: Mauro Carvalho Chehab --- tools/docs/sphinx-build-wrapper | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 78ff7ac202ef..8080ace60680 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -168,6 +168,7 @@ class SphinxBuilder: parser = argparse.ArgumentParser() parser.add_argument('-j', '--jobs', type=int) parser.add_argument('-q', '--quiet', action='store_true') + parser.add_argument('-v', '--verbose', default=0, action='count') # # Other sphinx-build arguments go as-is, so place them @@ -179,10 +180,14 @@ class SphinxBuilder: # Build a list of sphinx args, honoring verbosity here if specified # - verbose = self.verbose sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts) + + verbose = sphinx_args.verbose + if self.verbose: + verbose += 1 + if sphinx_args.quiet is True: - verbose = False + verbose = 0 # # If the user explicitly sets "-j" at command line, use it. 
@@ -195,8 +200,11 @@ class SphinxBuilder: else: self.n_jobs = None - if not verbose: + if verbose < 1: self.sphinxopts += ["-q"] + else: + for i in range(1, sphinx_args.verbose): + self.sphinxopts += ["-v"] def __init__(self, builddir, venv=None, verbose=False, n_jobs=None, interactive=None): -- 2.52.0 Besides the parameters that are passed via command line arguments, the wrapper's behavior is affected by several environment variables. Document that. While here, use __doc__ for its description. Signed-off-by: Mauro Carvalho Chehab --- tools/docs/sphinx-build-wrapper | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 8080ace60680..b7c149dff06b 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -814,20 +814,42 @@ def jobs_type(value): except ValueError: raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}") # pylint: disable=W0707 +EPILOG=""" +Besides the command line arguments, several environment variables affect its +default behavior, meant to be used when called via Kernel Makefile: + +- KERNELVERSION: Kernel major version +- KERNELRELEASE: Kernel release +- KBUILD_VERBOSE: Contains the value of "make V=[0|1] variable. + When V=0 (KBUILD_VERBOSE=0), sets verbose level to "-q". +- SPHINXBUILD: Documentation build tool (default: "sphinx-build"). +- SPHINXOPTS: Extra options pased to SPHINXBUILD + (default: "-j auto" and "-q" if KBUILD_VERBOSE=0). + The "-v" flag can be used to increase verbosity. + If V=0, the first "-v" will drop "-q". +- PYTHON3: Python command to run SPHINXBUILD +- PDFLATEX: LaTeX PDF engine. (default: "xelatex") +- LATEXOPTS: Optional set of command line arguments to the LaTeX engine +- srctree: Location of the Kernel root directory (default: "."). + +""" + def main(): """ Main function. The only mandatory argument is the target. 
If not specified, the other arguments will use default values if not specified at os.environ. """ - parser = argparse.ArgumentParser(description="Kernel documentation builder") + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, + description=__doc__, + epilog=EPILOG) parser.add_argument("target", choices=list(TARGETS.keys()), help="Documentation target to build") parser.add_argument("--sphinxdirs", nargs="+", help="Specific directories to build") parser.add_argument("--builddir", default="output", - help="Sphinx configuration file") + help="Sphinx configuration file (default: %(default)s)") parser.add_argument("--theme", help="Sphinx theme to use") @@ -843,7 +865,7 @@ def main(): help="place build in verbose mode") parser.add_argument('-j', '--jobs', type=jobs_type, - help="Sets number of jobs to use with sphinx-build") + help="Sets number of jobs to use with sphinx-build(default: auto)") parser.add_argument('-i', '--interactive', action='store_true', help="Change latex default to run in interactive mode") -- 2.52.0 The KernelFiles is the main entry point to run kernel-doc, being used by both tools/docs/kernel-doc and Documentation/sphinx/kerneldoc.py. It is also used on QEMU, which also uses the kernel-doc libraries from tools/lib/python/kdoc. Properly describe its ABI contract. Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_files.py | 44 ++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index c35e033cf123..8c2059623949 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -91,7 +91,49 @@ class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. - There are two type of parsers defined here: + This is the main entry point to run kernel-doc. 
This class is initialized + using a series of optional arguments: + + ``verbose`` + If True, enables kernel-doc verbosity. Default: False. + + ``out_style`` + Class to be used to format output. If None (default), + only report errors. + + ``xforms`` + Transforms to be applied to C prototypes and data structs. + If not specified, defaults to ``CTransforms()`` + + ``werror`` + If True, treat warnings as errors, returning an error code on warnings. + + Default: False. + + ``wreturn`` + If True, warns about the lack of a return markup on functions. + + Default: False. + ``wshort_desc`` + If True, warns if initial short description is missing. + + Default: False. + + ``wcontents_before_sections`` + If True, warn if there are contents before sections (deprecated). + This option is kept just for backward-compatibility, but it does + nothing, neither here nor at the original Perl script. + + Default: False. + + ``logger`` + Optional logger class instance. + + If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + + Note: + There are two types of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and ``EXPORT_SYMBOL*`` macros; - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros. -- 2.52.0 The current logic hardcodes several values that are placed inside troff's title header (.TH). Place them as parameters to make the class more flexible. While here, remove the extra unused "LINUX" parameter at the end of the .TH header.
Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_output.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 4210b91dde5f..fe3fc0dfd02b 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -607,7 +607,14 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def __init__(self, modulename): + def emit_th(self, name): + """Emit a title header line.""" + name = name.strip() + + self.data += f'.TH "{self.modulename}" {self.section} "{name}" ' + self.data += f' "{self.date}" "{self.manual}"\n' + + def __init__(self, modulename, section="9", manual="Kernel API Manual"): """ Creates class variables. @@ -616,7 +623,11 @@ class ManFormat(OutputFormat): """ super().__init__() + self.modulename = modulename + self.section = section + self.manual = manual + self.symbols = [] dt = None @@ -632,7 +643,7 @@ class ManFormat(OutputFormat): if not dt: dt = datetime.now() - self.man_date = dt.strftime("%B %Y") + self.date = dt.strftime("%B %Y") def arg_name(self, args, name): """ @@ -724,7 +735,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -734,7 +745,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -780,7 +791,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) 
self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -813,7 +824,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -834,7 +845,7 @@ class ManFormat(OutputFormat): purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -849,7 +860,7 @@ class ManFormat(OutputFormat): definition = args.get('definition') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- 2.52.0 Using a regular expression to match .TH is problematic, as it doesn't handle well quotation marks. Use shlex instead. 
Signed-off-by: Mauro Carvalho Chehab --- tools/docs/sphinx-build-wrapper | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index b7c149dff06b..e6418e22e2ff 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -576,7 +576,6 @@ class SphinxBuilder: """ re_kernel_doc = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)") - re_man = re.compile(r'^\.TH "[^"]*" (\d+) "([^"]*)"') if docs_dir == src_dir: # @@ -616,8 +615,7 @@ class SphinxBuilder: fp = None try: for line in result.stdout.split("\n"): - match = re_man.match(line) - if not match: + if not line.startswith(".TH"): if fp: fp.write(line + '\n') continue @@ -625,7 +623,9 @@ class SphinxBuilder: if fp: fp.close() - fname = f"{output_dir}/{match.group(2)}.{match.group(1)}" + # Use shlex here, as it handles well parameters with commas + args = shlex.split(line) + fname = f"{output_dir}/{args[3]}.{args[2]}" if self.verbose: print(f"Creating {fname}") -- 2.52.0 The generated man pages are not following the current standards for Linux documentation. Reorder .TH fields for them to look like other Linux man pages. 
Signed-off-by: Mauro Carvalho Chehab --- tools/docs/sphinx-build-wrapper | 2 +- tools/lib/python/kdoc/kdoc_output.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index e6418e22e2ff..ac6852e3dd8c 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -625,7 +625,7 @@ class SphinxBuilder: # Use shlex here, as it handles well parameters with commas args = shlex.split(line) - fname = f"{output_dir}/{args[3]}.{args[2]}" + fname = f"{output_dir}/{args[1]}.{args[2]}" if self.verbose: print(f"Creating {fname}") diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index fe3fc0dfd02b..fb44cc8e0770 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -611,8 +611,8 @@ class ManFormat(OutputFormat): """Emit a title header line.""" name = name.strip() - self.data += f'.TH "{self.modulename}" {self.section} "{name}" ' - self.data += f' "{self.date}" "{self.manual}"\n' + self.data += f'.TH "{name}" {self.section} "{self.date}" ' + self.data += f' "{self.modulename}" "{self.manual}"\n' def __init__(self, modulename, section="9", manual="Kernel API Manual"): """ -- 2.52.0 As this class is part of the ABI used by both Sphinx kerneldoc extension and docs/tools/kernel-doc, better describe what parmeters are used to initialize ManOutput class. 
Signed-off-by: Mauro Carvalho Chehab --- tools/lib/python/kdoc/kdoc_output.py | 29 +++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index fb44cc8e0770..1e3dc47bc696 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -580,7 +580,34 @@ class RestFormat(OutputFormat): class ManFormat(OutputFormat): - """Consts and functions used by man pages output.""" + """ + Consts and functions used by man pages output. + + This class has one mandatory parameter and some optional ones, which + are needed to define the title header contents: + + ``modulename`` + Defines the module name to be used at the troff ``.TH`` output. + + This argument is mandatory. + + ``section`` + Usually a numeric value from 0 to 9, but man pages also accept + some strings like "p". + + Defaults to ``9`` + + ``manual`` + Defaults to ``Kernel API Manual``. + + The above controls the output of the corresponding fields on troff + title headers, which will be filled like this:: + + .TH "{name}" {section} "{date}" "{modulename}" "{manual}" + + where ``name`` will match the API symbol name, and ``date`` will be + either the date when the Kernel was compiled or the current date + """ highlights = ( (type_constant, r"\1"), -- 2.52.0 Instead of placing the same data for modulename for all generated man pages, use the directory from the filename used to produce kernel docs as basis.
Signed-off-by: Mauro Carvalho Chehab --- tools/docs/kernel-doc | 1 - tools/lib/python/kdoc/kdoc_output.py | 41 +++++++++++++++++----------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/tools/docs/kernel-doc b/tools/docs/kernel-doc index aed09f9a54dd..3a932f95bdf5 100755 --- a/tools/docs/kernel-doc +++ b/tools/docs/kernel-doc @@ -210,7 +210,6 @@ def main(): help="Enable debug messages") parser.add_argument("-M", "-modulename", "--modulename", - default="Kernel API", help="Allow setting a module name at the output.") parser.add_argument("-l", "-enable-lineno", "--enable_lineno", diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 1e3dc47bc696..44e40a6e8ca6 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -589,7 +589,8 @@ class ManFormat(OutputFormat): ``modulename`` Defines the module name to be used at the troff ``.TH`` output. - This argument is mandatory. + This argument is optional. If not specified, it will be filled + with the directory which contains the documented file. ``section`` Usually a numeric value from 0 to 9, but man pages also accept @@ -634,14 +635,21 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def emit_th(self, name): + def emit_th(self, name, args): """Emit a title header line.""" - name = name.strip() + title = name.strip() + module = self.modulename(args) - self.data += f'.TH "{name}" {self.section} "{self.date}" ' - self.data += f' "{self.modulename}" "{self.manual}"\n' + self.data += f'.TH "{title}" {self.section} "{self.date}" ' + self.data += f' "{module}" "{self.manual}"\n' - def __init__(self, modulename, section="9", manual="Kernel API Manual"): + def modulename(self, args): + if self._modulename: + return self._modulename + + return os.path.dirname(args.fname) + + def __init__(self, modulename=None, section="9", manual="Kernel API Manual"): """ Creates class variables. 
@@ -651,7 +659,7 @@ class ManFormat(OutputFormat): super().__init__() - self.modulename = modulename + self._modulename = modulename self.section = section self.manual = manual @@ -685,7 +693,8 @@ class ManFormat(OutputFormat): dtype = args.type if dtype == "doc": - return self.modulename + return name +# return os.path.basename(self.modulename(args)) if dtype in ["function", "typedef"]: return name @@ -762,7 +771,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -772,7 +781,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -818,7 +827,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -851,7 +860,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -868,11 +877,11 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_typedef(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -882,12 +891,12 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_struct(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') definition = args.get('definition') 
out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- 2.52.0 While python internal libraries have support for unit tests, its output is not nice. Add a helper module to improve its output. I wrote this module last year while testing some scripts I used internally. The initial skeleton was generated with the help of LLM tools, but it was highly modified to ensure that it will work as I would expect. Signed-off-by: Mauro Carvalho Chehab --- Documentation/tools/python.rst | 2 + Documentation/tools/unittest.rst | 24 ++ tools/lib/python/unittest_helper.py | 348 ++++++++++++++++++++++++++++ 3 files changed, 374 insertions(+) create mode 100644 Documentation/tools/unittest.rst create mode 100755 tools/lib/python/unittest_helper.py diff --git a/Documentation/tools/python.rst b/Documentation/tools/python.rst index 1444c1816735..3b7299161f20 100644 --- a/Documentation/tools/python.rst +++ b/Documentation/tools/python.rst @@ -11,3 +11,5 @@ Python libraries feat kdoc kabi + + unittest diff --git a/Documentation/tools/unittest.rst b/Documentation/tools/unittest.rst new file mode 100644 index 000000000000..14a2b2a65236 --- /dev/null +++ b/Documentation/tools/unittest.rst @@ -0,0 +1,24 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Python unittest +=============== + +Checking consistency of python modules can be complex. Sometimes, it is +useful to define a set of unit tests to help checking them. + +While the actual test implementation is usecase dependent, Python already +provides a standard way to add unit tests by using ``import unittest``. + +Using such class, requires setting up a test suite. Also, the default format +is a little bit awkward. To improve it and provide a more uniform way to +report errors, some unittest classes and functions are defined. + + +Unittest helper module +====================== + +..
automodule:: lib.python.unittest_helper + :members: + :show-inheritance: + :undoc-members: diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py new file mode 100755 index 000000000000..d2efb78d8561 --- /dev/null +++ b/tools/lib/python/unittest_helper.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025-2026: Mauro Carvalho Chehab . +# +# pylint: disable=C0103,R0912,R0914,E1101 + +""" +Provides helper functions and classes execute python unit tests. + +Those help functions provide a nice colored output summary of each +executed test and, when a test fails, it shows the different in diff +format when running in verbose mode, like:: + + $ tools/unittests/nested_match.py -v + ... + Traceback (most recent call last): + File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit + self.assertEqual(replaced, "bar(a); bar(b); foo(c)") + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)' + - bar(a) foo(b); foo(c) + ? ^^^^ + + bar(a); bar(b); foo(c) + ? ^^^^^ + ... + +It also allows filtering what tests will be executed via ``-k`` parameter. + +Typical usage is to do:: + + from unittest_helper import run_unittest + ... + + if __name__ == "__main__": + run_unittest(__file__) + +If passing arguments is needed, on a more complex scenario, it can be +used like on this example:: + + from unittest_helper import TestUnits, run_unittest + ... + env = {'sudo': ""} + ... 
+ if __name__ == "__main__": + runner = TestUnits() + base_parser = runner.parse_args() + base_parser.add_argument('--sudo', action='store_true', + help='Enable tests requiring sudo privileges') + + args = base_parser.parse_args() + + # Update module-level flag + if args.sudo: + env['sudo'] = "1" + + # Run tests with customized arguments + runner.run(__file__, parser=base_parser, args=args, env=env) +""" + +import argparse +import atexit +import os +import re +import unittest +import sys + +from unittest.mock import patch + + +class Summary(unittest.TestResult): + """ + Overrides ``unittest.TestResult`` class to provide a nice colored + summary. When in verbose mode, displays actual/expected difference in + unified diff format. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + #: Dictionary to store organized test results. + self.test_results = {} + + #: max length of the test names. + self.max_name_length = 0 + + def startTest(self, test): + super().startTest(test) + test_id = test.id() + parts = test_id.split(".") + + # Extract module, class, and method names + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + + method_name = parts[-1] + + # Build the hierarchical structure + if module_name not in self.test_results: + self.test_results[module_name] = {} + + if class_name not in self.test_results[module_name]: + self.test_results[module_name][class_name] = [] + + # Track maximum test name length for alignment + display_name = f"{method_name}:" + + self.max_name_length = max(len(display_name), self.max_name_length) + + def _record_test(self, test, status): + test_id = test.id() + parts = test_id.split(".") + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + method_name = parts[-1] + 
self.test_results[module_name][class_name].append((method_name, status)) + + def addSuccess(self, test): + super().addSuccess(test) + self._record_test(test, "OK") + + def addFailure(self, test, err): + super().addFailure(test, err) + self._record_test(test, "FAIL") + + def addError(self, test, err): + super().addError(test, err) + self._record_test(test, "ERROR") + + def addSkip(self, test, reason): + super().addSkip(test, reason) + self._record_test(test, f"SKIP ({reason})") + + def printResults(self): + """ + Print results using colors if tty. + """ + # Check for ANSI color support + use_color = sys.stdout.isatty() + COLORS = { + "OK": "\033[32m", # Green + "FAIL": "\033[31m", # Red + "SKIP": "\033[1;33m", # Yellow + "PARTIAL": "\033[33m", # Orange + "EXPECTED_FAIL": "\033[36m", # Cyan + "reset": "\033[0m", # Reset to default terminal color + } + if not use_color: + for c in COLORS: + COLORS[c] = "" + + # Calculate maximum test name length + if not self.test_results: + return + try: + lengths = [] + for module in self.test_results.values(): + for tests in module.values(): + for test_name, _ in tests: + lengths.append(len(test_name) + 1) # +1 for colon + max_length = max(lengths) + 2 # Additional padding + except ValueError: + sys.exit("Test list is empty") + + # Print results + for module_name, classes in self.test_results.items(): + print(f"{module_name}:") + for class_name, tests in classes.items(): + print(f" {class_name}:") + for test_name, status in tests: + # Get base status without reason for SKIP + if status.startswith("SKIP"): + status_code = status.split()[0] + else: + status_code = status + color = COLORS.get(status_code, "") + print( + f" {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}" + ) + print() + + # Print summary + print(f"\nRan {self.testsRun} tests", end="") + if hasattr(self, "timeTaken"): + print(f" in {self.timeTaken:.3f}s", end="") + print() + + if not self.wasSuccessful(): + print(f"\n{COLORS['FAIL']}FAILED (", end="") + 
failures = getattr(self, "failures", []) + errors = getattr(self, "errors", []) + if failures: + print(f"failures={len(failures)}", end="") + if errors: + if failures: + print(", ", end="") + print(f"errors={len(errors)}", end="") + print(f"){COLORS['reset']}") + + +def flatten_suite(suite): + """Flatten test suite hierarchy.""" + tests = [] + for item in suite: + if isinstance(item, unittest.TestSuite): + tests.extend(flatten_suite(item)) + else: + tests.append(item) + return tests + + +class TestUnits: + """ + Helper class to set verbosity level. + + This class discover test files, import its unittest classes and + executes the test on it. + """ + def parse_args(self): + """Returns a parser for command line arguments.""" + parser = argparse.ArgumentParser(description="Test runner with regex filtering") + parser.add_argument("-v", "--verbose", action="count", default=1) + parser.add_argument("-f", "--failfast", action="store_true") + parser.add_argument("-k", "--keyword", + help="Regex pattern to filter test methods") + return parser + + def run(self, caller_file=None, pattern=None, + suite=None, parser=None, args=None, env=None): + """ + Execute all tests from the unity test file. + + It contains several optional parameters: + + ``caller_file``: + - name of the file that contains test. + + typical usage is to place __file__ at the caller test, e.g.:: + + if __name__ == "__main__": + TestUnits().run(__file__) + + ``pattern``: + - optional pattern to match multiple file names. Defaults + to basename of ``caller_file``. + + ``suite``: + - an unittest suite initialized by the caller using + ``unittest.TestLoader().discover()``. + + ``parser``: + - an argparse parser. If not defined, this helper will create + one. + + ``args``: + - an ``argparse.Namespace`` data filled by the caller. + + ``env``: + - environment variables that will be passed to the test suite + + At least ``caller_file`` or ``suite`` must be used, otherwise a + ``TypeError`` will be raised. 
class TestUnits:
    """
    Command-line front end used to run a unittest module.

    It parses the runner options, discovers (or receives) a test suite,
    optionally filters its test methods by a regular expression, runs the
    result and exits with a non-zero status when the run is not successful.
    """

    def parse_args(self):
        """Return an ``argparse`` parser with the options used by ``run()``."""
        parser = argparse.ArgumentParser(description="Test runner with regex filtering")
        parser.add_argument("-v", "--verbose", action="count", default=1)
        parser.add_argument("-f", "--failfast", action="store_true")
        parser.add_argument("-k", "--keyword",
                            help="Regex pattern to filter test methods")
        return parser

    def run(self, caller_file=None, pattern=None,
            suite=None, parser=None, args=None, env=None):
        """
        Execute all tests from the unit test file and exit.

        It contains several optional parameters:

        ``caller_file``:
            - name of the file that contains the tests.

              typical usage is to place __file__ at the caller test, e.g.::

                if __name__ == "__main__":
                    TestUnits().run(__file__)

        ``pattern``:
            - optional file name pattern used for test discovery, allowing
              to match multiple files at the directory of ``caller_file``.
              Defaults to the basename of ``caller_file``. Ignored when
              ``suite`` is given.

        ``suite``:
            - an unittest suite initialized by the caller using
              ``unittest.TestLoader().discover()``.

        ``parser``:
            - an argparse parser. If not defined, this helper will create
              one.

        ``args``:
            - an ``argparse.Namespace`` data filled by the caller. It must
              provide ``verbose``, ``failfast`` and ``keyword``.

        ``env``:
            - environment variables that will be passed to the test suite.

        At least ``caller_file`` or ``suite`` must be used, otherwise a
        ``TypeError`` will be raised.

        This method never returns: it calls ``sys.exit()`` with status 0
        if the run was successful, or 1 otherwise (including when the
        ``-k`` regular expression is invalid).
        """
        if args is None:
            if parser is None:
                parser = self.parse_args()
            args = parser.parse_args()

        if not caller_file and not suite:
            raise TypeError("Either caller_file or suite is needed at TestUnits")

        # Validate the -k expression before importing any test module
        keyword_re = None
        if args.keyword:
            try:
                keyword_re = re.compile(args.keyword)
            except re.error as e:
                sys.stderr.write(f"Invalid regex pattern: {e}\n")
                sys.exit(1)

        if env:
            patcher = patch.dict(os.environ, env)
            patcher.start()
            # run() ends with sys.exit(), so undo the patch at exit time
            atexit.register(patcher.stop)

        verbose = args.verbose

        # Load ONLY tests from the calling file, unless a pattern is given
        if not suite:
            if not pattern:
                pattern = os.path.basename(caller_file)

            # dirname() is empty for a bare file name: use the current dir
            start_dir = os.path.dirname(caller_file) or "."

            loader = unittest.TestLoader()
            suite = loader.discover(start_dir=start_dir, pattern=pattern)

        # Work on a flat list, so filtering only has to look at test cases
        tests = flatten_suite(suite)

        # Filter tests by method name if -k specified
        if keyword_re:
            tests = [test for test in tests
                     if keyword_re.search(test.id().split(".")[-1])]

        suite = unittest.TestSuite(tests)

        # With -vv or more, rely on unittest's own per-test output;
        # otherwise, results are collected and printed by Summary.
        resultclass = None if verbose >= 2 else Summary

        runner = unittest.TextTestRunner(verbosity=verbose,
                                         resultclass=resultclass,
                                         failfast=args.failfast)
        result = runner.run(suite)
        if resultclass:
            result.printResults()

        sys.exit(0 if result.wasSuccessful() else 1)


def run_unittest(fname):
    """
    Basic usage of TestUnits class.

    Use it when there's no need to pass any extra argument to the tests.
    The recommended way is to place this at the end of each
    unittest module::

        if __name__ == "__main__":
            run_unittest(__file__)
    """
    TestUnits().run(fname)
The recommended way is to place this at the end of each + unittest module:: + + if __name__ == "__main__": + run_unittest(__file__) + """ + TestUnits().run(fname) -- 2.52.0 The NestedMatch logic is complex enough to justify tests to ensure that it is doing its job. Add unittests to check the functionality provided by NestedMatch by replicating expected patterns. The NestedMatch class deals with complex macros. Add a unittest to check if it's doing the right thing and detect possible regressions as we improve its code. The initial version was generated using gpt-oss:latest LLM on my local GPU, as LLMs aren't bad at transforming patterns into unittests. Yet, the current version contains only the skeleton of what the LLM produced, as I ended up heavily changing its content to be more representative and to have real case scenarios. The kdoc_xforms test suite contains 3 test groups. Two of them test the basic functionality of NestedMatch to replace patterns. The last one (TestRealUsecases) contains real code snippets from the Kernel with some cleanups to better fit in 80 columns and uses the same transforms as kernel-doc, thus allowing to test the logic used inside kdoc_parser to transform functions, structs and variable patterns.
Its output is like this: $ tools/unittests/kdoc_xforms.py Ran 25 tests in 0.003s OK kdoc_xforms: TestDifferentReplacements: test_strip_multiple_acquires: OK test_sub_count_parameter: OK test_sub_mixed_placeholders: OK test_sub_multiple_placeholders: OK test_sub_no_placeholder: OK test_sub_single_placeholder: OK test_sub_with_capture: OK test_sub_zero_placeholder: OK TestMultipleMacros: test_acquires_multiple: OK test_acquires_nested_paren: OK test_acquires_simple: OK test_mixed_macros: OK test_must_hold: OK test_must_hold_shared: OK test_no_false_positive: OK test_no_function: OK test_no_macro_remains: OK TestRealUsecases: test_functions_with_acquires_and_releases: OK test_raw_struct_group: OK test_raw_struct_group_tagged: OK test_struct_group: OK test_struct_group_attr: OK test_struct_group_tagged_with_private: OK test_struct_kcov: OK test_vars_stackdepot: OK Ran 25 tests Signed-off-by: Mauro Carvalho Chehab --- tools/unittests/kdoc_xforms.py | 654 +++++++++++++++++++++++++++++++++ 1 file changed, 654 insertions(+) create mode 100755 tools/unittests/kdoc_xforms.py diff --git a/tools/unittests/kdoc_xforms.py b/tools/unittests/kdoc_xforms.py new file mode 100755 index 000000000000..71b42c8c75ea --- /dev/null +++ b/tools/unittests/kdoc_xforms.py @@ -0,0 +1,654 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . +# +# pylint: disable=C0413,R0904 + + +""" +Unit tests for kernel-doc NestedMatch. 
class TestCaseDiff(unittest.TestCase):
    """
    Base test case with unlimited diffs and a whitespace-tolerant compare.
    """

    # Set as a class attribute rather than inside setUpClass(): a subclass
    # defining its own setUpClass() without chaining to super() would
    # otherwise silently get unittest's default diff size limit back.
    maxDiff = None

    @staticmethod
    def _normalize(text):
        """Collapse tab/space runs and drop the space left before a ';'."""
        return re.sub(r"[\t ]+", " ", text.strip()).replace(" ;", ";")

    def assertLogicallyEqual(self, a, b):
        """
        Compare two results ignoring multiple whitespace differences.

        Both strings are stripped, every run of tabs and spaces becomes a
        single space and a space placed right before a ``;`` is removed.

        This is useful to check more complex matches picked from examples.
        On the plus side, there is no need to use dedent.
        Please notice that line breaks still need to match. They could be
        normalized as well, but keeping them makes the diff easier to read.
        """
        self.assertEqual(self._normalize(a), self._normalize(b))
class TestMultipleMacros(TestCaseDiff):
    """
    Replacement tests using several different macro names.

    assertLogicallyEqual is not used here. Instead, each test checks that
    the expected fragments are present in (or absent from) the result.
    """

    @staticmethod
    def _replaced(regex, text):
        """Return ``text`` after NestedMatch(regex) substitutes REPLACED."""
        return NestedMatch(regex).sub("REPLACED", text)

    def test_acquires_simple(self):
        """A single __acquires() annotation is replaced"""
        result = self._replaced(r"__acquires\s*\(", "__acquires(ctx) foo();")

        self.assertNotIn("__acquires(", result)
        self.assertIn("foo();", result)

    def test_acquires_multiple(self):
        """Every __acquires() occurrence on the line is replaced"""
        result = self._replaced(r"__acquires\s*\(",
                                "__acquires(ctx) __acquires(other) bar();")

        self.assertNotIn("__acquires(", result)
        self.assertEqual(result.count("REPLACED"), 2)

    def test_acquires_nested_paren(self):
        """__acquires() whose argument has its own parentheses"""
        result = self._replaced(r"__acquires\s*\(",
                                "__acquires((ctx1, ctx2)) baz();")

        self.assertNotIn("__acquires(", result)
        self.assertIn("baz();", result)

    def test_must_hold(self):
        """__must_hold() with a pointer argument"""
        result = self._replaced(r"__must_hold\s*\(",
                                "__must_hold(&lock) do_something();")

        self.assertNotIn("__must_hold(", result)
        self.assertIn("do_something();", result)

    def test_must_hold_shared(self):
        """__must_hold_shared() with an uppercase defined value"""
        result = self._replaced(r"__must_hold_shared\s*\(",
                                "__must_hold_shared(RCU) other();")

        self.assertNotIn("__must_hold_shared(", result)
        self.assertIn("other();", result)

    def test_no_false_positive(self):
        """
        Unrelated text that merely contains a similar pattern is preserved
        """
        line = "call__acquires(foo); // should stay intact"

        self.assertEqual(self._replaced(r"\b__acquires\s*\(", line), line)

    def test_mixed_macros(self):
        """Different macros on the same line, replaced one after another"""
        macros = ("__acquires", "__releases", "__must_hold")
        result = "__acquires(ctx) __releases(ctx) __must_hold(&lock) foo();"

        for macro in macros:
            result = self._replaced(rf"{macro}\s*\(", result)

        for macro in macros:
            self.assertNotIn(f"{macro}(", result)

        self.assertIn("foo();", result)

    def test_no_macro_remains(self):
        """A line without the macro is left untouched"""
        line = "do_something_else();"

        self.assertEqual(self._replaced(r"__acquires\s*\(", line), line)

    def test_no_function(self):
        """A pattern without an opening delimiter leaves the line untouched"""
        line = "something"

        self.assertEqual(self._replaced(line, line), line)
class TestDifferentReplacements(TestCaseDiff):
    """
    Test argument replacements.

    Here, the function name can be anything. So, we picked __attribute__(),
    to mimic a macro found at the Kernel, but none of the replacements here
    has any relationship with the Kernel usage.
    """

    MACRO = "__attribute__"

    @classmethod
    def setUpClass(cls):
        """Define a NestedMatch to be used for all tests"""
        # Chain to the base class, so anything it sets up is not lost
        super().setUpClass()
        cls.matcher = NestedMatch(re.compile(rf"{cls.MACRO}\s*\("))

    # Docstrings mentioning \0, \1 or \2 are raw strings: otherwise Python
    # would turn those sequences into control characters.

    def test_sub_with_capture(self):
        r"""Test all arguments replacement (\0) with a single arg"""
        line = f"{self.MACRO}(&ctx)\nfoo();"

        result = self.matcher.sub(r"ACQUIRED(\0)", line)

        self.assertLogicallyEqual("ACQUIRED(&ctx)\nfoo();", result)

    def test_sub_zero_placeholder(self):
        r"""Test all arguments replacement (\0) with multiple args"""
        line = f"{self.MACRO}(arg1, arg2)\nbar();"

        result = self.matcher.sub(r"REPLACED(\0)", line)

        self.assertLogicallyEqual("REPLACED(arg1, arg2)\nbar();", result)

    def test_sub_single_placeholder(self):
        r"""Single replacement rule for \1"""
        line = f"{self.MACRO}(ctx, boo)\nfoo();"
        result = self.matcher.sub(r"ACQUIRED(\1)", line)

        self.assertLogicallyEqual("ACQUIRED(ctx)\nfoo();", result)

    def test_sub_multiple_placeholders(self):
        r"""Replacement rule for both \1 and \2"""
        line = f"{self.MACRO}(arg1, arg2)\nbar();"
        result = self.matcher.sub(r"REPLACE(\1, \2)", line)

        self.assertLogicallyEqual("REPLACE(arg1, arg2)\nbar();", result)

    def test_sub_mixed_placeholders(self):
        r"""Replacement rule for \0, \1 and additional text"""
        line = f"{self.MACRO}(foo, bar)\nbaz();"
        result = self.matcher.sub(r"ALL(\0) FIRST(\1)", line)

        self.assertLogicallyEqual("ALL(foo, bar) FIRST(foo)\nbaz();", result)

    def test_sub_no_placeholder(self):
        """Replacement without placeholders"""
        line = f"{self.MACRO}(arg)\nfoo();"
        result = self.matcher.sub(r"NO_BACKREFS()", line)

        self.assertLogicallyEqual("NO_BACKREFS()\nfoo();", result)

    def test_sub_count_parameter(self):
        """Verify that the algorithm stops after the requested count"""
        line = f"{self.MACRO}(a1) x();\n{self.MACRO}(a2) y();"
        result = self.matcher.sub(r"ONLY_FIRST(\1) ", line, count=1)

        self.assertLogicallyEqual(f"ONLY_FIRST(a1) x();\n{self.MACRO}(a2) y();",
                                  result)

    def test_strip_multiple_acquires(self):
        """Check if spaces between removed delimiters will be dropped"""
        line = f"int {self.MACRO}(1) {self.MACRO}(2 ) {self.MACRO}(3) foo;"
        result = self.matcher.sub(r"", line)

        self.assertLogicallyEqual(result, "int foo;")
class TestRealUsecases(TestCaseDiff):
    """
    Test different usecase patterns found at the Kernel.

    Here, replacements using both NestedMatch and KernRe can be tested,
    as the replacement rules are taken from CTransforms.
    """

    # Transform lists, indexed by the kind of declaration they apply to
    xforms = {
        "func": CTransforms.function_xforms,
        "struct": CTransforms.struct_xforms,
        "var": CTransforms.var_xforms,
    }

    @classmethod
    def apply_transforms(cls, xform_type, text):
        """
        Apply, in order, every transform registered for ``xform_type``.

        This is meant to mimic what kdoc_parser does with the same lists.

        There are two parameters:

        - ``xform_type``
            Can be ``func``, ``struct`` or ``var``;
        - ``text``
            The text where the sub patterns from CTransforms will be applied.

        Returns the transformed text with leading and trailing whitespace
        removed. Raises ``KeyError`` if ``xform_type`` is not known.
        """
        for search, subst in cls.xforms[xform_type]:
            text = search.sub(subst, text)

        return text.strip()

    def test_struct_group(self):
        """
        Test struct_group using a pattern from
        drivers/net/ethernet/asix/ax88796c_main.h.
        """
        line = """
            struct tx_pkt_info {
                struct_group(tx_overhead,
                    struct tx_sop_header sop;
                    struct tx_segment_header seg;
                );
                struct tx_eop_header eop;
                u16 pkt_len;
                u16 seq_num;
            };
        """
        expected = """
            struct tx_pkt_info {
                struct tx_sop_header sop;
                struct tx_segment_header seg;;
                struct tx_eop_header eop;
                u16 pkt_len;
                u16 seq_num;
            };
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_struct_group_attr(self):
        """
        Test two struct_group_attr using patterns from fs/smb/client/cifspdu.h.
        """
        line = """
            typedef struct smb_com_open_rsp {
                struct smb_hdr hdr; /* wct = 34 BB */
                __u8 AndXCommand;
                __u8 AndXReserved;
                __le16 AndXOffset;
                __u8 OplockLevel;
                __u16 Fid;
                __le32 CreateAction;
                struct_group_attr(common_attributes, __packed,
                    __le64 CreationTime;
                    __le64 LastAccessTime;
                    __le64 LastWriteTime;
                    __le64 ChangeTime;
                    __le32 FileAttributes;
                );
                __le64 AllocationSize;
                __le64 EndOfFile;
                __le16 FileType;
                __le16 DeviceState;
                __u8 DirectoryFlag;
                __u16 ByteCount; /* bct = 0 */
            } __packed OPEN_RSP;

            typedef struct {
                struct_group_attr(common_attributes, __packed,
                    __le64 CreationTime;
                    __le64 LastAccessTime;
                    __le64 LastWriteTime;
                    __le64 ChangeTime;
                    __le32 Attributes;
                );
                __u32 Pad1;
                __le64 AllocationSize;
                __le64 EndOfFile;
                __le32 NumberOfLinks;
                __u8 DeletePending;
                __u8 Directory;
                __u16 Pad2;
                __le32 EASize;
                __le32 FileNameLength;
                union {
                    char __pad;
                    DECLARE_FLEX_ARRAY(char, FileName);
                };
            } __packed FILE_ALL_INFO; /* level 0x107 QPathInfo */
        """
        expected = """
            typedef struct smb_com_open_rsp {
                struct smb_hdr hdr; /* wct = 34 BB */
                __u8 AndXCommand;
                __u8 AndXReserved;
                __le16 AndXOffset;
                __u8 OplockLevel;
                __u16 Fid;
                __le32 CreateAction;
                __le64 CreationTime;
                __le64 LastAccessTime;
                __le64 LastWriteTime;
                __le64 ChangeTime;
                __le32 FileAttributes;;
                __le64 AllocationSize;
                __le64 EndOfFile;
                __le16 FileType;
                __le16 DeviceState;
                __u8 DirectoryFlag;
                __u16 ByteCount; /* bct = 0 */
            } OPEN_RSP;

            typedef struct {
                __le64 CreationTime;
                __le64 LastAccessTime;
                __le64 LastWriteTime;
                __le64 ChangeTime;
                __le32 Attributes;;
                __u32 Pad1;
                __le64 AllocationSize;
                __le64 EndOfFile;
                __le32 NumberOfLinks;
                __u8 DeletePending;
                __u8 Directory;
                __u16 Pad2;
                __le32 EASize;
                __le32 FileNameLength;
                union {
                    char __pad;
                    char FileName[];
                };
            } FILE_ALL_INFO; /* level 0x107 QPathInfo */
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_raw_struct_group(self):
        """
        Test a __struct_group pattern from include/uapi/cxl/features.h.
        """
        line = """
            struct cxl_mbox_get_sup_feats_out {
                __struct_group(cxl_mbox_get_sup_feats_out_hdr, hdr, /* empty */,
                    __le16 num_entries;
                    __le16 supported_feats;
                    __u8 reserved[4];
                );
                struct cxl_feat_entry ents[] __counted_by_le(num_entries);
            } __attribute__ ((__packed__));
        """
        expected = """
            struct cxl_mbox_get_sup_feats_out {
                __le16 num_entries;
                __le16 supported_feats;
                __u8 reserved[4];;
                struct cxl_feat_entry ents[];
            };
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_raw_struct_group_tagged(self):
        """
        Test some struct_group_tagged patterns from drivers/cxl/cxl.h.
        """
        line = """
            struct cxl_regs {
                struct_group_tagged(cxl_component_regs, component,
                    void __iomem *hdm_decoder;
                    void __iomem *ras;
                );

                struct_group_tagged(cxl_device_regs, device_regs,
                    void __iomem *status, *mbox, *memdev;
                );

                struct_group_tagged(cxl_pmu_regs, pmu_regs,
                    void __iomem *pmu;
                );

                struct_group_tagged(cxl_rch_regs, rch_regs,
                    void __iomem *dport_aer;
                );

                struct_group_tagged(cxl_rcd_regs, rcd_regs,
                    void __iomem *rcd_pcie_cap;
                );
            };
        """
        expected = """
            struct cxl_regs {
                struct cxl_component_regs component; void __iomem *hdm_decoder;
                void __iomem *ras;;

                struct cxl_device_regs device_regs; void __iomem *status;

                struct cxl_pmu_regs pmu_regs; void __iomem *pmu;;

                struct cxl_rch_regs rch_regs; void __iomem *dport_aer;;

                struct cxl_rcd_regs rcd_regs; void __iomem *rcd_pcie_cap;;
            };
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_struct_group_tagged_with_private(self):
        """
        Replace struct_group_tagged with private, using the same regex
        for the replacement as what happens in xforms_lists.py.

        As the private removal happens outside the NestedMatch class, the
        remaining part of the struct was manually dropped, to simulate
        what happens at kdoc_parser.

        Taken from include/net/page_pool/types.h
        """
        line = """
            struct page_pool_params {
                struct_group_tagged(page_pool_params_fast, fast,
                    unsigned int order;
                    unsigned int pool_size;
                    int nid;
                    struct device *dev;
                    struct napi_struct *napi;
                    enum dma_data_direction dma_dir;
                    unsigned int max_len;
                    unsigned int offset;
                );
                struct_group_tagged(page_pool_params_slow, slow,
                    struct net_device *netdev;
                    unsigned int queue_idx;
                    unsigned int flags;
                    /* private: used by test code only */
        """
        expected = """
            struct page_pool_params {
                struct page_pool_params_fast fast; unsigned int order;
                unsigned int pool_size;
                int nid;
                struct device *dev;
                struct napi_struct *napi;
                enum dma_data_direction dma_dir;
                unsigned int max_len;
                unsigned int offset;;
                struct page_pool_params_slow slow; struct net_device *netdev;
                unsigned int queue_idx;
                unsigned int flags;
                /* private: used by test code only */
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_struct_kcov(self):
        """
        Test a struct from kernel/kcov.c.
        """
        line = """
            struct kcov {
                refcount_t refcount;
                spinlock_t lock;
                enum kcov_mode mode __guarded_by(&lock);
                unsigned int size __guarded_by(&lock);
                void *area __guarded_by(&lock);
                struct task_struct *t __guarded_by(&lock);
                bool remote;
                unsigned int remote_size;
                int sequence;
            };
        """
        expected = """
            struct kcov {
                refcount_t refcount;
                spinlock_t lock;
                enum kcov_mode mode;
                unsigned int size;
                void *area;
                struct task_struct *t;
                bool remote;
                unsigned int remote_size;
                int sequence;
            };
        """

        result = self.apply_transforms("struct", line)
        self.assertLogicallyEqual(result, expected)

    def test_vars_stackdepot(self):
        """
        Test guarded_by on vars from lib/stackdepot.c.
        """
        line = """
            size_t pool_offset __guarded_by(&pool_lock) = DEPOT_POOL_SIZE;
            __guarded_by(&pool_lock) LIST_HEAD(free_stacks);
            void **stack_pools __pt_guarded_by(&pool_lock);
        """
        expected = """
            size_t pool_offset = DEPOT_POOL_SIZE;
            struct list_head free_stacks;
            void **stack_pools;
        """

        result = self.apply_transforms("var", line)
        self.assertLogicallyEqual(result, expected)

    def test_functions_with_acquires_and_releases(self):
        """
        Test removal of __acquires, __releases, __cond_acquires and
        __cond_releases annotations from function prototypes.
        """
        # The trailing backslashes are line continuations inside the
        # string: each annotation ends up on the prototype's last line.
        line = """
            bool prepare_report_consumer(unsigned long *flags,
                                         const struct access_info *ai,
                                         struct other_info *other_info) \
                __cond_acquires(true, &report_lock);

            int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) \
                __cond_acquires(0, RCU_BH);

            bool undo_report_consumer(unsigned long *flags,
                                      const struct access_info *ai,
                                      struct other_info *other_info) \
                __cond_releases(true, &report_lock);

            void debugfs_enter_cancellation(struct file *file,
                                            struct debugfs_cancellation *c) \
                __acquires(cancellation);

            void debugfs_leave_cancellation(struct file *file,
                                            struct debugfs_cancellation *c) \
                __releases(cancellation);

            acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock lockp) \
                __acquires(lockp);

            void acpi_os_release_lock(acpi_spinlock lockp,
                                      acpi_cpu_flags not_used) \
                __releases(lockp)
        """
        expected = """
            bool prepare_report_consumer(unsigned long *flags,
                                         const struct access_info *ai,
                                         struct other_info *other_info);

            int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);

            bool undo_report_consumer(unsigned long *flags,
                                      const struct access_info *ai,
                                      struct other_info *other_info);

            void debugfs_enter_cancellation(struct file *file,
                                            struct debugfs_cancellation *c);

            void debugfs_leave_cancellation(struct file *file,
                                            struct debugfs_cancellation *c);

            acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock lockp);

            void acpi_os_release_lock(acpi_spinlock lockp,
                                      acpi_cpu_flags not_used)
        """

        result = self.apply_transforms("func", line)
        self.assertLogicallyEqual(result, expected)