[RFC] scripts: kernel-doc: reduce repeated regex expressions into variables

Lukas Bulwahn lukas.bulwahn at gmail.com
Thu Apr 22 19:33:13 UTC 2021


Aditya Srivastava <yashsri421 at gmail.com> schrieb am Do., 22. Apr. 2021,
21:18:

> There are some regex expressions in the kernel-doc script, which are used
> repeatedly in the script.
>
> Reduce such expressions into variables, which can be used everywhere.
>
> A quick manual check found that no errors and warnings were added/removed
> in this process.
>
> Suggested-by: Jonathan Corbet <corbet at lwn.net>
> Signed-off-by: Aditya Srivastava <yashsri421 at gmail.com>
> ---
>  scripts/kernel-doc | 89 ++++++++++++++++++++++++++--------------------
>  1 file changed, 50 insertions(+), 39 deletions(-)
>
> diff --git a/scripts/kernel-doc b/scripts/kernel-doc
> index 2a85d34fdcd0..579c9fdd275f 100755
> --- a/scripts/kernel-doc
> +++ b/scripts/kernel-doc
> @@ -406,6 +406,7 @@ my $doc_inline_sect =
> '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
>  my $doc_inline_end = '^\s*\*/\s*$';
>  my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
>  my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
>
>  my %parameterdescs;
>  my %parameterdesc_start_lines;
> @@ -694,7 +695,7 @@ sub output_function_man(%) {
>             $post = ");";
>         }
>         $type = $args{'parametertypes'}{$parameter};
> -       if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +       if ($type =~ m/$pointer_function/) {
>             # pointer-to-function
>             print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" .
> $post . "\"\n";
>         } else {
> @@ -974,7 +975,7 @@ sub output_function_rst(%) {
>         $count++;
>         $type = $args{'parametertypes'}{$parameter};
>
> -       if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +       if ($type =~ m/$pointer_function/) {
>             # pointer-to-function
>             print $1 . $parameter . ") (" . $2 . ")";
>         } else {
> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
>      my $decl_type;
>      my $members;
>      my $type = qr{struct|union};
> +    my $packed = qr{__packed};
> +    my $aligned = qr{__aligned};
> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
> +    my $cacheline_aligned = qr{____cacheline_aligned};
> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
>      # For capturing struct/union definition body, i.e.
> "{members*}qualifiers*"
> -    my $definition_body =
> qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
> +    my $definition_body =
> qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
> +    my $struct_members =
> qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
>
>      if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
>         $decl_type = $1;
> @@ -1235,27 +1242,27 @@ sub dump_struct($$) {
>         # strip comments:
>         $members =~ s/\/\*.*?\*\///gos;
>         # strip attributes
> -       $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> -       $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
> -       $members =~ s/\s*__packed\s*/ /gos;
> +       $members =~ s/\s*$attribute/ /gi;
> +       $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
> +       $members =~ s/\s*$packed\s*/ /gos;
>         $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
> -       $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
> -       $members =~ s/\s*____cacheline_aligned/ /gos;
> +       $members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
> +       $members =~ s/\s*$cacheline_aligned/ /gos;
>
> +       my $args = qr{([^,)]+)};
>         # replace DECLARE_BITMAP
>         $members =~
> s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1,
> __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
> -       $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned
> long $1\[BITS_TO_LONGS($2)\]/gos;
> +       $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long
> $1\[BITS_TO_LONGS($2)\]/gos;
>         # replace DECLARE_HASHTABLE
> -       $members =~
> s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2)
> - 1)\]/gos;
> +       $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long
> $1\[1 << (($2) - 1)\]/gos;
>         # replace DECLARE_KFIFO
> -       $members =~
> s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
> +       $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2
> \*$1/gos;
>         # replace DECLARE_KFIFO_PTR
> -       $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2
> \*$1/gos;
> -
> +       $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
>         my $declaration = $members;
>
>         # Split nested struct/union elements as newer ones
> -       while ($members =~
> m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
> +       while ($members =~ m/$struct_members/) {
>                 my $newmember;
>                 my $maintype = $1;
>                 my $ids = $4;
> @@ -1315,7 +1322,7 @@ sub dump_struct($$) {
>                                 }
>                         }
>                 }
> -               $members =~
> s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
> +               $members =~ s/$struct_members/$newmember/;
>         }
>
>         # Ignore other nested elements, like enums
> @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) {
>      my $param;
>
>      # temporarily replace commas inside function pointer definition
> -    while ($args =~ /(\([^\),]+),/) {
> -       $args =~ s/(\([^\),]+),/$1#/g;
> +    my $arg_expr = qr{\([^\),]+};
> +    while ($args =~ /$arg_expr,/) {
> +       $args =~ s/($arg_expr),/$1#/g;
>      }
>
>      foreach my $arg (split($splitter, $args)) {
> @@ -1808,8 +1816,11 @@ sub dump_function($$) {
>      # - parport_register_device (function pointer parameters)
>      # - atomic_set (macro)
>      # - pci_match_device, __copy_to_user (long return type)
> +    my $name = qr{[a-zA-Z0-9_~:]+};
> +    my $prototype_end1 = qr{\(([^\(]*)\)};
> +    my $prototype_end2 = qr{\(([^\{]*)\)};
>

Why do you need both $prototype_end1 and $prototype_end2 here?

> -    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
> +    if ($define && $prototype =~ m/^()($name)\s+/) {
>          # This is an object-like macro, it has no return type and no
> parameter
>          # list.
>          # Function-like macros are not allowed to have spaces between
> @@ -1817,23 +1828,23 @@ sub dump_function($$) {
>          $return_type = $1;
>          $declaration_name = $2;
>          $noret = 1;
> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -       $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -       $prototype =~
> m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
> +       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +       $prototype =~ m/^()($name)\s*$prototype_end2/ ||
> +       $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ ||
> +       $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +       $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +       $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +       $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +       $prototype =~
> m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +       $prototype =~
> m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/)  {
>         $return_type = $1;
>         $declaration_name = $2;
>         my $args = $3;
> @@ -2110,12 +2121,12 @@ sub process_name($$) {
>      } elsif (/$doc_decl/o) {
>         $identifier = $1;
>         my $is_kernel_comment = 0;
> -       my $decl_start = qr{\s*\*};
> +       my $decl_start = qr{$doc_com};
>         # test for pointer declaration type, foo * bar() - desc
>         my $fn_type = qr{\w+\s*\*\s*};
>         my $parenthesis = qr{\(\w*\)};
>         my $decl_end = qr{[-:].*};
> -       if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
> +       if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
>             $identifier = $1;
>         }
>         if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
> @@ -2125,8 +2136,8 @@ sub process_name($$) {
>         }
>         # Look for foo() or static void foo() - description; or misspelt
>         # identifier
> -       elsif
> (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
> -           /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
> +       elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/
> ||
> +           /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
>             $identifier = $1;
>             $decl_type = 'function';
>             $identifier =~ s/^define\s+//;
> --
> 2.17.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linuxfoundation.org/pipermail/linux-kernel-mentees/attachments/20210422/e3678812/attachment-0001.html>


More information about the Linux-kernel-mentees mailing list