[Linux-kernel-mentees] [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature

Joe Perches joe at perches.com
Sat Nov 28 15:40:33 UTC 2020


On Sat, 2020-11-28 at 18:35 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.
> 
> A large number of these warnings occur because of typo mistakes in
> signature tags. An evaluation over v4.13..v5.8 showed that out of 539
> warnings due to non-standard signatures, 87 are due to typo mistakes.
> 
> Following are the standard signature tags which are often incorrectly
> used, along with their individual counts of incorrect use (over
> v4.13..v5.8):
> 
>  Reviewed-by: 42
>  Signed-off-by: 25
>  Reported-by: 6
>  Acked-by: 4
>  Tested-by: 4
>  Suggested-by: 4
> 
> Provide a fix by calculating levenshtein distance for the signature tag
> with all the standard signatures and suggest a fix with a signature, whose
> edit distance is less than or equal to 2 with the misspelled signature.
> 
> Out of the 86 misspelled signatures fixed with this approach, 85 were
> found to be good corrections and 1 was bad correction.
> 
> Following was found to be a bad correction:
>  Tweeted-by (count: 1) => Tested-by
> 
> Signed-off-by: Aditya Srivastava <yashsri421 at gmail.com>
> ---
> changes in v2: modify commit message: replace specific example with overall evaluation, minor changes
> 
> changes in v3: summarize commit message
> 
> changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

Seems OKish but this needs style modifications as there are
several whitespace uses that don't match the typical forms
and perhaps some new function naming could be improved.

> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
[]
> @@ -506,6 +506,77 @@ our $signature_tags = qr{(?xi:
>  	Cc:
>  )};
>  
> 
> +sub get_min {

probably a poor name choice.  Maybe edit_distance_min

> +	my (@arr) = @_;
> +	my $len = scalar @arr;
> +	if((scalar @arr) < 1) {

space after if

> +		# if underflow, return
> +		return;
> +	}
> +	my $min = $arr[0];
> +	for my $i (0 .. ($len-1)) {
> +		if ($arr[$i] < $min) {
> +			$min = $arr[$i];
> +		}
> +	}
> +	return $min;
> +}
> +
> +sub get_edit_distance {
> +	my ($str1, $str2) = @_;

maybe lc($str) =~ s/-//g; here instead of the code in the caller

> +	my $len1 = length($str1);
> +	my $len2 = length($str2);
> +	# two dimensional array storing minimum edit distance
> +	my @distance;
> +	for my $i (0 .. $len1) {
> +		for my $j (0 .. $len2) {
> +			if ($i == 0) {
> +				$distance[$i][$j] = $j;
> +			}
> +			elsif ($j == 0) {

} elsif {

> +				$distance[$i][$j] = $i;
> +			}
> +			elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
> +				$distance[$i][$j] = $distance[$i - 1][$j - 1];
> +			}
> +			else {

} else {

> +				my $dist1 = $distance[$i][$j - 1]; #insert distance
> +				my $dist2 = $distance[$i - 1][$j]; # remove
> +				my $dist3 = $distance[$i - 1][$j - 1]; #replace
> +				$distance[$i][$j] = 1 + get_min($dist1, $dist2, $dist3);
> +			}
> +		}
> +	}
> +	return $distance[$len1][$len2];
> +}
> +
> +sub get_standard_signature {

find_standard_signature ?

> +	my ($sign_off) = @_;
> +	$sign_off = lc($sign_off);
> +	$sign_off =~ s/\-//g; # to match with formed hash

why not strip the dashes in get_edit_distance instead
of using this weird dance with dashes here?

> +	my @standard_signature_tags = (
> +		'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
> +		'reviewed-by:', 'reported-by:', 'suggested-by:'
> +	);
> +	# setting default values
> +	my $standard_signature = 'signed-off-by';

why does this need to be given a value?

> +	my $min_edit_distance = 20;
> +	my $edit_distance;
> +	foreach (@standard_signature_tags) {
> +		my $signature = $_;
> +		$_ =~ s/\-//g;

and this dancing here

> +		$edit_distance = get_edit_distance($sign_off, $_);
> +		if ($edit_distance < $min_edit_distance) {
> +			$min_edit_distance = $edit_distance;
> +			$standard_signature = $signature;
> +		}
> +	}
> +        if($min_edit_distance<=2) {

bad indentation, if (, spaces around test <=

> +		return ucfirst($standard_signature);
> +        }

bad indentation

> +	return "";
> +}
> +
>  our @typeListMisordered = (
>  	qr{char\s+(?:un)?signed},
>  	qr{int\s+(?:(?:un)?signed\s+)?short\s},
> @@ -2773,8 +2844,18 @@ sub process {
>  			my $ucfirst_sign_off = ucfirst(lc($sign_off));
>  
> 
>  			if ($sign_off !~ /$signature_tags/) {
> -				WARN("BAD_SIGN_OFF",
> -				     "Non-standard signature: $sign_off\n" . $herecurr);
> +				my $suggested_signature = get_standard_signature($sign_off);
> +				if ($suggested_signature eq "") {
> +					WARN("BAD_SIGN_OFF",
> +					"Non-standard signature: $sign_off\n" . $herecurr);

bad alignment

> +				}
> +				else {

} else {

> +					if (WARN("BAD_SIGN_OFF",
> +						 "Non-standard signature: $sign_off. Please use '$suggested_signature' instead\n" . $herecurr) &&

"perhaps" rather than "please use" or "likely typo of"

> +					    $fix) {
> +						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> +					}
> +				}
>  			}
>  			if (defined $space_before && $space_before ne "") {
>  				if (WARN("BAD_SIGN_OFF",




More information about the Linux-kernel-mentees mailing list