[Linux-kernel-mentees] checkpatch.pl investigation: NO_AUTHOR_SIGN_OFF issues

Lukas Bulwahn lukas.bulwahn at gmail.com
Tue Sep 22 18:38:10 UTC 2020



On Tue, 22 Sep 2020, Dwaipayan Ray wrote:

> Hi Lukas,
> 
> I looked up the mailmap implementations in get_maintainers
> and I was able to draw up something. After that I updated
> .mailmap with Daniel's mail addresses and ran the
> checkpatch script on some of Daniel Vetter's commits and
> the author sign off warning no longer appears :).
> 
> Let me know what you think of this.
> I am sending you the diff for now.
>

Generally, I think it is a good first proof of concept.
I believe you that functionality 'basically' works; again, we might 
already want to run a full-scale evaluation on that. Just to see
if there are some impacts we might not be aware of yet.

As you already wrote, we, Joe, you and me, need to figure out
now all the further details:

- how can we avoid the duplicate code in checkpatch.pl and 
get_maintainers.pl?
 
- what is performance impact, especially as AUTHOR_SIGN_OFF check is not 
triggered often, and there are many other rules in checkpatch.pl?

- further details, such as why do we need the lk_path is the first place? 
and many more questions of that kind.

Feel free to sketch a first commit message and create a PATCH RFC for the 
discussion with Joe.


Lukas

> Thanks,
> Dwaipayan.
> 
> --------------------------------------------------------
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 9e65d21456f1..e8c12a5a7e7b 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -51,6 +51,7 @@ my %ignore_type = ();
>  my @ignore = ();
>  my $help = 0;
>  my $configuration_file = ".checkpatch.conf";
> +my $lk_path = "./";
>  my $max_line_length = 100;
>  my $ignore_perl_version = 0;
>  my $minimum_perl_version = 5.10.0;
> @@ -1128,6 +1129,78 @@ sub top_of_kernel_tree {
>   return 1;
>  }
> 
> +my $mailmap;
> +
> +sub read_mailmap {
> +    $mailmap = {
> + names => {},
> + addresses => {}
> +    };
> +
> +    return if (!defined($lk_path) || !(-f "${lk_path}.mailmap"));
> +
> +    open(my $mailmap_file, '<', "${lk_path}.mailmap")
> + or warn "$P: Can't open .mailmap: $!\n";
> +
> +    while (<$mailmap_file>) {
> + s/#.*$//; #strip comments
> + s/^\s+|\s+$//g; #trim
> +
> + next if (/^\s*$/); #skip empty lines
> + #entries have one of the following formats:
> + # name1 <mail1>
> + # <mail1> <mail2>
> + # name1 <mail1> <mail2>
> + # name1 <mail1> name2 <mail2>
> + # (see man git-shortlog)
> +
> + if (/^([^<]+)<([^>]+)>$/) {
> +     my $real_name = $1;
> +     my $address = $2;
> +
> +     $real_name =~ s/\s+$//;
> +     ($real_name, $address) = parse_email("$real_name <$address>");
> +     $mailmap->{names}->{$address} = $real_name;
> +
> + } elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
> +     my $real_address = $1;
> +     my $wrong_address = $2;
> +
> +     $mailmap->{addresses}->{$wrong_address} = $real_address;
> +
> + } elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
> +     my $real_name = $1;
> +     my $real_address = $2;
> +     my $wrong_address = $3;
> +
> +     $real_name =~ s/\s+$//;
> +     ($real_name, $real_address) =
> +   parse_email("$real_name <$real_address>");
> +     $mailmap->{names}->{$wrong_address} = $real_name;
> +     $mailmap->{addresses}->{$wrong_address} = $real_address;
> +
> + } elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
> +     my $real_name = $1;
> +     my $real_address = $2;
> +     my $wrong_name = $3;
> +     my $wrong_address = $4;
> +
> +     $real_name =~ s/\s+$//;
> +     ($real_name, $real_address) =
> +   parse_email("$real_name <$real_address>");
> +
> +     $wrong_name =~ s/\s+$//;
> +     ($wrong_name, $wrong_address) =
> +   parse_email("$wrong_name <$wrong_address>");
> +
> +     my $wrong_email = format_email($wrong_name, $wrong_address);
> +     $mailmap->{names}->{$wrong_email} = $real_name;
> +     $mailmap->{addresses}->{$wrong_email} = $real_address;
> + }
> +    }
> +    close($mailmap_file);
> +}
> +
>  sub parse_email {
>   my ($formatted_email) = @_;
> 
> @@ -1210,14 +1283,50 @@ sub reformat_email {
>   return format_email($email_name, $email_address);
>  }
> 
> +sub mailmap_email {
> +    my ($name, $address) = @_;
> +
> +    my $email = format_email($name, $address);
> +    my $real_name = $name;
> +    my $real_address = $address;
> +
> +    if (exists $mailmap->{names}->{$email} ||
> + exists $mailmap->{addresses}->{$email}) {
> + if (exists $mailmap->{names}->{$email}) {
> +     $real_name = $mailmap->{names}->{$email};
> + }
> + if (exists $mailmap->{addresses}->{$email}) {
> +     $real_address = $mailmap->{addresses}->{$email};
> + }
> +    } else {
> + if (exists $mailmap->{names}->{$address}) {
> +     $real_name = $mailmap->{names}->{$address};
> + }
> + if (exists $mailmap->{addresses}->{$address}) {
> +     $real_address = $mailmap->{addresses}->{$address};
> + }
> +    }
> +    return ($real_name, $real_address);
> +}
> +
>  sub same_email_addresses {
>   my ($email1, $email2) = @_;
> 
>   my ($email1_name, $name1_comment, $email1_address, $comment1) =
> parse_email($email1);
>   my ($email2_name, $name2_comment, $email2_address, $comment2) =
> parse_email($email2);
> 
> - return $email1_name eq $email2_name &&
> -        $email1_address eq $email2_address;
> + my $name_match = $email1_name eq $email2_name;
> + my $address_match = $email1_address eq $email2_address;
> +
> + if(!$name_match || !$address_match) {
> +   my ($real_name1, $real_address1) = mailmap_email($email1_name,
> $email1_address);
> +   my ($real_name2, $real_address2) = mailmap_email($email2_name,
> $email2_address);
> +
> +   $name_match |= ($real_name1 eq $real_name2);
> +   $address_match |= ($real_address1 eq $real_address2);
> + }
> +
> + return $name_match && $address_match;
>  }
> 
>  sub which {
> @@ -2400,6 +2509,7 @@ sub process {
> 
>   my $checklicenseline = 1;
> 
> + read_mailmap();
>   sanitise_line_reset();
>   my $line;
>   foreach my $rawline (@rawlines) {
> 


More information about the Linux-kernel-mentees mailing list