#!/usr/bin/perl

#
# src/tools/git_changelog
#
# Display all commits on active branches, merging together commits from
# different branches that occur close together in time and with identical
# log messages.
#
# By default, commits are annotated with branch and release info thus:
# Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000
# This shows that the commit on REL8_3_STABLE was released in 8.3.2.
# Commits on master will usually instead have notes like
# Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000
# showing that this commit is ancestral to release branches 8.4 and later.
# If no Release: marker appears, the commit hasn't yet made it into any
# release.
#
# The --brief option shortens that to a format like:
# YYYY-MM-DD [hash] abbreviated commit subject line
# Since the branch isn't shown, this is mainly useful in conjunction
# with --master-only.
#
# Most of the time, matchable commits occur in the same order on all branches,
# and we print them out in that order.  However, if commit A occurs before
# commit B on branch X and commit B occurs before commit A on branch Y, then
# there's no ordering which is consistent with both branches.  In such cases
# we sort a merged commit according to its timestamp on the newest branch
# it appears in.
#
# The default output of this script is meant for generating minor release
# notes, where we need to know which branches a merged commit affects.
#
# To generate major release notes, intended usage is
#   git_changelog --master-only --brief --oldest-first --since='start-date'
# To find the appropriate start date, use:
#   git show $(git merge-base REL9_5_STABLE master)
# where the branch to mention is the previously forked-off branch.  This
# shows the last commit before that branch was made.
#
# Note that --master-only is an imperfect filter, since it will not detect
# cases where a HEAD patch was back-patched awhile later or with a slightly
# different commit message.  To find such cases, it's a good idea to look
# through the output of
#   git_changelog --non-master-only --oldest-first --since='start-date'
# and then remove anything from the --master-only output that would be
# duplicative.


use strict;
use warnings;
require Time::Local;
require Getopt::Long;
require IPC::Open2;

# The branches we report on.  Update this list whenever the set of
# interesting branches changes.  (It could be derived from "git branch"
# output, but that has not been worth the trouble.)
# NB: master must be first!
my @BRANCHES = qw(
  master
  REL9_5_STABLE REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE
  REL9_0_STABLE
  REL8_4_STABLE REL8_3_STABLE REL8_2_STABLE REL8_1_STABLE REL8_0_STABLE
  REL7_4_STABLE REL7_3_STABLE REL7_2_STABLE REL7_1_STABLE REL7_0_PATCHES
  REL6_5_PATCHES REL6_4
);

# Commits with matching author and message are merged only if their
# timestamps differ by less than this many seconds (one day).
# Might want to make this parameter user-settable.
my $timestamp_slop = 60 * 60 * 24;

# Command-line switches; every flag starts out off, and $since stays
# undefined unless --since is given.
my ($brief, $details_after, $post_date) = (0, 0, 0);
my ($master_only, $non_master_only, $oldest_first) = (0, 0, 0);
my $since;

# Output staging used by --oldest-first: $output_line accumulates the text
# of the entry being written, @output_buffer holds the finished entries.
my $output_line = '';
my @output_buffer;

my @option_spec = (
	'brief'           => \$brief,
	'details-after'   => \$details_after,
	'master-only'     => \$master_only,
	'non-master-only' => \$non_master_only,
	'post-date'       => \$post_date,
	'oldest-first'    => \$oldest_first,
	'since=s'         => \$since);
Getopt::Long::GetOptions(@option_spec) or usage();

# No non-option arguments are accepted.
usage() if scalar(@ARGV) > 0;

# Base "git log" command line; each branch appends its own revision range.
my @git = ('git', 'log', '--format=fuller', '--date=iso');
push @git, "--since=$since" if defined $since;

# Collect the release tag data: %rel_tags maps a commit hash to the release
# tag attached to it.  Only "git for-each-ref" lines of the form
# "<hash> commit refs/tags/<tag>" are considered, and only tags shaped like
# RELn_n or RELn_n_n are kept.
my %rel_tags;

{
	my $cmd = "git for-each-ref refs/tags";

	# open2 raises an exception of its own when the command cannot be
	# started, so there is no false return value to test for here.
	my $pid = IPC::Open2::open2(my $git_out, my $git_in, $cmd);
	while (my $line = <$git_out>)
	{
		if ($line =~ m|^([a-f0-9]+)\s+commit\s+refs/tags/(\S+)|)
		{
			my $commit = $1;
			my $tag    = $2;
			if (   $tag =~ /^REL\d+_\d+$/
				|| $tag =~ /^REL\d+_\d+_\d+$/)
			{
				$rel_tags{$commit} = $tag;
			}
		}
	}
	waitpid($pid, 0);

	# Test the whole wait status, not just the exit code in its high byte:
	# a child killed by a signal leaves the high byte zero, and we must not
	# mistake a truncated tag list for a complete one.
	die "$cmd failed" if $? != 0;
}

# Collect the commit data.  %all_commits maps an author/message key (see
# hash_commit) to the list of merged-commit records sharing that key;
# %all_commits_by_branch maps a branch name to its merged-commit records in
# the order git log reported them.  Both are filled in by push_commit.
my %all_commits;
my %all_commits_by_branch;

# This remembers where each branch sprouted from master.  Note the values
# will be wrong if --since terminates the log listing before the branch
# sprouts; but in that case it doesn't matter since we also won't reach
# the part of master where it would matter.
my %sprout_tags;

for my $branch (@BRANCHES)
{
	my @cmd = @git;
	if ($branch eq "master")
	{
		push @cmd, "origin/$branch";
	}
	else
	{
		# Only list commits not reachable from master, and ask for parent
		# hashes on each "commit" line so the sprout point can be found.
		push @cmd, "--parents";
		push @cmd, "master..origin/$branch";
	}

	# open2 raises an exception of its own when the command cannot be
	# started, so there is no false return value to test for here.
	my $pid = IPC::Open2::open2(my $git_out, my $git_in, @cmd);

	# Most recent release tag seen so far in this branch's log output;
	# it is carried forward onto every following commit.
	my $last_tag = undef;

	# First parent hash listed on the most recent "commit" line, if any.
	my $last_parent;

	# Fields of the commit currently being read; empty until the first
	# "commit" line arrives.
	my %commit;
	while (my $line = <$git_out>)
	{
		if ($line =~ /^commit\s+(\S+)/)
		{
			# A new commit starts: file the previous one, if there was one.
			push_commit(\%commit) if %commit;
			$last_tag = $rel_tags{$1} if defined $rel_tags{$1};
			%commit = (
				'branch'   => $branch,
				'commit'   => $1,
				'last_tag' => $last_tag,
				'message'  => '',);
			if ($line =~ /^commit\s+\S+\s+(\S+)/)
			{
				$last_parent = $1;
			}
			else
			{
				$last_parent = undef;
			}
		}
		elsif ($line =~ /^Author:\s+(.*)/)
		{
			$commit{'author'} = $1;
		}
		elsif ($line =~ /^CommitDate:\s+(.*)/)
		{
			$commit{'date'} = $1;
		}
		elsif ($line =~ /^\s\s/)
		{
			# Lines starting with two whitespace characters are taken to be
			# commit message text and are kept verbatim.
			$commit{'message'} .= $line;
		}
	}
	push_commit(\%commit) if %commit;

	# The parent of the last commit listed for the branch is recorded as
	# the commit the branch sprouted from.
	$sprout_tags{$last_parent} = $branch if defined $last_parent;
	waitpid($pid, 0);

	# Test the whole wait status, not just the exit code in its high byte:
	# a child killed by a signal leaves the high byte zero, and we must not
	# mistake a truncated log for a complete one.
	die "@cmd failed" if $? != 0;
}

# Walk master's commits and assign their tags.  The other branches were
# tagged while their logs were being read, but master has to wait until
# %sprout_tags is complete.  In post-date mode we also add phony entries
# for branches that sprouted after a particular master commit was made.
{
	my $current_tag;
	my %seen_sprouts;
	foreach my $entry (@{ $all_commits_by_branch{'master'} })
	{
		my $hash       = $entry->{'commit'};
		my $master_rec = $entry->{'commits'}->[0];
		my $sprout     = $sprout_tags{$hash};

		$current_tag = $rel_tags{$hash} if defined $rel_tags{$hash};
		if (defined $sprout)
		{
			# normalize branch names for making sprout tags
			($current_tag = $sprout) =~ s/^(REL\d+_\d+).*/$1_BR/;
		}
		$master_rec->{'last_tag'} = $current_tag;

		next if !$post_date;

		$seen_sprouts{$sprout} = 1 if defined $sprout;

		# Build one copy of master's record per branch sprouted so far,
		# relabelled with that branch, and slot the copies in right after
		# master's own record, ahead of any other commits.
		my @phony =
		  map { +{ %{$master_rec}, 'branch' => $_ } }
		  reverse sort keys %seen_sprouts;
		splice @{ $entry->{'commits'} }, 1, 0, @phony;
	}
}

# Merge the per-branch commit lists into one output stream.
#
# %position holds, for each branch, the index of the next not-yet-emitted
# entry in that branch's list in %all_commits_by_branch.
my %position;
for my $branch (@BRANCHES)
{
	$position{$branch} = 0;
}

while (1)
{
	# Among the entries at the head of each branch's list, pick the one
	# with the largest timestamp.  The comparison is strict, so on a tie
	# the branch appearing earlier in @BRANCHES wins.
	my $best_branch;
	my $best_timestamp;
	for my $branch (@BRANCHES)
	{
		my $leader = $all_commits_by_branch{$branch}->[ $position{$branch} ];
		next if !defined $leader;
		if (!defined $best_branch
			|| $leader->{'timestamp'} > $best_timestamp)
		{
			$best_branch    = $branch;
			$best_timestamp = $leader->{'timestamp'};
		}
	}

	# Every branch's list is exhausted: we are finished.
	last if !defined $best_branch;
	my $winner =
	  $all_commits_by_branch{$best_branch}->[ $position{$best_branch} ];

	# Apply the --master-only / --non-master-only filters.
	my $print_it = 1;
	if ($master_only)
	{
		# keep only entries consisting of a single commit, made on master
		$print_it = (@{ $winner->{'commits'} } == 1)
		  && ($winner->{'commits'}[0]->{'branch'} eq 'master');
	}
	elsif ($non_master_only)
	{
		# drop any entry that includes a commit on master
		foreach my $c (@{ $winner->{'commits'} })
		{
			$print_it = 0 if ($c->{'branch'} eq 'master');
		}
	}

	if ($print_it)
	{
		# The details go before the message by default, after it with
		# --details-after.
		output_details($winner) if (!$details_after);
		output_str("%s", $winner->{'message'} . "\n");
		output_details($winner) if ($details_after);

		# With --oldest-first, output_str has accumulated this entry's
		# text in $output_line instead of printing it; put it at the front
		# of the buffer so the final print comes out in reverse order.
		unshift(@output_buffer, $output_line) if ($oldest_first);
		$output_line = '';
	}

	# Mark the entry as handled, then advance each branch past any entries
	# at its head that are done.  The same merged entry can sit in several
	# branches' lists, and "redo" re-tests the same branch after each
	# advance, so runs of done entries are skipped completely.
	$winner->{'done'} = 1;
	for my $branch (@BRANCHES)
	{
		my $leader = $all_commits_by_branch{$branch}->[ $position{$branch} ];
		if (defined $leader && $leader->{'done'})
		{
			++$position{$branch};
			redo;
		}
	}
}

# In --oldest-first mode nothing has been printed yet; emit it all now.
print @output_buffer if ($oldest_first);

# Record one commit, given as a hash reference with the keys 'branch',
# 'commit', 'date', 'author', 'message' and 'last_tag'.  The commit is
# attached to an existing merged-commit record when a suitable one exists,
# otherwise a new record is created; either way the record is appended to
# the commit's branch list in %all_commits_by_branch.
sub push_commit
{
	my ($c) = @_;
	my $key    = hash_commit($c);
	my $when   = parse_datetime($c->{'date'});
	my $branch = $c->{'branch'};

	# A record is a merge candidate if it has the same author/message key,
	# lies within $timestamp_slop seconds of this commit, and does not
	# already hold a commit from this branch (two commits on one branch are
	# never merged).  Several records may qualify; the one closest in time
	# wins, with the earliest-created one preferred on a tie.
	my $merged;
	my $merged_gap;
	foreach my $other (@{ $all_commits{$key} })
	{
		my $gap = abs($when - $other->{'timestamp'});
		next if $gap >= $timestamp_slop;
		next if exists $other->{'branch_position'}{$branch};
		next if defined $merged && $gap >= $merged_gap;
		$merged     = $other;
		$merged_gap = $gap;
	}

	# Nothing suitable: start a new merged-commit record.
	unless (defined $merged)
	{
		$merged = {
			'author'    => $c->{'author'},
			'message'   => $c->{'message'},
			'commit'    => $c->{'commit'},
			'commits'   => [],
			'timestamp' => $when };
		push @{ $all_commits{$key} }, $merged;
	}

	# Keep just the per-branch fields that are needed later on.
	push @{ $merged->{'commits'} },
	  { 'branch'   => $branch,
		'commit'   => $c->{'commit'},
		'date'     => $c->{'date'},
		'last_tag' => $c->{'last_tag'} };

	# Append to the branch's list and remember the index it landed at.
	push @{ $all_commits_by_branch{$branch} }, $merged;
	$merged->{'branch_position'}{$branch} =
	  $#{ $all_commits_by_branch{$branch} };
}

# Build the key under which matching commits are grouped: the commit's
# author and message, separated by a NUL character.
sub hash_commit
{
	my ($c) = @_;
	my ($author, $message) = @{$c}{ 'author', 'message' };
	return join("\0", $author, $message);
}

# Convert a date string of the form "YYYY-MM-DD HH:MM:SS +ZZZZ" (what
# "git log --date=iso" prints) into seconds since the epoch, UTC.
#
# Dies if the argument is undefined or does not have that form.  The match
# result must be checked: after a failed match the capture variables still
# hold whatever an earlier successful match left in them, which would
# otherwise yield a bogus timestamp.
sub parse_datetime
{
	my ($dt) = @_;
	die "unrecognized date: (undefined)\n" if !defined $dt;

	my ($year, $mon, $mday, $hour, $min, $sec, $sign, $tzh, $tzm) =
	  $dt =~
/^(\d\d\d\d)-(\d\d)-(\d\d)\s+(\d\d):(\d\d):(\d\d)\s+([-+])(\d\d)(\d\d)$/;
	die "unrecognized date: \"$dt\"\n" if !defined $year;

	# Treat the broken-down fields as UTC first (timegm wants a 0-based
	# month), then remove the zone offset to get the real UTC instant.
	my $gm = Time::Local::timegm($sec, $min, $hour, $mday, $mon - 1, $year);
	my $tzoffset = ($tzh * 60 + $tzm) * 60;
	$tzoffset = -$tzoffset if $sign eq '-';
	return $gm - $tzoffset;
}

# printf-style output routine.  The arguments are a format string followed
# by its values.  Normally the text is printed at once; in --oldest-first
# mode it is appended to $output_line instead.
sub output_str
{
	my ($format, @args) = @_;
	return printf($format, @args) if !$oldest_first;
	return $output_line .= sprintf($format, @args);
}

# Output the author line and the per-branch detail lines for one merged
# commit record (a hash reference with 'author', 'message' and 'commits'
# keys), followed by a blank line.  All output goes through output_str.
#
# With --details-after the author is shown as "(name)"; otherwise as
# "Author: ..." with the full author string.  Each commit in the record
# then gets one line: "date [hash] subject" with --brief, or else
# "Branch: ... Release: ... [hash] date", where the Branch part is left
# out with --master-only and the Release part when there is no tag.
sub output_details
{
	my $item = shift;

	if ($details_after)
	{
		# output only author name, not email address.  If the author
		# string is not of the form "name <email>", show all of it rather
		# than using a capture left behind by a match that failed.
		my $name = $item->{'author'};
		if ($item->{'author'} =~ m{^(.*?)\s*<[^>]*>$})
		{
			$name = $1;
		}
		output_str("(%s)\n", $name);
	}
	else
	{
		output_str("Author: %s\n", $item->{'author'});
	}

	# The abbreviated subject is the same for every commit in the record,
	# so extract it once: the first line of the message with leading
	# whitespace removed.
	my $subject;
	if ($brief)
	{
		($subject) = $item->{'message'} =~ m/^\s*(.*)/;
	}

	foreach my $c (@{ $item->{'commits'} })
	{
		if ($brief)
		{
			output_str(
				"%s [%s] %s\n",
				substr($c->{'date'},   0, 10),
				substr($c->{'commit'}, 0, 9),
				substr($subject,       0, 56));
		}
		else
		{
			output_str("Branch: %s ", $c->{'branch'})
			  if (!$master_only);
			output_str("Release: %s ", $c->{'last_tag'})
			  if (defined $c->{'last_tag'});
			output_str("[%s] %s\n", substr($c->{'commit'}, 0, 9),
				$c->{'date'});
		}
	}
	output_str("\n");
}

# Print the command-line help text to standard error and exit with
# status 1.  Never returns.
sub usage
{
	my $help = <<'EOM';
Usage: git_changelog [--brief/-b] [--details-after/-d] [--master-only/-m] [--non-master-only/-n] [--oldest-first/-o] [--post-date/-p] [--since=SINCE]
    --brief         Shorten commit descriptions, omitting branch identification
    --details-after Show branch and author info after the commit description
    --master-only   Show only commits made just in the master branch
    --non-master-only Show only commits made just in back branches
    --oldest-first  Show oldest commits first
    --post-date     Show branches made after a commit occurred
    --since         Show only commits dated since SINCE
EOM
	print STDERR $help;
	exit 1;
}
