llvm-project/flang/flatten.cpp

917 lines
35 KiB
C++

// Copyright (c) 2019, Arm Ltd. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Reminder: no warranty with this program. I recommend using a fresh checkout.
// Compile with:
// clang++ -Wall -Werror -O2 flatten.cpp -lgit2
// Run with f18 in PWD or argv[1]:
// time ./a.out ~/.local/src/github.com/flang-compiler/f18/
// To get a rewritten history, do this:
//
// sudo apt install -y libgit2-dev # or equivalent
// git clone https://github.com/flang-compiler/f18
// git remote add llvm-project https://github.com/llvm/llvm-project
// git fetch llvm-project
// clang++ -DREPLACE_REFERENCES -Wall -Werror -O2 flatten.cpp -lgit2
// ./a.out
// Inputs:
// * a ref called origin/master, representing f18 history
// * (optionally) a ref called llvm-project/master, representing llvm upstream
// * (optionally) branches called rebase-{12 digit merge sha}, representing the
// manual rebase of tricky cases.
//
// Outputs:
// * A branch called rewritten-history-v2, with a linearized f18 history.
// * A branch called rewritten-history-v2-llvm-project-merge, representing the
// renaming of the project under /flang/ and taking llvm-project/master as the
// new parent for the (original) root commit.
//
// This program is meant to be idempotent and should not write to the working
// directory, it simply takes refs as input and produces them as output.
// Key concepts:
//
// * The checkout that git gives you for a commit is called a "tree", which is
// determined by a recursive checksum of the directory structure. If two
// commits have the same tree ("treesame"), then they are by definition
// equivalent when you check them out.
//
// * Lineage of the "master" branch is taken by following the first parents of
// each commit. To see this in git log, run `git log --first-parent`. This
// effectively ignores the second-parent history (i.e. commits that happened
// on branches).
//
// * By construction it is arranged that the trees of the first-parent history
// are preserved. This means "the code on the master branch is the same before
// and after rewrite".
//
// * Preserving the non-first-parent commits is trickier, and requires a rebase.
//
// * If nothing changed on the master branch during a feature branch, a rebase
// will not change the trees of the feature branch, so trees of those commits
// will still be the same. It's like rewriting the merge as a fast-forward.
//
// * However, if something happened on the master branch during the feature
// branch, then a rebase *must* create new trees. This implies code which
// might not build. As an example, imagine a case where a class is renamed on
// master, and the old name is used in the feature branch (until it's fixed at
// some point by the time it is merged).
//
// * By the end of the rebase, we assert that the trees are the same as those
// merged into master. So code in the middle of the rebased feature branch may
// not build, but at least the overall result of the feature branch will be as
// good as master was. Thankfully this is relatively rare.
//
// * If a branch exists called rebase-{sha of merge commit}, that branch is
// substituted in place of the merge commit. This allows manually rebasing
// tricky merges.
//
// * For the non-treesame, we can take a second-order diff (diff-of-diff)
// comparing those commits before and after rewrite, and ensure that only line
// numbers and context changed. This is almost totally the case.
// Using the following script, it is possible to see whether non-TREESAME
// patches still have the same diff, modulo blank lines, by taking a
// second-order diff.
//
// git log --grep=TREESAME --invert-grep --format="%h %(trailers:key=Original-commit)" rewritten-history-v2 |
// sed -n 's|Original-commit: flang-compiler/f18@||p' |
// while read NEW ORIG
// do
// echo ORIG NEW: $ORIG $NEW
// git show $ORIG > a
// git show $NEW > b
// sed -r -i \
// -e 's/@@ .* @@/@@ Numbers @@/g' \
// -e '/^(commit|index) .*/d' \
// -e '/Original-commit.*/d' \
// -e '/^\s$/d' \
// a b
// git diff --color --no-index a b
// done |& less -SR
#include <array>
#include <cstdio>
#include <cstring>
#include <map>
#include <sstream>
#ifndef NO_REPLACE_REFERENCES
#include <regex>
#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <git2.h>
#include <git2/sys/commit.h>
void check(int error, const char *message, const char *extra) {
const git_error *err;
const char *msg = "", *spacer = "";
if (!error)
return;
if ((err = giterr_last()) != NULL && err->message != NULL) {
msg = err->message;
spacer = " - ";
}
if (extra)
fprintf(stderr, "%s '%s' [%d]%s%s\n", message, extra, error, spacer, msg);
else
fprintf(stderr, "%s [%d]%s%s\n", message, error, spacer, msg);
exit(1);
}
int n_conflicts = 0, n_discards = 0;
// Copy src string to dst string, rewriting issue references.
char *rewrite_issue_references(char *dst, const char *src) {
#ifndef NO_REPLACE_REFERENCES
const char *src_end = src + strlen(src);
// return src_end;
char *new_end = std::regex_replace(
dst, src, src_end,
std::regex("(^|\\b[^a-zA-Z0-9]+)(#[0-9]+)\\b"),
"$1flang-compiler/f18$2");
*new_end = '\0';
return new_end;
#else
return stpcpy(dst, src);
#endif
}
// test_rewrite_issue_references runs some test cases thorugh the string
// replacement machinery and aborts if anything is awry.
void test_rewrite_issue_references() {
#ifdef NO_REPLACE_REFERENCES
return;
#endif
struct { const char *input, *want; } tests[] = {
{"foo#123", "foo#123"},
{"Test #123bar", "Test #123bar"},
// Special case.
// {"commit message #123", "commit message #123"},
{"#123", "flang-compiler/f18#123"},
{"Test #123", "Test flang-compiler/f18#123"},
{"Test #123", "Test flang-compiler/f18#123"},
{"Test (#123)", "Test (flang-compiler/f18#123)"},
};
bool fail = false;
for (const auto test : tests) {
char *x = (char*)malloc(1024);
const char *new_end = rewrite_issue_references(x, test.input);
if (strcmp(x, test.want)) {
fprintf(stderr, "Got : %s\n", x);
fprintf(stderr, "Want: %s\n", test.want);
fail = true;
}
if (new_end != x + strlen(x)) {
abort();
}
(void)new_end;
free((void*)x);
}
if (fail)
abort();
}
static const char mergemsg_prefix[] = "Merge pull request #";
// has_merge_pr_prefix returns true if the commit message begins "Merge pull
// request #".
bool has_merge_pr_prefix(const char* msg) {
int len = sizeof(mergemsg_prefix)-1;
if (strlen(msg) < len)
len = strlen(msg);
return !strncmp(mergemsg_prefix, msg, len);
}
// tweak_commit_message
// Prepend [flang-compiler/f18#PRNUM]
// Append "Original-commit", "Reviewed-on" and "Tree-same-pre-rewrite".
//
// Allocates a new commit message. Return value must be freed.
// The Reviewed-on trailer URL is determined by "Merge pull request #(number)",
// if present.
char *tweak_commit_message(git_commit *orig_commit, git_commit *orig_merge, const git_oid *new_tree) {
const char *orig_msg = git_commit_message_raw(orig_commit);
const char *prnum = NULL, *prnum_end = NULL;
// If the message indicates a PR, store in prnum.
if (orig_merge != NULL && has_merge_pr_prefix(git_commit_message(orig_merge))) {
const char *mergemsg = git_commit_message_raw(orig_merge);
prnum = mergemsg + sizeof(mergemsg_prefix) - 1;
prnum_end = strchr(prnum, ' ');
}
#ifndef NO_REPLACE_REFERENCES
// Match "foo bar baz (#123)", which is the convention for "Squash" commit
// merges on GitHub.
static std::regex prnum_re("^(.*\\(#)([0-9]+)\\)$");
std::cmatch match;
if (std::regex_match(git_commit_summary(orig_merge), match, prnum_re)) {
const char *summary = git_commit_summary(orig_merge);
prnum = summary + match.length(1);
prnum_end = prnum + match.length(2);
}
#endif
// Gratuitous space for appending things.
const ssize_t extra_space = 102400;
ssize_t size = strlen(orig_msg) + extra_space;
char *newmsg_start = (char*)malloc(size);
char *newmsg_end = newmsg_start + (size);
char *newmsg = newmsg_start; // Pointer tracks the current write position.
newmsg[0] = 0;
// Set to leave message unmodified except for Original-commit, useful for
// verifying second-order diffs.
const bool use_original_message = false;
if (use_original_message) {
// These are here to indicate if the checkouts are the same as a commit and/or a merge.
if (git_oid_equal(git_commit_tree_id(orig_merge), new_tree)) {
newmsg = stpncpy(newmsg, "[TREESAME master] ", newmsg_end - newmsg);
} else if (git_oid_equal(git_commit_tree_id(orig_commit), new_tree)) {
newmsg = stpncpy(newmsg, "[TREESAME commit] ", newmsg_end - newmsg);
}
newmsg = stpcpy(newmsg, orig_msg);
// From here on out, append trailer headers.
char buf[GIT_OID_HEXSZ+1] = {};
newmsg = stpncpy(newmsg, "\n\nOriginal-commit: flang-compiler/f18@", newmsg_end - newmsg);
git_oid_fmt(buf, git_commit_id(orig_commit));
newmsg = stpncpy(newmsg, buf, newmsg_end - newmsg);
newmsg = stpncpy(newmsg, "\n", newmsg_end - newmsg);
return newmsg_start;
}
// Prepend [Flang] tag.
newmsg = stpncpy(newmsg, "[Flang] ", newmsg_end - newmsg);
// Paste in the original message, rewriting references #123 => flang-compiler/f18#123
newmsg = rewrite_issue_references(newmsg, orig_msg);
// If there is a newline at the end, remove it; subsequent insertion of the
// Original-commit header will always insert it. This ensures consistent
// spacing before the header.
while (newmsg[-1] == '\n') {
newmsg[-1] = 0;
newmsg--;
}
// From here on out, append trailer headers.
char buf[GIT_OID_HEXSZ+1] = {};
newmsg = stpncpy(newmsg, "\n\nOriginal-commit: flang-compiler/f18@", newmsg_end - newmsg);
git_oid_fmt(buf, git_commit_id(orig_commit));
newmsg = stpncpy(newmsg, buf, newmsg_end - newmsg);
newmsg = stpncpy(newmsg, "\n", newmsg_end - newmsg);
if (prnum != NULL) {
newmsg = stpncpy(newmsg, "Reviewed-on: https://github.com/flang-compiler/f18/pull/", newmsg_end - newmsg);
newmsg = stpncpy(newmsg, prnum, prnum_end - prnum);
newmsg = stpncpy(newmsg, "\n", newmsg_end - newmsg);
}
if (!git_oid_equal(git_commit_tree_id(orig_merge), new_tree)) {
// If this is present, then the contents of the tree are identical pre-
// and post- merge. If it is not present, then the patch was rebased.
newmsg = stpncpy(newmsg, "Tree-same-pre-rewrite: false\n", newmsg_end - newmsg);
}
return newmsg_start;
}
// insert_flang_directory sets new_root to a newly created tree with one entry
// in it: /flang/, which points at orig_root.
void insert_flang_directory(git_repository *repo, git_oid *new_root, const git_oid *orig_root) {
git_treebuilder *tb;
check(git_treebuilder_new(&tb, repo, NULL), "git_treebuilder_new", NULL);
const git_tree_entry *te;
git_treebuilder_insert(&te, tb, "flang", orig_root, GIT_FILEMODE_TREE);
git_treebuilder_write(new_root, tb);
git_treebuilder_free(tb);
}
// count_branch_commits counts the number of on-branch (non-merge) commits in
// the given merge.
int count_branch_commits(git_commit *merge) {
git_revwalk *walk;
check(git_revwalk_new(&walk, git_commit_owner(merge)), "git_revwalk_new", NULL);
check(git_revwalk_hide(walk, git_commit_parent_id(merge, 0)), "git_revwalk_hide", NULL);
check(git_revwalk_push(walk, git_commit_parent_id(merge, 1)), "git_revwalk_push", NULL);
git_oid commit_oid;
int n = 0;
while (!git_revwalk_next(&commit_oid, walk))
n++;
git_revwalk_free(walk);
return n;
}
// tree_for_commit grabs the git_oid pointing to the tree for a given commit_id.
git_oid tree_for_commit(git_repository *repo, const git_oid *commit_id) {
git_commit *c;
check(git_commit_lookup(&c, repo, commit_id), "git_commit_lookup", NULL);
git_oid tree_id;
git_oid_cpy(&tree_id, git_commit_tree_id(c));
git_commit_free(c);
// git_commit_
return tree_id;
}
// generate_authortime_to_commit_map walks the commits on the second-parent
// history of the given `merge`, computing a mapping from the author time to the
// original commit id. Since this is scoped to feature-branch commits, there are
// not likely to be collisions.
void generate_authortime_to_commit_map(std::map<git_time_t, git_oid> &authortime_to_commit, git_commit *merge) {
git_repository *repo = git_commit_owner(merge);
git_revwalk *walk;
check(git_revwalk_new(&walk, git_commit_owner(merge)), "git_revwalk_new", NULL);
check(git_revwalk_hide(walk, git_commit_parent_id(merge, 0)), "git_revwalk_hide", NULL);
check(git_revwalk_push(walk, git_commit_parent_id(merge, 1)), "git_revwalk_push", NULL);
// Only walk first parent history on the grounds that most of those which
// introduce commits not-already-on-mainline are accidental merges of
// rebases, duplicating patches in history. Where patches are missed, they
// won't have an entry in the authortime_to_commit.
git_revwalk_simplify_first_parent(walk);
git_oid commit_id;
while (!git_revwalk_next(&commit_id, walk)) {
git_commit *c;
check(git_commit_lookup(&c, repo, &commit_id), "git_commit_lookup", NULL);
int when = git_commit_author(c)->when.time;
if (authortime_to_commit.count(when) != 0) {
char buf[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf, 12, &commit_id);
char buf1[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf1, 12, git_commit_id(merge));
char buf2[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf2, 12, &authortime_to_commit[when]);
printf("Hit duplicate commit considering %s "
"(merge %s, duplicate %s)\n", buf, buf1, buf2);
// Duplicate author times. Need another strategy.
abort();
}
authortime_to_commit[when] = commit_id;
git_commit_free(c);
}
git_revwalk_free(walk);
}
// try_rebase attempts to rebase orig_merge onto the new history.
// It returns true if the rebase succeeds without conflicts, and false otherwise.
// On success, new_head is set to the tip of the rebase.
bool try_rebase(git_oid **new_head, git_commit *orig_merge) {
git_repository *repo = git_commit_owner(orig_merge);
const git_oid *p0 = git_commit_parent_id(orig_merge, 0);
const git_oid *p1 = git_commit_parent_id(orig_merge, 1);
git_annotated_commit *p0a, *p1a, *new_heada;
check(git_annotated_commit_lookup(&p0a, repo, p0), "git_annotated_commit_lookup p0", NULL);
check(git_annotated_commit_lookup(&p1a, repo, p1), "git_annotated_commit_lookup p1", NULL);
check(git_annotated_commit_lookup(&new_heada, repo, *new_head), "git_annotated_commit_lookup new_head", NULL);
char buf[] = "refs/heads/rebase-0123456789ab\0";
git_oid_nfmt(buf+sizeof("refs/heads/rebase-")-1, 12, git_commit_id(orig_merge));
bool using_manual_rebase = false;
// Look for a branch called rebase-[12 digit SHA]. If it exists and is
// tree-same to the merge, treat it as the branch we're trying to rebase.
git_reference *manual_rebase;
int err = git_reference_lookup(&manual_rebase, repo, buf);
switch (err) {
case 0: { // Reference found.
const git_oid manual_tree = tree_for_commit(repo, git_reference_target(manual_rebase));
if (0 == git_oid_cmp(
git_reference_target(manual_rebase),
git_commit_id(orig_merge))) {
printf("Skip %s because it's pointing at the merge.\n", buf);
goto manual_rebase_unusable;
}
if (0 != git_oid_cmp(&manual_tree, git_commit_tree_id(orig_merge))) {
printf("Skip %s because the tip of the rebase is not "
"treesame to the merge commit\n", buf);
goto manual_rebase_unusable;
}
printf("Using manual rebase branch %s\n", buf);
using_manual_rebase = true;
// Update p1a, the commits being rebased, to point at the branch.
// Then rebase, and this shouldn't result in any conflicts.
git_annotated_commit_free(p1a);
git_annotated_commit_lookup(&p1a, repo, git_reference_target(manual_rebase));
manual_rebase_unusable:
git_reference_free(manual_rebase);
break;
}
case GIT_ENOTFOUND:
// printf("Rebase branch %s not found.\n", buf);
break;
default:
check(err, "git_reference_lookup rebase-...", NULL);
}
git_rebase_options rb_opts;
check(git_rebase_init_options(&rb_opts, GIT_REBASE_OPTIONS_VERSION), "git_rebase_init_options", NULL);
rb_opts.inmemory = 1;
rb_opts.merge_options.flags = GIT_MERGE_FIND_RENAMES;
rb_opts.merge_options.rename_threshold = 50;
git_rebase *rb;
check(git_rebase_init(&rb, repo, p1a, p0a, new_heada, &rb_opts), "git_rebase_init", NULL);
bool is_success = true; // becomes false if conflicts encountered.
bool committed_at_least_one_patch = false;
git_oid rebase_tip_id;
git_oid_cpy(&rebase_tip_id, *new_head);
std::map<git_time_t, git_oid> authortime_to_commit;
if (using_manual_rebase) {
generate_authortime_to_commit_map(authortime_to_commit, orig_merge);
}
// Loop over each patch in the rebase, committing it.
git_rebase_operation *op;
while (!git_rebase_next(&op, rb)) {
git_index *idx;
check(git_rebase_inmemory_index(&idx, rb), "git_rebase_inmemory_index", NULL);
if (git_index_has_conflicts(idx)) {
// Conflicting case. Print a useful message.
char buf_patch[GIT_OID_HEXSZ+1] = {};
char buf_merge[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf_patch, 12, &op->id);
git_oid_nfmt(buf_merge, 12, git_commit_id(orig_merge));
int discarded = count_branch_commits(orig_merge);
printf("Conflicts encountered; patch=%s merge=%s - discarding %d commits\n", buf_patch, buf_merge, discarded);
printf(" M=%s; git checkout -B rebase-${M} ${M}^2; git rebase ${M}^1\n", buf_merge);
n_conflicts++;
n_discards += discarded;
git_index_free(idx);
is_success = false;
// If conflicts are found, abort, fall back to taking the merge
// commit.
break;
}
git_index_free(idx);
git_commit *orig_commit;
check(git_commit_lookup(&orig_commit, repo, &op->id), "git_commit_lookup", NULL);
// Generate the new tree now (as opposed to within git_rebase_commit) so that it can be used for TREESAME
// diagnostics in the commit message.
git_oid new_tree;
check(git_index_write_tree_to(&new_tree, idx, repo), "git_index_write_tree_to", NULL);
if (using_manual_rebase) {
// If in a manual rebase, need to lookup original patch.
// Use the author timestamp as a heuristic for patch equality.
const git_time_t when = git_commit_author(orig_commit)->when.time;
git_oid pre_rebase_commit_id = {};
if (when == 1518039228) { // Wed Feb 7 13:33:48 2018 -0800
// Hack for a single special case, a commit which was merged.
check(git_oid_fromstr(&pre_rebase_commit_id, "044148ead21f18e16716d5bc30819525c79065d0"), "git_oid_fromstr", NULL);
} else if (authortime_to_commit.count(when) == 0) {
char buf[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf, 12, &op->id);
printf("Unable to find original commit for manual "
"rebase: %s\n", buf);
git_oid_nfmt(buf, 12, git_commit_id(orig_merge));
printf(" Merge: %s\n", buf);
abort();
} else {
pre_rebase_commit_id = authortime_to_commit[when];
}
// Replace orig_commit (the rebased commit in this context) with the
// true original commit, so that the commit cross-reference
// correctly reflects a commit which exists in the f18 repository.
git_commit_free(orig_commit);
check(git_commit_lookup(&orig_commit, repo, &pre_rebase_commit_id), "git_commit_lookup", NULL);
}
const char *msg = tweak_commit_message(orig_commit, orig_merge, &new_tree);
int err = git_rebase_commit(
&rebase_tip_id,
rb,
NULL,
// Take the committer information from the merge commit if manually rebased.
using_manual_rebase ? git_commit_committer(orig_merge): git_commit_committer(orig_commit),
NULL,
msg
);
free((void*)msg);
git_commit_free(orig_commit);
if (err == GIT_EAPPLIED) {
// Applying the patch results in the same tree, so the patch is
// empty.
char buf_patch[GIT_OID_HEXSZ+1] = {};
char buf_merge[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf_patch, 12, &op->id);
git_oid_nfmt(buf_merge, 12, git_commit_id(orig_merge));
printf("Patch already exists in history; patch=%s merge=%s\n", buf_patch, buf_merge);
continue;
}
check(err, "git_rebase_commit", NULL);
committed_at_least_one_patch = true;
}
if (is_success && committed_at_least_one_patch) {
// Update the growing new_head to point at our new rebase tip.
git_oid_cpy(*new_head, &rebase_tip_id);
}
git_rebase_abort(rb);
git_rebase_free(rb);
git_annotated_commit_free(p0a);
git_annotated_commit_free(p1a);
git_annotated_commit_free(new_heada);
return is_success;
}
// merge_llvm_project_tree generates a new root tree combining the llvm project
// tree and the given new_tree_id. new_tree_id is updated to point at the new tree.
void merge_llvm_project_tree(
git_oid *new_tree_id,
const git_oid *orig_tree,
const git_tree *llvm_project_tree) {
git_repository *repo = git_tree_owner(llvm_project_tree);
git_tree *flang_tree;
check(git_tree_lookup(&flang_tree, repo, orig_tree), "git_tree_lookup", NULL);
const git_oid *flang_dir_tree_id = git_tree_entry_id(git_tree_entry_byname(flang_tree, "flang"));
// Effectively merges the flang/ directory into the llvm project tree.
git_treebuilder *tb;
check(git_treebuilder_new(&tb, repo, llvm_project_tree), "git_treebuilder_new", NULL);
check(git_treebuilder_insert(NULL, tb, "flang", flang_dir_tree_id, GIT_FILEMODE_TREE), "git_treebuilder_insert", NULL);
check(git_treebuilder_write(new_tree_id, tb), "git_treebuilder_write", NULL);
git_treebuilder_free(tb);
git_tree_free(flang_tree);
}
// generate_squash_message generates a commit message for merges which have been
// squashed.
void generate_squash_message(char **newmsg, git_commit *merge_commit) {
std::stringstream s;
// Start the message with the existing rewritten message.
s << *newmsg;
s << "\nDue to a conflicting rebase during the linearizing of "
"flang-compiler/f18, this commit squashes a number of "
"other commits:\n\n";
git_revwalk *walk;
check(git_revwalk_new(&walk, git_commit_owner(merge_commit)), "allocate git_revwalk", NULL);
git_revwalk_simplify_first_parent(walk);
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
check(git_revwalk_push(walk, git_commit_parent_id(merge_commit, 1)), "git_revwalk_push", NULL);
check(git_revwalk_hide(walk, git_commit_parent_id(merge_commit, 0)), "git_revwalk_hide", NULL);
git_oid commit_id;
while (!git_revwalk_next(&commit_id, walk)) {
char buf[GIT_OID_HEXSZ+1] = {};
git_oid_fmt(buf, &commit_id);
git_commit *c;
check(git_commit_lookup(&c, git_commit_owner(merge_commit), &commit_id), "git_commit_lookup", NULL);
s << "flang-compiler/f18@" << buf << " " << git_commit_summary(c) << "\n";
git_commit_free(c);
}
git_revwalk_free(walk);
// Replace newmsg with the squashed msg.
auto result = s.str();
char *squashmsg = (char*)malloc(result.size()+1);
squashmsg[result.size()] = 0;
strncpy(squashmsg, result.c_str(), result.size());
free(*newmsg);
*newmsg = squashmsg;
}
int main(int argc, char* argv[]) {
test_rewrite_issue_references();
git_libgit2_init();
const char *repo_path = ".";
if (argc > 1)
repo_path = argv[1];
git_repository *repo;
int error = git_repository_open(&repo, repo_path);
if (error < 0) {
fprintf(stderr, "Could not open repository: %s\n", giterr_last()->message);
exit(1);
}
// Walk commits in reverse topological order starting from origin/master.
git_revwalk *walk;
check(git_revwalk_new(&walk, repo), "allocate git_revwalk", NULL);
git_revwalk_simplify_first_parent(walk);
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
check(git_revwalk_push_ref(walk, "refs/remotes/origin/master"), "git_revwalk_push_head", NULL);
// check(git_revwalk_push_ref(walk, "refs/heads/flatten-top"), "git_revwalk_push_ref", NULL);
// check(git_revwalk_hide_ref(walk, "refs/heads/flatten-bottom"), "git_revwalk_hide_ref", NULL);
bool is_root = true; // First commit has no parents.
git_oid old_head = {};
git_oid *new_head = NULL;
git_oid new_commit_id = {};
// For each commit in the first-parent lineage of the original history:
//
// 1. Take non-merge commits as they were.
// 2. Attempt to rebase second-parent of merge commits onto first-parent.
// 2a. Otherwise, squash them.
//
// Merge commits are preserved as empty commits.
while (!git_revwalk_next(&old_head, walk)) {
git_commit *c;
check(git_commit_lookup(&c, repo, &old_head), "git_commit_lookup", NULL);
// Prettify the commit message - rewrite references, add trailer headers.
char *newmsg = tweak_commit_message(c, c, git_commit_tree_id(c));
switch (git_commit_parentcount(c)) {
default:
fprintf(stderr, "Unexpected number of parents.\n");
exit(5);
case 2: {
if (is_root) {
// root commit cannot be rebased. Squash instead.
// (only happens if using a restricted commit range)
break;
}
if (try_rebase(&new_head, c)) {
// Rebase succeeded. Now ensure that at the end of the rebase,
// the tree state is the same as if the merge had been done.
git_oid old_tree = tree_for_commit(repo, &old_head);
git_oid new_tree = tree_for_commit(repo, new_head);
if (!git_oid_equal(&old_tree, &new_tree)) {
char buf_old_head[GIT_OID_HEXSZ+1] = {};
char buf_new_head[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf_old_head, 12, &old_head);
git_oid_nfmt(buf_new_head, 12, new_head);
fprintf(stderr, "commits do not have the same tree: (old, "
"new) = %s %s", buf_old_head, buf_new_head);
exit(6);
}
// Create an empty commit for the merge.
check(git_commit_create_from_ids(
&new_commit_id,
repo,
NULL,
git_commit_author(c),
git_commit_committer(c),
git_commit_message_encoding(c),
newmsg,
&new_tree,
is_root ? 0 : 1,
(const git_oid**)(&new_head)
), "git_commit_create_from_ids", NULL);
new_head = &new_commit_id;
is_root = false;
// Rebase succeeded, new_head updated. Keep going...
goto next_patch;
}
generate_squash_message(&newmsg, c);
}
// These are non-merge commits on the first-parent history.
// Take them as-is.
case 0: case 1: ;
}
// Create a new commit.
check(git_commit_create_from_ids(
&new_commit_id,
repo,
NULL,
git_commit_author(c),
git_commit_committer(c),
git_commit_message_encoding(c),
newmsg,
git_commit_tree_id(c),
is_root ? 0 : 1,
(const git_oid**)(&new_head)
), "git_commit_create_from_ids", NULL);
new_head = &new_commit_id;
is_root = false;
next_patch:
free((void*)newmsg);
git_commit_free(c);
}
// First pass now done. Move the directory in a second pass, and re-parent
// onto llvm-project if it is available.
char buf[GIT_OID_HEXSZ+1] = {};
git_oid_nfmt(buf, 12, new_head);
printf("\nConflicts encountered: %d, discarding %d commits\n", n_conflicts, n_discards);
printf("Done; rewritten-history-v4 => %s\n", buf);
git_reference *ref;
check(git_reference_create(
&ref,
repo,
"refs/heads/rewritten-history-v4",
new_head,
1,
"flatten.cpp update"
),
"git_reference_create", NULL);
git_reference_free(ref);
git_revwalk_reset(walk);
// Now rename everything under flang/.
printf("Inserting /flang/...\n");
{
git_oid new_commit_id;
bool is_root = true; // First commit has no parents.
git_oid *new_head_renamed = NULL;
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE);
check(git_revwalk_push(walk, new_head), "git_revwalk_push_head", NULL);
// See if the upstream is available at llvm-project/master. If it is,
// we'll write the history into there, and use the LLVM project head as
// the root commit.
git_oid llvm_project_head = {};
git_tree *llvm_project_tree;
int err = git_reference_name_to_id(&llvm_project_head, repo, "refs/remotes/llvm-project/master");
bool have_llvm_project = err == 0;
if (!have_llvm_project) {
fprintf(stderr, "Require llvm-project/master ref to exist before proceeding. Add llvm-project as a remote and fetch it.\n");
exit(2);
}
git_oid_nfmt(buf, 12, &llvm_project_head);
printf("Rewriting history on top of llvm-project@%s...\n", buf);
// Disabled since the merged MLIR root commit has zero parents.
// Take the same approach to be consistent (= false).
const bool use_llvm_project_head_as_root = false;
if (use_llvm_project_head_as_root) {
new_head_renamed = &llvm_project_head;
is_root = false;
}
// Grab the llvm_project_tree.
git_commit *c;
check(git_commit_lookup(&c, repo, &llvm_project_head), "git_commit_lookup", NULL);
check(git_commit_tree(&llvm_project_tree, c), "git_commit_tree", NULL);
git_commit_free(c);
git_oid new_tree;
// For each commit, rewrite its tree.
while (!git_revwalk_next(&old_head, walk)) {
git_commit *c;
check(git_commit_lookup(&c, repo, &old_head), "git_commit_lookup", NULL);
insert_flang_directory(repo, &new_tree, git_commit_tree_id(c));
check(git_commit_create_from_ids(
&new_commit_id,
repo,
NULL,
git_commit_author(c),
git_commit_committer(c),
git_commit_message_encoding(c),
git_commit_message_raw(c),
&new_tree,
is_root ? 0 : 1,
(const git_oid**)(&new_head_renamed)
), "git_commit_create_from_ids", NULL);
new_head_renamed = &new_commit_id;
is_root = false;
git_commit_free(c);
}
git_signature *merge_commit_author;
check(git_signature_default(&merge_commit_author, repo), "git_signature_default", NULL);
const char *merge_message =
"[Flang] Merge flang-compiler/f18\n"
"\n"
"This is the initial merge of flang-compiler, which is done in this way\n"
"principally to preserve the history and git-blame, without generating a large\n"
"number of commits on the first-parent history of LLVM.\n"
"\n"
"If you don't care about the flang history during a bisect remember that you can\n"
"supply paths to git-bisect, e.g. `git bisect start clang llvm`.\n"
"\n"
"The history of f18 was rewritten to:\n"
"\n"
"* Put the code under /flang/.\n"
"* Linearize the history.\n"
"* Rewrite commit messages so that issue and PR numbers point to the old repository.\n"
"\n"
"Updates: flang-compiler/f18#876 (submission into llvm-project)\n"
"Mailing-list: http://lists.llvm.org/pipermail/llvm-dev/2020-January/137989.html ([llvm-dev] Flang landing in the monorepo - next Monday!)\n"
"Mailing-list: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137661.html ([llvm-dev] Flang landing in the monorepo)\n";
merge_llvm_project_tree(&new_tree, &new_tree, llvm_project_tree);
const git_oid *parents[2] = {};
parents[0] = &llvm_project_head;
parents[1] = &new_commit_id;
git_oid new_head_merged;
check(git_commit_create_from_ids(
&new_head_merged,
repo,
NULL,
merge_commit_author,
merge_commit_author,
NULL,
merge_message,
&new_tree,
2,
parents
), "git_commit_create_from_ids", NULL);
git_signature_free(merge_commit_author);
git_tree_free(llvm_project_tree);
git_reference *ref;
check(git_reference_create(
&ref,
repo,
"refs/heads/rewritten-history-v4-llvm-project-merge",
&new_head_merged,
1,
"flatten.cpp update"
),
"git_reference_create", NULL);
git_reference_free(ref);
git_oid_nfmt(buf, 12, &new_head_merged);
printf("Done; rewritten-history-v4-llvm-project-merge => %s\n", buf);
}
printf(" ... all done\n");
git_oid origin_master;
git_reference_name_to_id(&origin_master, repo, "refs/remotes/origin/master");
git_oid_nfmt(buf, 12, &origin_master);
printf("Start point was origin/master => %s\n", buf);
git_revwalk_free(walk);
git_repository_free(repo);
return 0;
}