1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
163 /* Cache for the hash code. */
164 bool hashcode_cached;
169 The memory region passed by the caller must of indefinite extent. It is
170 *not* copied here. */
171 static struct entry *
172 entry_create (char *string, size_t length)
174 struct entry *result = XMALLOC (struct entry);
175 result->string = string;
176 result->length = length;
177 result->hashcode_cached = false;
181 /* Compare two entries for equality. */
183 entry_equals (const void *elt1, const void *elt2)
185 const struct entry *entry1 = (const struct entry *) elt1;
186 const struct entry *entry2 = (const struct entry *) elt2;
187 return entry1->length == entry2->length
188 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
191 /* Return a hash code of the contents of a ChangeLog entry. */
193 entry_hashcode (const void *elt)
195 struct entry *entry = (struct entry *) elt;
196 if (!entry->hashcode_cached)
198 /* See http://www.haible.de/bruno/hashfunc.html. */
203 for (s = entry->string, n = entry->length; n > 0; s++, n--)
204 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
207 entry->hashcode_cached = true;
209 return entry->hashcode;
212 /* Perform a fuzzy comparison of two ChangeLog entries.
213 Return a similarity measure of the two entries, a value between 0 and 1.
214 0 stands for very distinct, 1 for identical.
215 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
218 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
221 /* fstrcmp works only on NUL terminated strings. */
225 if (memchr (entry1->string, '\0', entry1->length) != NULL)
227 if (memchr (entry2->string, '\0', entry2->length) != NULL)
229 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
232 memcpy (p, entry1->string, entry1->length);
235 memcpy (p, entry2->string, entry2->length);
240 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
245 /* This structure represents an entire ChangeLog file, after it was read
247 struct changelog_file
249 /* The entries, as a list. */
250 gl_list_t /* <struct entry *> */ entries_list;
251 /* The entries, as a list in opposite direction. */
252 gl_list_t /* <struct entry *> */ entries_reversed;
253 /* The entries, as an array. */
255 struct entry **entries;
258 /* Read a ChangeLog file into memory.
259 Return the contents in *RESULT. */
261 read_changelog_file (const char *filename, struct changelog_file *result)
263 /* Read the file in text mode, otherwise it's hard to recognize empty
266 char *contents = read_file (filename, &length);
267 if (contents == NULL)
269 fprintf (stderr, "could not read file '%s'\n", filename);
273 result->entries_list =
274 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
276 result->entries_reversed =
277 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
279 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
280 at a line following a blank line and that starts with a non-whitespace
281 character, or at the beginning of a file.
282 Split the file contents into entries. */
284 char *contents_end = contents + length;
285 char *start = contents;
286 while (start < contents_end)
288 /* Search the end of the current entry. */
292 while (ptr < contents_end)
294 ptr = memchr (ptr, '\n', contents_end - ptr);
301 if (contents_end - ptr >= 2
303 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
310 curr = entry_create (start, ptr - start);
311 gl_list_add_last (result->entries_list, curr);
312 gl_list_add_first (result->entries_reversed, curr);
318 result->num_entries = gl_list_size (result->entries_list);
319 result->entries = XNMALLOC (result->num_entries, struct entry *);
322 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
325 while (gl_list_iterator_next (&iter, &elt, &node))
326 result->entries[index++] = (struct entry *) elt;
327 gl_list_iterator_free (&iter);
328 ASSERT (index == result->num_entries);
332 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
333 Return a set of two arrays:
334 - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
335 from FILE1 is not found in FILE2).
336 - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
337 from FILE2 is not found in FILE1).
338 The correspondence also takes into account small modifications; i.e. the
339 indicated relation is not equality of entries but best-match similarity
342 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
345 /* Mapping from indices in file1 to indices in file2. */
346 ssize_t *index_mapping;
347 /* Mapping from indices in file2 to indices in file1. */
348 ssize_t *index_mapping_reverse;
349 size_t n1 = file1->num_entries;
350 size_t n2 = file2->num_entries;
353 index_mapping = XNMALLOC (n1, ssize_t);
354 for (i = 0; i < n1; i++)
355 index_mapping[i] = -1;
357 index_mapping_reverse = XNMALLOC (n2, ssize_t);
358 for (j = 0; j < n2; j++)
359 index_mapping_reverse[j] = -1;
361 for (i = n1 - 1; i >= 0; i--)
362 /* Take an entry from file1. */
363 if (index_mapping[i] < 0)
365 struct entry *entry = file1->entries[i];
366 /* Search whether it occurs in file2. */
367 j = gl_list_indexof (file2->entries_reversed, entry);
371 /* Found an exact correspondence. */
372 ASSERT (index_mapping_reverse[j] < 0);
373 index_mapping[i] = j;
374 index_mapping_reverse[j] = i;
375 /* Look for more occurrences of the same entry. */
386 gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
391 gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
395 curr_i = n1 - 1 - next_i;
396 curr_j = n2 - 1 - next_j;
397 ASSERT (index_mapping[curr_i] < 0);
398 ASSERT (index_mapping_reverse[curr_j] < 0);
399 index_mapping[curr_i] = curr_j;
400 index_mapping_reverse[curr_j] = curr_i;
406 for (i = n1 - 1; i >= 0; i--)
407 /* Take an entry from file1. */
408 if (index_mapping[i] < 0)
410 struct entry *entry_i = file1->entries[i];
411 /* Search whether it approximately occurs in file2. */
413 double best_j_similarity = 0.0;
414 for (j = n2 - 1; j >= 0; j--)
415 if (index_mapping_reverse[j] < 0)
418 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
419 if (similarity > best_j_similarity)
422 best_j_similarity = similarity;
425 if (best_j_similarity >= FSTRCMP_THRESHOLD)
427 /* Found a similar entry in file2. */
428 struct entry *entry_j = file2->entries[best_j];
429 /* Search whether it approximately occurs in file1 at index i. */
431 double best_i_similarity = 0.0;
433 for (ii = n1 - 1; ii >= 0; ii--)
434 if (index_mapping[ii] < 0)
437 entry_fstrcmp (file1->entries[ii], entry_j,
439 if (similarity > best_i_similarity)
442 best_i_similarity = similarity;
445 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
447 index_mapping[i] = best_j;
448 index_mapping_reverse[best_j] = i;
453 result[0] = index_mapping;
454 result[1] = index_mapping_reverse;
457 /* An "edit" is a textual modification performed by the user, that needs to
458 be applied to the other file. */
461 /* Some consecutive entries were added. */
463 /* Some consecutive entries were removed; some other consecutive entries
464 were added at the same position. (Not necessarily the same number of
467 /* Some consecutive entries were removed. */
471 /* This structure represents an edit. */
475 /* Range of indices into the entries of FILE1. */
476 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
477 /* Range of indices into the entries of FILE2. */
478 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
481 /* This structure represents the differences from one file, FILE1, to another
485 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
486 from FILE1 is not found in FILE2). */
487 ssize_t *index_mapping;
488 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
489 from FILE2 is not found in FILE1). */
490 ssize_t *index_mapping_reverse;
491 /* The edits that transform FILE1 into FILE2. */
496 /* Import the difference detection algorithm from GNU diff. */
497 #define ELEMENT struct entry *
498 #define EQUAL entry_equals
499 #define OFFSET ssize_t
500 #define EXTRA_CONTEXT_FIELDS \
501 ssize_t *index_mapping; \
502 ssize_t *index_mapping_reverse;
503 #define NOTE_DELETE(ctxt, xoff) \
504 ctxt->index_mapping[xoff] = -1
505 #define NOTE_INSERT(ctxt, yoff) \
506 ctxt->index_mapping_reverse[yoff] = -1
509 /* Compute the differences between the entries of FILE1 and the entries of
512 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
513 struct differences *result)
515 /* Unlike compute_mapping, which mostly ignores the order of the entries and
516 therefore works well when some entries are permuted, here we use the order.
517 I think this is needed in order to distinguish changes from
518 additions+removals; I don't know how to say what is a "change" if the
519 files are considered as unordered sets of entries. */
521 size_t n1 = file1->num_entries;
522 size_t n2 = file2->num_entries;
525 gl_list_t /* <struct edit *> */ edits;
527 ctxt.xvec = file1->entries;
528 ctxt.yvec = file2->entries;
529 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
530 for (i = 0; i < n1; i++)
531 ctxt.index_mapping[i] = 0;
532 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
533 for (j = 0; j < n2; j++)
534 ctxt.index_mapping_reverse[j] = 0;
535 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
536 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
537 ctxt.too_expensive = n1 + n2;
539 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
540 each removed or added entry. */
541 compareseq (0, n1, 0, n2, 0, &ctxt);
543 /* Complete the index_mapping and index_mapping_reverse arrays. */
546 while (i < n1 || j < n2)
548 while (i < n1 && ctxt.index_mapping[i] < 0)
550 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
552 ASSERT ((i < n1) == (j < n2));
553 if (i == n1 && j == n2)
555 ctxt.index_mapping[i] = j;
556 ctxt.index_mapping_reverse[j] = i;
561 /* Create the edits. */
562 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
565 while (i < n1 || j < n2)
571 e = XMALLOC (struct edit);
575 gl_list_add_last (edits, e);
582 e = XMALLOC (struct edit);
586 gl_list_add_last (edits, e);
589 if (ctxt.index_mapping[i] >= 0)
591 if (ctxt.index_mapping_reverse[j] >= 0)
593 ASSERT (ctxt.index_mapping[i] == j);
594 ASSERT (ctxt.index_mapping_reverse[j] == i);
601 ASSERT (ctxt.index_mapping_reverse[j] < 0);
602 e = XMALLOC (struct edit);
607 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
609 gl_list_add_last (edits, e);
614 if (ctxt.index_mapping_reverse[j] >= 0)
617 ASSERT (ctxt.index_mapping[i] < 0);
618 e = XMALLOC (struct edit);
623 while (i < n1 && ctxt.index_mapping[i] < 0);
625 gl_list_add_last (edits, e);
630 ASSERT (ctxt.index_mapping[i] < 0);
631 ASSERT (ctxt.index_mapping_reverse[j] < 0);
632 e = XMALLOC (struct edit);
637 while (i < n1 && ctxt.index_mapping[i] < 0);
642 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
644 gl_list_add_last (edits, e);
649 result->index_mapping = ctxt.index_mapping;
650 result->index_mapping_reverse = ctxt.index_mapping_reverse;
651 result->num_edits = gl_list_size (edits);
652 result->edits = XNMALLOC (result->num_edits, struct edit *);
655 gl_list_iterator_t iter = gl_list_iterator (edits);
658 while (gl_list_iterator_next (&iter, &elt, &node))
659 result->edits[index++] = (struct edit *) elt;
660 gl_list_iterator_free (&iter);
661 ASSERT (index == result->num_edits);
665 /* An empty entry. */
666 static struct entry empty_entry = { NULL, 0 };
668 /* Return the end a paragraph.
670 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
671 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
672 it is the start of a blank line or the end of the entry. */
674 find_paragraph_end (const struct entry *entry, size_t offset)
676 const char *string = entry->string;
677 size_t length = entry->length;
681 const char *nl = memchr (string + offset, '\n', length - offset);
684 offset = (nl - string) + 1;
685 if (offset < length && string[offset] == '\n')
690 /* Split a merged entry.
691 Given an old entry of the form
694 and a new entry of the form
698 where the two titles are the same and BODY and BODY' are very similar,
699 this computes two new entries
706 If the entries don't have this form, it returns false. */
708 try_split_merged_entry (const struct entry *old_entry,
709 const struct entry *new_entry,
710 struct entry *new_split[2])
712 size_t old_title_len = find_paragraph_end (old_entry, 0);
713 size_t new_title_len = find_paragraph_end (new_entry, 0);
714 struct entry old_body;
715 struct entry new_body;
716 size_t best_split_offset;
717 double best_similarity;
721 if (!(old_title_len == new_title_len
722 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
725 old_body.string = old_entry->string + old_title_len;
726 old_body.length = old_entry->length - old_title_len;
728 /* Determine where to split the new entry.
729 This is done by maximizing the similarity between BODY and BODY'. */
730 best_split_offset = split_offset = new_title_len;
731 best_similarity = 0.0;
736 new_body.string = new_entry->string + split_offset;
737 new_body.length = new_entry->length - split_offset;
739 entry_fstrcmp (&old_body, &new_body, best_similarity);
740 if (similarity > best_similarity)
742 best_split_offset = split_offset;
743 best_similarity = similarity;
745 if (best_similarity == 1.0)
746 /* It cannot get better. */
749 if (split_offset < new_entry->length)
750 split_offset = find_paragraph_end (new_entry, split_offset + 1);
755 /* BODY' should not be empty. */
756 if (best_split_offset == new_entry->length)
758 ASSERT (new_entry->string[best_split_offset] == '\n');
760 /* A certain similarity between BODY and BODY' is required. */
761 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
764 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
767 size_t len1 = new_title_len;
768 size_t len2 = new_entry->length - best_split_offset;
769 char *combined = XNMALLOC (len1 + len2, char);
770 memcpy (combined, new_entry->string, len1);
771 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
772 new_split[1] = entry_create (combined, len1 + len2);
778 /* Write the contents of an entry to the output stream FP. */
780 entry_write (FILE *fp, struct entry *entry)
782 if (entry->length > 0)
783 fwrite (entry->string, 1, entry->length, fp);
786 /* This structure represents a conflict.
787 A conflict can occur for various reasons. */
790 /* Parts from the ancestor file. */
791 size_t num_old_entries;
792 struct entry **old_entries;
793 /* Parts of the modified file. */
794 size_t num_modified_entries;
795 struct entry **modified_entries;
798 /* Write a conflict to the output stream FP, including markers. */
800 conflict_write (FILE *fp, struct conflict *c)
804 /* Use the same syntax as git's default merge driver.
805 Don't indent the contents of the entries (with things like ">" or "-"),
806 otherwise the user needs more textual editing to resolve the conflict. */
807 fputs ("<<<<<<<\n", fp);
808 for (i = 0; i < c->num_old_entries; i++)
809 entry_write (fp, c->old_entries[i]);
810 fputs ("=======\n", fp);
811 for (i = 0; i < c->num_modified_entries; i++)
812 entry_write (fp, c->modified_entries[i]);
813 fputs (">>>>>>>\n", fp);
817 static const struct option long_options[] =
819 { "help", no_argument, NULL, 'h' },
820 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
821 { "version", no_argument, NULL, 'V' },
825 /* Print a usage mesage and exit. */
829 if (status != EXIT_SUCCESS)
830 fprintf (stderr, "Try `%s --help' for more information.\n",
834 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
837 printf ("Merges independent modifications of a ChangeLog style file.\n");
838 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
839 printf ("A-FILE-NAME names the publicly modified file.\n");
840 printf ("B-FILE-NAME names the user-modified file.\n");
841 printf ("Writes the merged file into A-FILE-NAME.\n");
843 printf ("Operation modifiers:\n");
845 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
846 Use this if you have the habit to merge unrelated\n\
847 entries into a single one, separated only by a\n\
848 newline, just because they happened on the same\n\
851 printf ("Informative output:\n");
852 printf (" -h, --help display this help and exit\n");
853 printf (" -V, --version output version information and exit\n");
855 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
863 main (int argc, char *argv[])
868 bool split_merged_entry;
870 /* Set program name for messages. */
871 set_program_name (argv[0]);
873 /* Set default values for variables. */
876 split_merged_entry = false;
878 /* Parse command line options. */
879 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
882 case '\0': /* Long option. */
890 case CHAR_MAX + 1: /* --split-merged-entry */
891 split_merged_entry = true;
894 usage (EXIT_FAILURE);
899 /* Version information is requested. */
900 printf ("%s\n", program_name);
901 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
902 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
903 This is free software: you are free to change and redistribute it.\n\
904 There is NO WARRANTY, to the extent permitted by law.\n\
907 printf ("Written by %s.\n", "Bruno Haible");
913 /* Help is requested. */
914 usage (EXIT_SUCCESS);
917 /* Test argument count. */
918 if (optind + 3 != argc)
919 error (EXIT_FAILURE, 0, "expected three arguments");
922 const char *ancestor_file_name; /* O-FILE-NAME */
923 const char *destination_file_name; /* A-FILE-NAME */
925 const char *other_file_name; /* B-FILE-NAME */
926 const char *mainstream_file_name;
927 const char *modified_file_name;
928 struct changelog_file ancestor_file;
929 struct changelog_file mainstream_file;
930 struct changelog_file modified_file;
931 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
932 ssize_t *index_mapping;
933 /* Mapping from indices in mainstream_file to indices in ancestor_file. */
934 ssize_t *index_mapping_reverse;
935 struct differences diffs;
936 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
937 gl_list_t /* <struct entry *> */ result_entries;
938 gl_list_t /* <struct conflict *> */ result_conflicts;
940 ancestor_file_name = argv[optind];
941 destination_file_name = argv[optind + 1];
942 other_file_name = argv[optind + 2];
944 /* Heuristic to determine whether it's a pull in downstream direction
945 (e.g. pull from a centralized server) or a pull in upstream direction
946 (e.g. "git stash apply").
948 For ChangeLog this distinction is important. The difference between
949 an "upstream" and a "downstream" repository is that more people are
950 looking at the "upstream" repository. They want to be informed about
951 changes and expect them to be shown at the top of the ChangeLog.
952 When a user pulls downstream, on the other hand, he has two options:
953 a) He gets the change entries from the central repository also at the
954 top of his ChangeLog, and his own changes come after them.
955 b) He gets the change entries from the central repository after those
956 he has collected for his branch. His own change entries stay at
957 the top of the ChangeLog file.
958 In the case a) he has to reorder the ChangeLog before he can commit.
959 No one does that. So most people want b).
960 In other words, the order of entries in a ChangeLog should represent
961 the order in which they have flown (or will flow) into the *central*
964 But in git this is fundamentally indistinguishable, because when Linus
965 pulls patches from akpm and akpm pulls patches from Linus, it's not
966 clear which of the two is more "upstream". Also, when you have many
967 branches in a repository and pull from one to another, "git" has no way
968 to know which branch is more "upstream" than the other. The git-tag(1)
969 manual page also says:
970 "One important aspect of git is it is distributed, and being
971 distributed largely means there is no inherent "upstream" or
972 "downstream" in the system."
973 Therefore anyone who attempts to produce a ChangeLog from the merge
976 Here we allow the user to specify the pull direction through an
977 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
978 environment variables are not set, we assume a "simple single user"
979 usage pattern: He manages local changes through stashes and uses
980 "git pull" only to pull downstream.
982 How to distinguish these situation? There are several hints:
983 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
984 a "git pull", it is set to 'pull '. During a "git pull --rebase",
985 it is set to 'pull --rebase'. During a "git cherry-pick", it is
986 set to 'cherry-pick'.
987 - During a "git stash apply", there is an environment variable of
988 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
992 var = getenv ("GIT_DOWNSTREAM");
993 if (var != NULL && var[0] != '\0')
997 var = getenv ("GIT_UPSTREAM");
998 if (var != NULL && var[0] != '\0')
1002 var = getenv ("GIT_REFLOG_ACTION");
1003 #if 0 /* Debugging code */
1004 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1007 && ((strncmp (var, "pull", 4) == 0
1008 && c_strstr (var, " --rebase") == NULL)
1009 || strncmp (var, "merge origin", 12) == 0))
1013 /* "git stash apply", "git rebase", "git cherry-pick" and
1021 #if 0 /* Debugging code */
1024 printf ("First line of %%A:\n");
1025 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1026 printf ("First line of %%B:\n");
1027 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1028 printf ("Guessing calling convention: %s\n",
1030 ? "%A = modified by user, %B = upstream"
1031 : "%A = upstream, %B = modified by user");
1037 mainstream_file_name = other_file_name;
1038 modified_file_name = destination_file_name;
1042 mainstream_file_name = destination_file_name;
1043 modified_file_name = other_file_name;
1046 /* Read the three files into memory. */
1047 read_changelog_file (ancestor_file_name, &ancestor_file);
1048 read_changelog_file (mainstream_file_name, &mainstream_file);
1049 read_changelog_file (modified_file_name, &modified_file);
1051 /* Compute correspondence between the entries of ancestor_file and of
1055 compute_mapping (&ancestor_file, &mainstream_file, result);
1056 index_mapping = result[0];
1057 index_mapping_reverse = result[1];
1060 /* Compute differences between the entries of ancestor_file and of
1062 compute_differences (&ancestor_file, &modified_file, &diffs);
1064 /* Compute the result. */
1065 result_entries_pointers =
1066 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1068 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1072 for (k = 0; k < mainstream_file.num_entries; k++)
1073 result_entries_pointers[k] =
1074 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1077 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1080 for (e = 0; e < diffs.num_edits; e++)
1082 struct edit *edit = diffs.edits[e];
1088 /* An addition to the top of modified_file.
1089 Apply it to the top of mainstream_file. */
1091 for (j = edit->j2; j >= edit->j1; j--)
1093 struct entry *added_entry = modified_file.entries[j];
1094 gl_list_add_first (result_entries, added_entry);
1103 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1104 ASSERT (i_before >= 0);
1105 i_after = (edit->j2 + 1 == modified_file.num_entries
1106 ? ancestor_file.num_entries
1107 : diffs.index_mapping_reverse[edit->j2 + 1]);
1108 ASSERT (i_after >= 0);
1109 ASSERT (i_after == i_before + 1);
1110 /* An addition between ancestor_file.entries[i_before] and
1111 ancestor_file.entries[i_after]. See whether these two
1112 entries still exist in mainstream_file and are still
1114 k_before = index_mapping[i_before];
1115 k_after = (i_after == ancestor_file.num_entries
1116 ? mainstream_file.num_entries
1117 : index_mapping[i_after]);
1118 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1120 /* Yes, the entry before and after are still neighbours
1121 in mainstream_file. Apply the addition between
1123 if (k_after == mainstream_file.num_entries)
1126 for (j = edit->j1; j <= edit->j2; j++)
1128 struct entry *added_entry = modified_file.entries[j];
1129 gl_list_add_last (result_entries, added_entry);
1134 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1136 for (j = edit->j1; j <= edit->j2; j++)
1138 struct entry *added_entry = modified_file.entries[j];
1139 gl_list_add_before (result_entries, node_k_after, added_entry);
1145 /* It's not clear where the additions should be applied.
1146 Let the user decide. */
1147 struct conflict *c = XMALLOC (struct conflict);
1149 c->num_old_entries = 0;
1150 c->old_entries = NULL;
1151 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1152 c->modified_entries =
1153 XNMALLOC (c->num_modified_entries, struct entry *);
1154 for (j = edit->j1; j <= edit->j2; j++)
1155 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1156 gl_list_add_last (result_conflicts, c);
1162 /* Apply the removals one by one. */
1164 for (i = edit->i1; i <= edit->i2; i++)
1166 struct entry *removed_entry = ancestor_file.entries[i];
1167 ssize_t k = index_mapping[i];
1169 && entry_equals (removed_entry,
1170 mainstream_file.entries[k]))
1172 /* The entry to be removed still exists in
1173 mainstream_file. Remove it. */
1174 gl_list_node_set_value (result_entries,
1175 result_entries_pointers[k],
1180 /* The entry to be removed was already removed or was
1181 modified. This is a conflict. */
1182 struct conflict *c = XMALLOC (struct conflict);
1183 c->num_old_entries = 1;
1185 XNMALLOC (c->num_old_entries, struct entry *);
1186 c->old_entries[0] = removed_entry;
1187 c->num_modified_entries = 0;
1188 c->modified_entries = NULL;
1189 gl_list_add_last (result_conflicts, c);
1197 /* When the user usually merges entries from the same day,
1198 and this edit is at the top of the file: */
1199 if (split_merged_entry && edit->j1 == 0)
1201 /* Test whether the change is "simple merged", i.e. whether
1202 it consists of additions, followed by an augmentation of
1203 the first changed entry, followed by small changes of the
1216 modified_entry_n. */
1217 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1219 struct entry *split[2];
1220 bool simple_merged =
1221 try_split_merged_entry (ancestor_file.entries[edit->i1],
1222 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1227 for (i = edit->i1 + 1; i <= edit->i2; i++)
1228 if (entry_fstrcmp (ancestor_file.entries[i],
1229 modified_file.entries[i + edit->j2 - edit->i2],
1231 < FSTRCMP_THRESHOLD)
1233 simple_merged = false;
1239 /* Apply the additions at the top of modified_file.
1240 Apply each of the single-entry changes
1242 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1243 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1245 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1246 gl_list_add_first (result_entries, split[0]);
1247 /* The additions. */
1248 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1250 struct entry *added_entry = modified_file.entries[j];
1251 gl_list_add_first (result_entries, added_entry);
1253 /* Now the single-entry changes. */
1254 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1256 struct entry *changed_entry =
1257 (j == edit->j1 + num_added
1259 : modified_file.entries[j]);
1260 size_t i = j + edit->i2 - edit->j2;
1261 ssize_t k = index_mapping[i];
1263 && entry_equals (ancestor_file.entries[i],
1264 mainstream_file.entries[k]))
1266 gl_list_node_set_value (result_entries,
1267 result_entries_pointers[k],
1270 else if (!entry_equals (ancestor_file.entries[i],
1273 struct conflict *c = XMALLOC (struct conflict);
1274 c->num_old_entries = 1;
1276 XNMALLOC (c->num_old_entries, struct entry *);
1277 c->old_entries[0] = ancestor_file.entries[i];
1278 c->num_modified_entries = 1;
1279 c->modified_entries =
1280 XNMALLOC (c->num_modified_entries, struct entry *);
1281 c->modified_entries[0] = changed_entry;
1282 gl_list_add_last (result_conflicts, c);
1292 /* Test whether the change is "simple", i.e. whether it
1293 consists of small changes to the old ChangeLog entries
1294 and additions before them:
1304 modified_entry_n. */
1305 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1309 for (i = edit->i1; i <= edit->i2; i++)
1310 if (entry_fstrcmp (ancestor_file.entries[i],
1311 modified_file.entries[i + edit->j2 - edit->i2],
1313 < FSTRCMP_THRESHOLD)
1323 /* Apply the additions and each of the single-entry
1324 changes separately. */
1325 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1326 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1329 /* A simple change at the top of modified_file.
1330 Apply it to the top of mainstream_file. */
1332 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1334 struct entry *added_entry = modified_file.entries[j];
1335 gl_list_add_first (result_entries, added_entry);
1337 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1339 struct entry *changed_entry = modified_file.entries[j];
1340 size_t i = j + edit->i2 - edit->j2;
1341 ssize_t k = index_mapping[i];
1343 && entry_equals (ancestor_file.entries[i],
1344 mainstream_file.entries[k]))
1346 gl_list_node_set_value (result_entries,
1347 result_entries_pointers[k],
1353 ASSERT (!entry_equals (ancestor_file.entries[i],
1355 c = XMALLOC (struct conflict);
1356 c->num_old_entries = 1;
1358 XNMALLOC (c->num_old_entries, struct entry *);
1359 c->old_entries[0] = ancestor_file.entries[i];
1360 c->num_modified_entries = 1;
1361 c->modified_entries =
1362 XNMALLOC (c->num_modified_entries, struct entry *);
1363 c->modified_entries[0] = changed_entry;
1364 gl_list_add_last (result_conflicts, c);
1374 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1375 ASSERT (i_before >= 0);
1376 /* A simple change after ancestor_file.entries[i_before].
1377 See whether this entry and the following num_changed
1378 entries still exist in mainstream_file and are still
1380 k_before = index_mapping[i_before];
1381 linear = (k_before >= 0);
1385 for (i = i_before + 1; i <= i_before + num_changed; i++)
1386 if (index_mapping[i] != k_before + (i - i_before))
1394 gl_list_node_t node_for_insert =
1395 result_entries_pointers[k_before + 1];
1397 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1399 struct entry *added_entry = modified_file.entries[j];
1400 gl_list_add_before (result_entries, node_for_insert, added_entry);
1402 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1404 struct entry *changed_entry = modified_file.entries[j];
1405 size_t i = j + edit->i2 - edit->j2;
1406 ssize_t k = index_mapping[i];
1408 if (entry_equals (ancestor_file.entries[i],
1409 mainstream_file.entries[k]))
1411 gl_list_node_set_value (result_entries,
1412 result_entries_pointers[k],
1418 ASSERT (!entry_equals (ancestor_file.entries[i],
1420 c = XMALLOC (struct conflict);
1421 c->num_old_entries = 1;
1423 XNMALLOC (c->num_old_entries, struct entry *);
1424 c->old_entries[0] = ancestor_file.entries[i];
1425 c->num_modified_entries = 1;
1426 c->modified_entries =
1427 XNMALLOC (c->num_modified_entries, struct entry *);
1428 c->modified_entries[0] = changed_entry;
1429 gl_list_add_last (result_conflicts, c);
1439 See whether the num_changed entries still exist
1440 unchanged in mainstream_file and are still
1444 bool linear_unchanged;
1446 k_first = index_mapping[i_first];
1449 && entry_equals (ancestor_file.entries[i_first],
1450 mainstream_file.entries[k_first]));
1451 if (linear_unchanged)
1454 for (i = i_first + 1; i <= edit->i2; i++)
1455 if (!(index_mapping[i] == k_first + (i - i_first)
1456 && entry_equals (ancestor_file.entries[i],
1457 mainstream_file.entries[index_mapping[i]])))
1459 linear_unchanged = false;
1463 if (linear_unchanged)
1465 gl_list_node_t node_for_insert =
1466 result_entries_pointers[k_first];
1469 for (j = edit->j2; j >= edit->j1; j--)
1471 struct entry *new_entry = modified_file.entries[j];
1472 gl_list_add_before (result_entries, node_for_insert, new_entry);
1474 for (i = edit->i1; i <= edit->i2; i++)
1476 ssize_t k = index_mapping[i];
1478 ASSERT (entry_equals (ancestor_file.entries[i],
1479 mainstream_file.entries[k]));
1480 gl_list_node_set_value (result_entries,
1481 result_entries_pointers[k],
1490 struct conflict *c = XMALLOC (struct conflict);
1492 c->num_old_entries = edit->i2 - edit->i1 + 1;
1494 XNMALLOC (c->num_old_entries, struct entry *);
1495 for (i = edit->i1; i <= edit->i2; i++)
1496 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1497 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1498 c->modified_entries =
1499 XNMALLOC (c->num_modified_entries, struct entry *);
1500 for (j = edit->j1; j <= edit->j2; j++)
1501 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1502 gl_list_add_last (result_conflicts, c);
1510 /* Output the result. */
1512 FILE *fp = fopen (destination_file_name, "w");
1515 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1516 exit (EXIT_FAILURE);
1519 /* Output the conflicts at the top. */
1521 size_t n = gl_list_size (result_conflicts);
1523 for (i = 0; i < n; i++)
1524 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1526 /* Output the modified and unmodified entries, in order. */
1528 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1530 gl_list_node_t node;
1531 while (gl_list_iterator_next (&iter, &elt, &node))
1532 entry_write (fp, (struct entry *) elt);
1533 gl_list_iterator_free (&iter);
1536 if (fwriteerror (fp))
1538 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1539 exit (EXIT_FAILURE);
1543 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);