1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 effects to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
163 /* Cache for the hash code. */
164 bool hashcode_cached;
169 The memory region passed by the caller must of indefinite extent. It is
170 *not* copied here. */
171 static struct entry *
172 entry_create (char *string, size_t length)
174 struct entry *result = XMALLOC (struct entry);
175 result->string = string;
176 result->length = length;
177 result->hashcode_cached = false;
181 /* Compare two entries for equality. */
183 entry_equals (const void *elt1, const void *elt2)
185 const struct entry *entry1 = (const struct entry *) elt1;
186 const struct entry *entry2 = (const struct entry *) elt2;
187 return entry1->length == entry2->length
188 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
191 /* Return a hash code of the contents of a ChangeLog entry. */
193 entry_hashcode (const void *elt)
195 struct entry *entry = (struct entry *) elt;
196 if (!entry->hashcode_cached)
198 /* See http://www.haible.de/bruno/hashfunc.html. */
203 for (s = entry->string, n = entry->length; n > 0; s++, n--)
204 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
207 entry->hashcode_cached = true;
209 return entry->hashcode;
212 /* Perform a fuzzy comparison of two ChangeLog entries.
213 Return a similarity measure of the two entries, a value between 0 and 1.
214 0 stands for very distinct, 1 for identical. */
216 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
218 /* fstrcmp works only on NUL terminated strings. */
222 if (memchr (entry1->string, '\0', entry1->length) != NULL)
224 if (memchr (entry2->string, '\0', entry2->length) != NULL)
226 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
229 memcpy (p, entry1->string, entry1->length);
232 memcpy (p, entry2->string, entry2->length);
236 similarity = fstrcmp (memory, memory + entry1->length + 1);
241 /* This structure represents an entire ChangeLog file, after it was read
243 struct changelog_file
245 /* The entries, as a list. */
246 gl_list_t /* <struct entry *> */ entries_list;
247 /* The entries, as a list in opposite direction. */
248 gl_list_t /* <struct entry *> */ entries_reversed;
249 /* The entries, as an array. */
251 struct entry **entries;
254 /* Read a ChangeLog file into memory.
255 Return the contents in *RESULT. */
257 read_changelog_file (const char *filename, struct changelog_file *result)
259 /* Read the file in text mode, otherwise it's hard to recognize empty
262 char *contents = read_file (filename, &length);
263 if (contents == NULL)
265 fprintf (stderr, "could not read file '%s'\n", filename);
269 result->entries_list =
270 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
272 result->entries_reversed =
273 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
275 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
276 at a line following a blank line and that starts with a non-whitespace
277 character, or at the beginning of a file.
278 Split the file contents into entries. */
280 char *contents_end = contents + length;
281 char *start = contents;
282 while (start < contents_end)
284 /* Search the end of the current entry. */
288 while (ptr < contents_end)
290 ptr = memchr (ptr, '\n', contents_end - ptr);
297 if (contents_end - ptr >= 2
299 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
306 curr = entry_create (start, ptr - start);
307 gl_list_add_last (result->entries_list, curr);
308 gl_list_add_first (result->entries_reversed, curr);
314 result->num_entries = gl_list_size (result->entries_list);
315 result->entries = XNMALLOC (result->num_entries, struct entry *);
318 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
321 while (gl_list_iterator_next (&iter, &elt, &node))
322 result->entries[index++] = (struct entry *) elt;
323 gl_list_iterator_free (&iter);
324 ASSERT (index == result->num_entries);
328 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
329 Return a set of two arrays:
330 - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
331 from FILE1 is not found in FILE2).
332 - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
333 from FILE2 is not found in FILE1).
334 The correspondence also takes into account small modifications; i.e. the
335 indicated relation is not equality of entries but best-match similarity
338 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
341 /* Mapping from indices in file1 to indices in file2. */
342 ssize_t *index_mapping;
343 /* Mapping from indices in file2 to indices in file1. */
344 ssize_t *index_mapping_reverse;
345 size_t n1 = file1->num_entries;
346 size_t n2 = file2->num_entries;
349 index_mapping = XNMALLOC (n1, ssize_t);
350 for (i = 0; i < n1; i++)
351 index_mapping[i] = -1;
353 index_mapping_reverse = XNMALLOC (n2, ssize_t);
354 for (j = 0; j < n2; j++)
355 index_mapping_reverse[j] = -1;
357 for (i = n1 - 1; i >= 0; i--)
358 /* Take an entry from file1. */
359 if (index_mapping[i] < 0)
361 struct entry *entry = file1->entries[i];
362 /* Search whether it occurs in file2. */
363 j = gl_list_indexof (file2->entries_reversed, entry);
367 /* Found an exact correspondence. */
368 ASSERT (index_mapping_reverse[j] < 0);
369 index_mapping[i] = j;
370 index_mapping_reverse[j] = i;
371 /* Look for more occurrences of the same entry. */
382 gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
387 gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
391 curr_i = n1 - 1 - next_i;
392 curr_j = n2 - 1 - next_j;
393 ASSERT (index_mapping[curr_i] < 0);
394 ASSERT (index_mapping_reverse[curr_j] < 0);
395 index_mapping[curr_i] = curr_j;
396 index_mapping_reverse[curr_j] = curr_i;
402 for (i = n1 - 1; i >= 0; i--)
403 /* Take an entry from file1. */
404 if (index_mapping[i] < 0)
406 struct entry *entry_i = file1->entries[i];
407 /* Search whether it approximately occurs in file2. */
409 double best_j_similarity = 0.0;
410 for (j = n2 - 1; j >= 0; j--)
411 if (index_mapping_reverse[j] < 0)
413 double similarity = entry_fstrcmp (entry_i, file2->entries[j]);
414 if (similarity > best_j_similarity)
417 best_j_similarity = similarity;
420 if (best_j_similarity >= FSTRCMP_THRESHOLD)
422 /* Found a similar entry in file2. */
423 struct entry *entry_j = file2->entries[best_j];
424 /* Search whether it approximately occurs in file1 at index i. */
426 double best_i_similarity = 0.0;
428 for (ii = n1 - 1; ii >= 0; ii--)
429 if (index_mapping[ii] < 0)
432 entry_fstrcmp (file1->entries[ii], entry_j);
433 if (similarity > best_i_similarity)
436 best_i_similarity = similarity;
439 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
441 index_mapping[i] = best_j;
442 index_mapping_reverse[best_j] = i;
447 result[0] = index_mapping;
448 result[1] = index_mapping_reverse;
451 /* An "edit" is a textual modification performed by the user, that needs to
452 be applied to the other file. */
455 /* Some consecutive entries were added. */
457 /* Some consecutive entries were removed; some other consecutive entries
458 were added at the same position. (Not necessarily the same number of
461 /* Some consecutive entries were removed. */
465 /* This structure represents an edit. */
469 /* Range of indices into the entries of FILE1. */
470 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
471 /* Range of indices into the entries of FILE2. */
472 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
475 /* This structure represents the differences from one file, FILE1, to another
479 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
480 from FILE1 is not found in FILE2). */
481 ssize_t *index_mapping;
482 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
483 from FILE2 is not found in FILE1). */
484 ssize_t *index_mapping_reverse;
485 /* The edits that transform FILE1 into FILE2. */
490 /* Import the difference detection algorithm from GNU diff. */
491 #define ELEMENT struct entry *
492 #define EQUAL entry_equals
493 #define OFFSET ssize_t
494 #define EXTRA_CONTEXT_FIELDS \
495 ssize_t *index_mapping; \
496 ssize_t *index_mapping_reverse;
497 #define NOTE_DELETE(ctxt, xoff) \
498 ctxt->index_mapping[xoff] = -1
499 #define NOTE_INSERT(ctxt, yoff) \
500 ctxt->index_mapping_reverse[yoff] = -1
503 /* Compute the differences between the entries of FILE1 and the entries of
506 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
507 struct differences *result)
509 /* Unlike compute_mapping, which mostly ignores the order of the entries and
510 therefore works well when some entries are permuted, here we use the order.
511 I think this is needed in order to distinguish changes from
512 additions+removals; I don't know how to say what is a "change" if the
513 files are considered as unordered sets of entries. */
515 size_t n1 = file1->num_entries;
516 size_t n2 = file2->num_entries;
519 gl_list_t /* <struct edit *> */ edits;
521 ctxt.xvec = file1->entries;
522 ctxt.yvec = file2->entries;
523 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
524 for (i = 0; i < n1; i++)
525 ctxt.index_mapping[i] = 0;
526 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
527 for (j = 0; j < n2; j++)
528 ctxt.index_mapping_reverse[j] = 0;
529 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
530 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
531 ctxt.too_expensive = n1 + n2;
533 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
534 each removed or added entry. */
535 compareseq (0, n1, 0, n2, 0, &ctxt);
537 /* Complete the index_mapping and index_mapping_reverse arrays. */
540 while (i < n1 || j < n2)
542 while (i < n1 && ctxt.index_mapping[i] < 0)
544 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
546 ASSERT ((i < n1) == (j < n2));
547 if (i == n1 && j == n2)
549 ctxt.index_mapping[i] = j;
550 ctxt.index_mapping_reverse[j] = i;
555 /* Create the edits. */
556 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
559 while (i < n1 || j < n2)
565 e = XMALLOC (struct edit);
569 gl_list_add_last (edits, e);
576 e = XMALLOC (struct edit);
580 gl_list_add_last (edits, e);
583 if (ctxt.index_mapping[i] >= 0)
585 if (ctxt.index_mapping_reverse[j] >= 0)
587 ASSERT (ctxt.index_mapping[i] == j);
588 ASSERT (ctxt.index_mapping_reverse[j] == i);
595 ASSERT (ctxt.index_mapping_reverse[j] < 0);
596 e = XMALLOC (struct edit);
601 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
603 gl_list_add_last (edits, e);
608 if (ctxt.index_mapping_reverse[j] >= 0)
611 ASSERT (ctxt.index_mapping[i] < 0);
612 e = XMALLOC (struct edit);
617 while (i < n1 && ctxt.index_mapping[i] < 0);
619 gl_list_add_last (edits, e);
624 ASSERT (ctxt.index_mapping[i] < 0);
625 ASSERT (ctxt.index_mapping_reverse[j] < 0);
626 e = XMALLOC (struct edit);
631 while (i < n1 && ctxt.index_mapping[i] < 0);
636 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
638 gl_list_add_last (edits, e);
643 result->index_mapping = ctxt.index_mapping;
644 result->index_mapping_reverse = ctxt.index_mapping_reverse;
645 result->num_edits = gl_list_size (edits);
646 result->edits = XNMALLOC (result->num_edits, struct edit *);
649 gl_list_iterator_t iter = gl_list_iterator (edits);
652 while (gl_list_iterator_next (&iter, &elt, &node))
653 result->edits[index++] = (struct edit *) elt;
654 gl_list_iterator_free (&iter);
655 ASSERT (index == result->num_edits);
659 /* An empty entry. */
660 static struct entry empty_entry = { NULL, 0 };
662 /* Return the end a paragraph.
664 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
665 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
666 it is the start of a blank line or the end of the entry. */
668 find_paragraph_end (const struct entry *entry, size_t offset)
670 const char *string = entry->string;
671 size_t length = entry->length;
675 const char *nl = memchr (string + offset, '\n', length - offset);
678 offset = (nl - string) + 1;
679 if (offset < length && string[offset] == '\n')
684 /* Split a merged entry.
685 Given an old entry of the form
688 and a new entry of the form
692 where the two titles are the same and BODY and BODY' are very similar,
693 this computes two new entries
700 If the entries don't have this form, it returns false. */
702 try_split_merged_entry (const struct entry *old_entry,
703 const struct entry *new_entry,
704 struct entry *new_split[2])
706 size_t old_title_len = find_paragraph_end (old_entry, 0);
707 size_t new_title_len = find_paragraph_end (new_entry, 0);
708 struct entry old_body;
709 struct entry new_body;
710 size_t best_split_offset;
711 double best_similarity;
715 if (!(old_title_len == new_title_len
716 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
719 old_body.string = old_entry->string + old_title_len;
720 old_body.length = old_entry->length - old_title_len;
722 /* Determine where to split the new entry.
723 This is done by maximizing the similarity between BODY and BODY'. */
724 best_split_offset = split_offset = new_title_len;
725 best_similarity = 0.0;
730 new_body.string = new_entry->string + split_offset;
731 new_body.length = new_entry->length - split_offset;
732 similarity = entry_fstrcmp (&old_body, &new_body);
733 if (similarity > best_similarity)
735 best_split_offset = split_offset;
736 best_similarity = similarity;
738 if (best_similarity == 1.0)
739 /* It cannot get better. */
742 if (split_offset < new_entry->length)
743 split_offset = find_paragraph_end (new_entry, split_offset + 1);
748 /* BODY' should not be empty. */
749 if (best_split_offset == new_entry->length)
751 ASSERT (new_entry->string[best_split_offset] == '\n');
753 /* A certain similarity between BODY and BODY' is required. */
754 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
757 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
760 size_t len1 = new_title_len;
761 size_t len2 = new_entry->length - best_split_offset;
762 char *combined = XNMALLOC (len1 + len2, char);
763 memcpy (combined, new_entry->string, len1);
764 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
765 new_split[1] = entry_create (combined, len1 + len2);
771 /* Write the contents of an entry to the output stream FP. */
773 entry_write (FILE *fp, struct entry *entry)
775 if (entry->length > 0)
776 fwrite (entry->string, 1, entry->length, fp);
779 /* This structure represents a conflict.
780 A conflict can occur for various reasons. */
783 /* Parts from the ancestor file. */
784 size_t num_old_entries;
785 struct entry **old_entries;
786 /* Parts of the modified file. */
787 size_t num_modified_entries;
788 struct entry **modified_entries;
791 /* Write a conflict to the output stream FP, including markers. */
793 conflict_write (FILE *fp, struct conflict *c)
797 /* Use the same syntax as git's default merge driver.
798 Don't indent the contents of the entries (with things like ">" or "-"),
799 otherwise the user needs more textual editing to resolve the conflict. */
800 fputs ("<<<<<<<\n", fp);
801 for (i = 0; i < c->num_old_entries; i++)
802 entry_write (fp, c->old_entries[i]);
803 fputs ("=======\n", fp);
804 for (i = 0; i < c->num_modified_entries; i++)
805 entry_write (fp, c->modified_entries[i]);
806 fputs (">>>>>>>\n", fp);
810 static const struct option long_options[] =
812 { "help", no_argument, NULL, 'h' },
813 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
814 { "version", no_argument, NULL, 'V' },
818 /* Print a usage mesage and exit. */
822 if (status != EXIT_SUCCESS)
823 fprintf (stderr, "Try `%s --help' for more information.\n",
827 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
830 printf ("Merges independent modifications of a ChangeLog style file.\n");
831 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
832 printf ("A-FILE-NAME names the publicly modified file.\n");
833 printf ("B-FILE-NAME names the user-modified file.\n");
834 printf ("Writes the merged file into A-FILE-NAME.\n");
836 printf ("Operation modifiers:\n");
838 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
839 Use this if you have the habit to merge unrelated\n\
840 entries into a single one, separated only by a\n\
841 newline, just because they happened on the same\n\
844 printf ("Informative output:\n");
845 printf (" -h, --help display this help and exit\n");
846 printf (" -V, --version output version information and exit\n");
848 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
856 main (int argc, char *argv[])
861 bool split_merged_entry;
863 /* Set program name for messages. */
864 set_program_name (argv[0]);
866 /* Set default values for variables. */
869 split_merged_entry = false;
871 /* Parse command line options. */
872 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
875 case '\0': /* Long option. */
883 case CHAR_MAX + 1: /* --split-merged-entry */
884 split_merged_entry = true;
887 usage (EXIT_FAILURE);
892 /* Version information is requested. */
893 printf ("%s\n", program_name);
894 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
895 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
896 This is free software: you are free to change and redistribute it.\n\
897 There is NO WARRANTY, to the extent permitted by law.\n\
900 printf ("Written by %s.\n", "Bruno Haible");
906 /* Help is requested. */
907 usage (EXIT_SUCCESS);
910 /* Test argument count. */
911 if (optind + 3 != argc)
912 error (EXIT_FAILURE, 0, "expected three arguments");
915 const char *ancestor_file_name; /* O-FILE-NAME */
916 const char *destination_file_name; /* A-FILE-NAME */
918 const char *other_file_name; /* B-FILE-NAME */
919 const char *mainstream_file_name;
920 const char *modified_file_name;
921 struct changelog_file ancestor_file;
922 struct changelog_file mainstream_file;
923 struct changelog_file modified_file;
924 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
925 ssize_t *index_mapping;
926 /* Mapping from indices in mainstream_file to indices in ancestor_file. */
927 ssize_t *index_mapping_reverse;
928 struct differences diffs;
929 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
930 gl_list_t /* <struct entry *> */ result_entries;
931 gl_list_t /* <struct conflict *> */ result_conflicts;
933 ancestor_file_name = argv[optind];
934 destination_file_name = argv[optind + 1];
935 other_file_name = argv[optind + 2];
937 /* Heuristic to determine whether it's a pull in downstream direction
938 (e.g. pull from a centralized server) or a pull in upstream direction
939 (e.g. "git stash apply").
941 For ChangeLog this distinction is important. The difference between
942 an "upstream" and a "downstream" repository is that more people are
943 looking at the "upstream" repository. They want to be informed about
944 changes and expect them to be shown at the top of the ChangeLog.
945 When a user pulls downstream, on the other hand, he has two options:
946 a) He gets the change entries from the central repository also at the
947 top of his ChangeLog, and his own changes come after them.
948 b) He gets the change entries from the central repository after those
949 he has collected for his branch. His own change entries stay at
950 the top of the ChangeLog file.
951 In the case a) he has to reorder the ChangeLog before he can commit.
952 No one does that. So most people want b).
953 In other words, the order of entries in a ChangeLog should represent
954 the order in which they have flown (or will flow) into the *central*
957 But in git this is fundamentally indistinguishable, because when Linus
958 pulls patches from akpm and akpm pulls patches from Linus, it's not
959 clear which of the two is more "upstream". Also, when you have many
960 branches in a repository and pull from one to another, "git" has no way
961 to know which branch is more "upstream" than the other. The git-tag(1)
962 manual page also says:
963 "One important aspect of git is it is distributed, and being
964 distributed largely means there is no inherent "upstream" or
965 "downstream" in the system."
966 Therefore anyone who attempts to produce a ChangeLog from the merge
969 Here we allow the user to specify the pull direction through an
970 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
971 environment variables are not set, we assume a "simple single user"
972 usage pattern: He manages local changes through stashes and uses
973 "git pull" only to pull downstream.
975 How to distinguish these situation? There are several hints:
976 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
977 a "git pull", it is set to 'pull '. During a "git pull --rebase",
978 it is set to 'pull --rebase'.
979 - During a "git stash apply", there is an environment variable of
980 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
984 var = getenv ("GIT_DOWNSTREAM");
985 if (var != NULL && var[0] != '\0')
989 var = getenv ("GIT_UPSTREAM");
990 if (var != NULL && var[0] != '\0')
994 var = getenv ("GIT_REFLOG_ACTION");
995 #if 0 /* Debugging code */
996 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
999 && ((strncmp (var, "pull", 4) == 0
1000 && c_strstr (var, " --rebase") == NULL)
1001 || strncmp (var, "merge origin", 12) == 0))
1005 /* "git stash apply", "git rebase" and similar. */
1012 #if 0 /* Debugging code */
1015 printf ("First line of %%A:\n");
1016 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1017 printf ("First line of %%B:\n");
1018 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1019 printf ("Guessing calling convention: %s\n",
1021 ? "%A = modified by user, %B = upstream"
1022 : "%A = upstream, %B = modified by user");
1028 mainstream_file_name = other_file_name;
1029 modified_file_name = destination_file_name;
1033 mainstream_file_name = destination_file_name;
1034 modified_file_name = other_file_name;
1037 /* Read the three files into memory. */
1038 read_changelog_file (ancestor_file_name, &ancestor_file);
1039 read_changelog_file (mainstream_file_name, &mainstream_file);
1040 read_changelog_file (modified_file_name, &modified_file);
1042 /* Compute correspondence between the entries of ancestor_file and of
1046 compute_mapping (&ancestor_file, &mainstream_file, result);
1047 index_mapping = result[0];
1048 index_mapping_reverse = result[1];
1051 /* Compute differences between the entries of ancestor_file and of
1053 compute_differences (&ancestor_file, &modified_file, &diffs);
1055 /* Compute the result. */
1056 result_entries_pointers =
1057 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1059 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1063 for (k = 0; k < mainstream_file.num_entries; k++)
1064 result_entries_pointers[k] =
1065 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1068 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1071 for (e = 0; e < diffs.num_edits; e++)
1073 struct edit *edit = diffs.edits[e];
1079 /* An addition to the top of modified_file.
1080 Apply it to the top of mainstream_file. */
1082 for (j = edit->j2; j >= edit->j1; j--)
1084 struct entry *added_entry = modified_file.entries[j];
1085 gl_list_add_first (result_entries, added_entry);
1094 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1095 ASSERT (i_before >= 0);
1096 i_after = (edit->j2 + 1 == modified_file.num_entries
1097 ? ancestor_file.num_entries
1098 : diffs.index_mapping_reverse[edit->j2 + 1]);
1099 ASSERT (i_after >= 0);
1100 ASSERT (i_after == i_before + 1);
1101 /* An addition between ancestor_file.entries[i_before] and
1102 ancestor_file.entries[i_after]. See whether these two
1103 entries still exist in mainstream_file and are still
1105 k_before = index_mapping[i_before];
1106 k_after = (i_after == ancestor_file.num_entries
1107 ? mainstream_file.num_entries
1108 : index_mapping[i_after]);
1109 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1111 /* Yes, the entry before and after are still neighbours
1112 in mainstream_file. Apply the addition between
1114 if (k_after == mainstream_file.num_entries)
1117 for (j = edit->j1; j <= edit->j2; j++)
1119 struct entry *added_entry = modified_file.entries[j];
1120 gl_list_add_last (result_entries, added_entry);
1125 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1127 for (j = edit->j1; j <= edit->j2; j++)
1129 struct entry *added_entry = modified_file.entries[j];
1130 gl_list_add_before (result_entries, node_k_after, added_entry);
1136 /* It's not clear where the additions should be applied.
1137 Let the user decide. */
1138 struct conflict *c = XMALLOC (struct conflict);
1140 c->num_old_entries = 0;
1141 c->old_entries = NULL;
1142 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1143 c->modified_entries =
1144 XNMALLOC (c->num_modified_entries, struct entry *);
1145 for (j = edit->j1; j <= edit->j2; j++)
1146 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1147 gl_list_add_last (result_conflicts, c);
1153 /* Apply the removals one by one. */
1155 for (i = edit->i1; i <= edit->i2; i++)
1157 struct entry *removed_entry = ancestor_file.entries[i];
1158 ssize_t k = index_mapping[i];
1160 && entry_equals (removed_entry,
1161 mainstream_file.entries[k]))
1163 /* The entry to be removed still exists in
1164 mainstream_file. Remove it. */
1165 gl_list_node_set_value (result_entries,
1166 result_entries_pointers[k],
1171 /* The entry to be removed was already removed or was
1172 modified. This is a conflict. */
1173 struct conflict *c = XMALLOC (struct conflict);
1174 c->num_old_entries = 1;
1176 XNMALLOC (c->num_old_entries, struct entry *);
1177 c->old_entries[0] = removed_entry;
1178 c->num_modified_entries = 0;
1179 c->modified_entries = NULL;
1180 gl_list_add_last (result_conflicts, c);
1188 /* When the user usually merges entries from the same day,
1189 and this edit is at the top of the file: */
1190 if (split_merged_entry && edit->j1 == 0)
1192 /* Test whether the change is "simple merged", i.e. whether
1193 it consists of additions, followed by an augmentation of
1194 the first changed entry, followed by small changes of the
1207 modified_entry_n. */
1208 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1210 struct entry *split[2];
1211 bool simple_merged =
1212 try_split_merged_entry (ancestor_file.entries[edit->i1],
1213 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1218 for (i = edit->i1 + 1; i <= edit->i2; i++)
1219 if (entry_fstrcmp (ancestor_file.entries[i],
1220 modified_file.entries[i + edit->j2 - edit->i2])
1221 < FSTRCMP_THRESHOLD)
1223 simple_merged = false;
1229 /* Apply the additions at the top of modified_file.
1230 Apply each of the single-entry changes
1232 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1233 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1235 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1236 gl_list_add_first (result_entries, split[0]);
1237 /* The additions. */
1238 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1240 struct entry *added_entry = modified_file.entries[j];
1241 gl_list_add_first (result_entries, added_entry);
1243 /* Now the single-entry changes. */
1244 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1246 struct entry *changed_entry =
1247 (j == edit->j1 + num_added
1249 : modified_file.entries[j]);
1250 size_t i = j + edit->i2 - edit->j2;
1251 ssize_t k = index_mapping[i];
1253 && entry_equals (ancestor_file.entries[i],
1254 mainstream_file.entries[k]))
1256 gl_list_node_set_value (result_entries,
1257 result_entries_pointers[k],
1262 struct conflict *c = XMALLOC (struct conflict);
1263 c->num_old_entries = 1;
1265 XNMALLOC (c->num_old_entries, struct entry *);
1266 c->old_entries[0] = ancestor_file.entries[i];
1267 c->num_modified_entries = 1;
1268 c->modified_entries =
1269 XNMALLOC (c->num_modified_entries, struct entry *);
1270 c->modified_entries[0] = changed_entry;
1271 gl_list_add_last (result_conflicts, c);
1281 /* Test whether the change is "simple", i.e. whether it
1282 consists of small changes to the old ChangeLog entries
1283 and additions before them:
1293 modified_entry_n. */
1294 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1298 for (i = edit->i1; i <= edit->i2; i++)
1299 if (entry_fstrcmp (ancestor_file.entries[i],
1300 modified_file.entries[i + edit->j2 - edit->i2])
1301 < FSTRCMP_THRESHOLD)
1311 /* Apply the additions and each of the single-entry
1312 changes separately. */
1313 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1314 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1317 /* A simple change at the top of modified_file.
1318 Apply it to the top of mainstream_file. */
1320 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1322 struct entry *added_entry = modified_file.entries[j];
1323 gl_list_add_first (result_entries, added_entry);
1325 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1327 struct entry *changed_entry = modified_file.entries[j];
1328 size_t i = j + edit->i2 - edit->j2;
1329 ssize_t k = index_mapping[i];
1331 && entry_equals (ancestor_file.entries[i],
1332 mainstream_file.entries[k]))
1334 gl_list_node_set_value (result_entries,
1335 result_entries_pointers[k],
1340 struct conflict *c = XMALLOC (struct conflict);
1341 c->num_old_entries = 1;
1343 XNMALLOC (c->num_old_entries, struct entry *);
1344 c->old_entries[0] = ancestor_file.entries[i];
1345 c->num_modified_entries = 1;
1346 c->modified_entries =
1347 XNMALLOC (c->num_modified_entries, struct entry *);
1348 c->modified_entries[0] = changed_entry;
1349 gl_list_add_last (result_conflicts, c);
1359 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1360 ASSERT (i_before >= 0);
1361 /* A simple change after ancestor_file.entries[i_before].
1362 See whether this entry and the following num_changed
1363 entries still exist in mainstream_file and are still
1365 k_before = index_mapping[i_before];
1366 linear = (k_before >= 0);
1370 for (i = i_before + 1; i <= i_before + num_changed; i++)
1371 if (index_mapping[i] != k_before + (i - i_before))
1379 gl_list_node_t node_for_insert =
1380 result_entries_pointers[k_before + 1];
1382 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1384 struct entry *added_entry = modified_file.entries[j];
1385 gl_list_add_before (result_entries, node_for_insert, added_entry);
1387 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1389 struct entry *changed_entry = modified_file.entries[j];
1390 size_t i = j + edit->i2 - edit->j2;
1391 ssize_t k = index_mapping[i];
1393 if (entry_equals (ancestor_file.entries[i],
1394 mainstream_file.entries[k]))
1396 gl_list_node_set_value (result_entries,
1397 result_entries_pointers[k],
1402 struct conflict *c = XMALLOC (struct conflict);
1403 c->num_old_entries = 1;
1405 XNMALLOC (c->num_old_entries, struct entry *);
1406 c->old_entries[0] = ancestor_file.entries[i];
1407 c->num_modified_entries = 1;
1408 c->modified_entries =
1409 XNMALLOC (c->num_modified_entries, struct entry *);
1410 c->modified_entries[0] = changed_entry;
1411 gl_list_add_last (result_conflicts, c);
1421 See whether the num_changed entries still exist
1422 unchanged in mainstream_file and are still
1426 bool linear_unchanged;
1428 k_first = index_mapping[i_first];
1431 && entry_equals (ancestor_file.entries[i_first],
1432 mainstream_file.entries[k_first]));
1433 if (linear_unchanged)
1436 for (i = i_first + 1; i <= edit->i2; i++)
1437 if (!(index_mapping[i] == k_first + (i - i_first)
1438 && entry_equals (ancestor_file.entries[i],
1439 mainstream_file.entries[index_mapping[i]])))
1441 linear_unchanged = false;
1445 if (linear_unchanged)
1447 gl_list_node_t node_for_insert =
1448 result_entries_pointers[k_first];
1451 for (j = edit->j2; j >= edit->j1; j--)
1453 struct entry *new_entry = modified_file.entries[j];
1454 gl_list_add_before (result_entries, node_for_insert, new_entry);
1456 for (i = edit->i1; i <= edit->i2; i++)
1458 ssize_t k = index_mapping[i];
1460 ASSERT (entry_equals (ancestor_file.entries[i],
1461 mainstream_file.entries[k]));
1462 gl_list_node_set_value (result_entries,
1463 result_entries_pointers[k],
1472 struct conflict *c = XMALLOC (struct conflict);
1474 c->num_old_entries = edit->i2 - edit->i1 + 1;
1476 XNMALLOC (c->num_old_entries, struct entry *);
1477 for (i = edit->i1; i <= edit->i2; i++)
1478 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1479 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1480 c->modified_entries =
1481 XNMALLOC (c->num_modified_entries, struct entry *);
1482 for (j = edit->j1; j <= edit->j2; j++)
1483 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1484 gl_list_add_last (result_conflicts, c);
1492 /* Output the result. */
1494 FILE *fp = fopen (destination_file_name, "w");
1497 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1498 exit (EXIT_FAILURE);
1501 /* Output the conflicts at the top. */
1503 size_t n = gl_list_size (result_conflicts);
1505 for (i = 0; i < n; i++)
1506 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1508 /* Output the modified and unmodified entries, in order. */
1510 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1512 gl_list_node_t node;
1513 while (gl_list_iterator_next (&iter, &elt, &node))
1514 entry_write (fp, (struct entry *) elt);
1515 gl_list_iterator_free (&iter);
1518 if (fwriteerror (fp))
1520 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1521 exit (EXIT_FAILURE);
1525 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);