1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 effects to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
165 /* Compare two entries for equality. */
167 entry_equals (const void *elt1, const void *elt2)
169 const struct entry *entry1 = (const struct entry *) elt1;
170 const struct entry *entry2 = (const struct entry *) elt2;
171 return entry1->length == entry2->length
172 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
175 /* Return a hash code of the contents of a ChangeLog entry. */
177 entry_hashcode (const void *elt)
179 const struct entry *entry = (const struct entry *) elt;
180 /* See http://www.haible.de/bruno/hashfunc.html. */
185 for (s = entry->string, n = entry->length; n > 0; s++, n--)
186 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
191 /* Perform a fuzzy comparison of two ChangeLog entries.
192 Return a similarity measure of the two entries, a value between 0 and 1.
193 0 stands for very distinct, 1 for identical. */
195 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
197 /* fstrcmp works only on NUL terminated strings. */
201 if (memchr (entry1->string, '\0', entry1->length) != NULL)
203 if (memchr (entry2->string, '\0', entry2->length) != NULL)
205 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
208 memcpy (p, entry1->string, entry1->length);
211 memcpy (p, entry2->string, entry2->length);
215 similarity = fstrcmp (memory, memory + entry1->length + 1);
220 /* This structure represents an entire ChangeLog file, after it was read
222 struct changelog_file
224 /* The entries, as a list. */
225 gl_list_t /* <struct entry *> */ entries_list;
226 /* The entries, as a list in opposite direction. */
227 gl_list_t /* <struct entry *> */ entries_reversed;
228 /* The entries, as an array. */
230 struct entry **entries;
233 /* Read a ChangeLog file into memory.
234 Return the contents in *RESULT. */
236 read_changelog_file (const char *filename, struct changelog_file *result)
238 /* Read the file in text mode, otherwise it's hard to recognize empty
241 char *contents = read_file (filename, &length);
242 if (contents == NULL)
244 fprintf (stderr, "could not read file '%s'\n", filename);
248 result->entries_list =
249 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
251 result->entries_reversed =
252 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
254 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
255 at a line following a blank line and that starts with a non-whitespace
256 character, or at the beginning of a file.
257 Split the file contents into entries. */
259 char *contents_end = contents + length;
260 char *start = contents;
261 while (start < contents_end)
263 /* Search the end of the current entry. */
267 while (ptr < contents_end)
269 ptr = memchr (ptr, '\n', contents_end - ptr);
276 if (contents_end - ptr >= 2
278 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
285 curr = XMALLOC (struct entry);
286 curr->string = start;
287 curr->length = ptr - start;
288 gl_list_add_last (result->entries_list, curr);
289 gl_list_add_first (result->entries_reversed, curr);
295 result->num_entries = gl_list_size (result->entries_list);
296 result->entries = XNMALLOC (result->num_entries, struct entry *);
299 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
302 while (gl_list_iterator_next (&iter, &elt, &node))
303 result->entries[index++] = (struct entry *) elt;
304 gl_list_iterator_free (&iter);
305 ASSERT (index == result->num_entries);
309 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
310 Return a set of two arrays:
311 - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
312 from FILE1 is not found in FILE2).
313 - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
314 from FILE2 is not found in FILE1).
315 The correspondence also takes into account small modifications; i.e. the
316 indicated relation is not equality of entries but best-match similarity
319 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
322 /* Mapping from indices in file1 to indices in file2. */
323 ssize_t *index_mapping;
324 /* Mapping from indices in file2 to indices in file1. */
325 ssize_t *index_mapping_reverse;
326 size_t n1 = file1->num_entries;
327 size_t n2 = file2->num_entries;
330 index_mapping = XNMALLOC (n1, ssize_t);
331 for (i = 0; i < n1; i++)
332 index_mapping[i] = -1;
334 index_mapping_reverse = XNMALLOC (n2, ssize_t);
335 for (j = 0; j < n2; j++)
336 index_mapping_reverse[j] = -1;
338 for (i = n1 - 1; i >= 0; i--)
339 /* Take an entry from file1. */
340 if (index_mapping[i] < 0)
342 struct entry *entry = file1->entries[i];
343 /* Search whether it occurs in file2. */
344 j = gl_list_indexof (file2->entries_reversed, entry);
348 /* Found an exact correspondence. */
349 ASSERT (index_mapping_reverse[j] < 0);
350 index_mapping[i] = j;
351 index_mapping_reverse[j] = i;
352 /* Look for more occurrences of the same entry. */
363 gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
368 gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
372 curr_i = n1 - 1 - next_i;
373 curr_j = n2 - 1 - next_j;
374 ASSERT (index_mapping[curr_i] < 0);
375 ASSERT (index_mapping_reverse[curr_j] < 0);
376 index_mapping[curr_i] = curr_j;
377 index_mapping_reverse[curr_j] = curr_i;
383 for (i = n1 - 1; i >= 0; i--)
384 /* Take an entry from file1. */
385 if (index_mapping[i] < 0)
387 struct entry *entry_i = file1->entries[i];
388 /* Search whether it approximately occurs in file2. */
390 double best_j_similarity = 0.0;
391 for (j = n2 - 1; j >= 0; j--)
392 if (index_mapping_reverse[j] < 0)
394 double similarity = entry_fstrcmp (entry_i, file2->entries[j]);
395 if (similarity > best_j_similarity)
398 best_j_similarity = similarity;
401 if (best_j_similarity >= FSTRCMP_THRESHOLD)
403 /* Found a similar entry in file2. */
404 struct entry *entry_j = file2->entries[best_j];
405 /* Search whether it approximately occurs in file1 at index i. */
407 double best_i_similarity = 0.0;
409 for (ii = n1 - 1; ii >= 0; ii--)
410 if (index_mapping[ii] < 0)
413 entry_fstrcmp (file1->entries[ii], entry_j);
414 if (similarity > best_i_similarity)
417 best_i_similarity = similarity;
420 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
422 index_mapping[i] = best_j;
423 index_mapping_reverse[best_j] = i;
428 result[0] = index_mapping;
429 result[1] = index_mapping_reverse;
432 /* An "edit" is a textual modification performed by the user, that needs to
433 be applied to the other file. */
436 /* Some consecutive entries were added. */
438 /* Some consecutive entries were removed; some other consecutive entries
439 were added at the same position. (Not necessarily the same number of
442 /* Some consecutive entries were removed. */
446 /* This structure represents an edit. */
450 /* Range of indices into the entries of FILE1. */
451 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
452 /* Range of indices into the entries of FILE2. */
453 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
456 /* This structure represents the differences from one file, FILE1, to another
460 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
461 from FILE1 is not found in FILE2). */
462 ssize_t *index_mapping;
463 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
464 from FILE2 is not found in FILE1). */
465 ssize_t *index_mapping_reverse;
466 /* The edits that transform FILE1 into FILE2. */
471 /* Import the difference detection algorithm from GNU diff. */
472 #define ELEMENT struct entry *
473 #define EQUAL entry_equals
474 #define OFFSET ssize_t
475 #define EXTRA_CONTEXT_FIELDS \
476 ssize_t *index_mapping; \
477 ssize_t *index_mapping_reverse;
478 #define NOTE_DELETE(ctxt, xoff) \
479 ctxt->index_mapping[xoff] = -1
480 #define NOTE_INSERT(ctxt, yoff) \
481 ctxt->index_mapping_reverse[yoff] = -1
484 /* Compute the differences between the entries of FILE1 and the entries of
487 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
488 struct differences *result)
490 /* Unlike compute_mapping, which mostly ignores the order of the entries and
491 therefore works well when some entries are permuted, here we use the order.
492 I think this is needed in order to distinguish changes from
493 additions+removals; I don't know how to say what is a "change" if the
494 files are considered as unordered sets of entries. */
496 size_t n1 = file1->num_entries;
497 size_t n2 = file2->num_entries;
500 gl_list_t /* <struct edit *> */ edits;
502 ctxt.xvec = file1->entries;
503 ctxt.yvec = file2->entries;
504 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
505 for (i = 0; i < n1; i++)
506 ctxt.index_mapping[i] = 0;
507 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
508 for (j = 0; j < n2; j++)
509 ctxt.index_mapping_reverse[j] = 0;
510 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
511 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
512 ctxt.too_expensive = n1 + n2;
514 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
515 each removed or added entry. */
516 compareseq (0, n1, 0, n2, 0, &ctxt);
518 /* Complete the index_mapping and index_mapping_reverse arrays. */
521 while (i < n1 || j < n2)
523 while (i < n1 && ctxt.index_mapping[i] < 0)
525 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
527 ASSERT ((i < n1) == (j < n2));
528 if (i == n1 && j == n2)
530 ctxt.index_mapping[i] = j;
531 ctxt.index_mapping_reverse[j] = i;
536 /* Create the edits. */
537 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
540 while (i < n1 || j < n2)
546 e = XMALLOC (struct edit);
550 gl_list_add_last (edits, e);
557 e = XMALLOC (struct edit);
561 gl_list_add_last (edits, e);
564 if (ctxt.index_mapping[i] >= 0)
566 if (ctxt.index_mapping_reverse[j] >= 0)
568 ASSERT (ctxt.index_mapping[i] == j);
569 ASSERT (ctxt.index_mapping_reverse[j] == i);
576 ASSERT (ctxt.index_mapping_reverse[j] < 0);
577 e = XMALLOC (struct edit);
582 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
584 gl_list_add_last (edits, e);
589 if (ctxt.index_mapping_reverse[j] >= 0)
592 ASSERT (ctxt.index_mapping[i] < 0);
593 e = XMALLOC (struct edit);
598 while (i < n1 && ctxt.index_mapping[i] < 0);
600 gl_list_add_last (edits, e);
605 ASSERT (ctxt.index_mapping[i] < 0);
606 ASSERT (ctxt.index_mapping_reverse[j] < 0);
607 e = XMALLOC (struct edit);
612 while (i < n1 && ctxt.index_mapping[i] < 0);
617 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
619 gl_list_add_last (edits, e);
624 result->index_mapping = ctxt.index_mapping;
625 result->index_mapping_reverse = ctxt.index_mapping_reverse;
626 result->num_edits = gl_list_size (edits);
627 result->edits = XNMALLOC (result->num_edits, struct edit *);
630 gl_list_iterator_t iter = gl_list_iterator (edits);
633 while (gl_list_iterator_next (&iter, &elt, &node))
634 result->edits[index++] = (struct edit *) elt;
635 gl_list_iterator_free (&iter);
636 ASSERT (index == result->num_edits);
640 /* An empty entry. */
641 static struct entry empty_entry = { NULL, 0 };
643 /* Return the end a paragraph.
645 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
646 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
647 it is the start of a blank line or the end of the entry. */
649 find_paragraph_end (const struct entry *entry, size_t offset)
651 const char *string = entry->string;
652 size_t length = entry->length;
656 const char *nl = memchr (string + offset, '\n', length - offset);
659 offset = (nl - string) + 1;
660 if (offset < length && string[offset] == '\n')
665 /* Split a merged entry.
666 Given an old entry of the form
669 and a new entry of the form
673 where the two titles are the same and BODY and BODY' are very similar,
674 this computes two new entries
681 If the entries don't have this form, it returns false. */
683 try_split_merged_entry (const struct entry *old_entry,
684 const struct entry *new_entry,
685 struct entry *new_split[2])
687 size_t old_title_len = find_paragraph_end (old_entry, 0);
688 size_t new_title_len = find_paragraph_end (new_entry, 0);
689 struct entry old_body;
690 struct entry new_body;
691 size_t best_split_offset;
692 double best_similarity;
696 if (!(old_title_len == new_title_len
697 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
700 old_body.string = old_entry->string + old_title_len;
701 old_body.length = old_entry->length - old_title_len;
703 /* Determine where to split the new entry.
704 This is done by maximizing the similarity between BODY and BODY'. */
705 best_split_offset = split_offset = new_title_len;
706 best_similarity = 0.0;
711 new_body.string = new_entry->string + split_offset;
712 new_body.length = new_entry->length - split_offset;
713 similarity = entry_fstrcmp (&old_body, &new_body);
714 if (similarity > best_similarity)
716 best_split_offset = split_offset;
717 best_similarity = similarity;
719 if (best_similarity == 1.0)
720 /* It cannot get better. */
723 if (split_offset < new_entry->length)
724 split_offset = find_paragraph_end (new_entry, split_offset + 1);
729 /* BODY' should not be empty. */
730 if (best_split_offset == new_entry->length)
732 ASSERT (new_entry->string[best_split_offset] == '\n');
734 /* A certain similarity between BODY and BODY' is required. */
735 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
738 new_split[0] = XMALLOC (struct entry);
739 new_split[0]->string = new_entry->string;
740 new_split[0]->length = best_split_offset + 1;
742 new_split[1] = XMALLOC (struct entry);
744 size_t len1 = new_title_len;
745 size_t len2 = new_entry->length - best_split_offset;
746 char *combined = XNMALLOC (len1 + len2, char);
747 memcpy (combined, new_entry->string, len1);
748 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
749 new_split[1]->string = combined;
750 new_split[1]->length = len1 + len2;
756 /* Write the contents of an entry to the output stream FP. */
758 entry_write (FILE *fp, struct entry *entry)
760 if (entry->length > 0)
761 fwrite (entry->string, 1, entry->length, fp);
764 /* This structure represents a conflict.
765 A conflict can occur for various reasons. */
768 /* Parts from the ancestor file. */
769 size_t num_old_entries;
770 struct entry **old_entries;
771 /* Parts of the modified file. */
772 size_t num_modified_entries;
773 struct entry **modified_entries;
776 /* Write a conflict to the output stream FP, including markers. */
778 conflict_write (FILE *fp, struct conflict *c)
782 /* Use the same syntax as git's default merge driver.
783 Don't indent the contents of the entries (with things like ">" or "-"),
784 otherwise the user needs more textual editing to resolve the conflict. */
785 fputs ("<<<<<<<\n", fp);
786 for (i = 0; i < c->num_old_entries; i++)
787 entry_write (fp, c->old_entries[i]);
788 fputs ("=======\n", fp);
789 for (i = 0; i < c->num_modified_entries; i++)
790 entry_write (fp, c->modified_entries[i]);
791 fputs (">>>>>>>\n", fp);
795 static const struct option long_options[] =
797 { "help", no_argument, NULL, 'h' },
798 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
799 { "version", no_argument, NULL, 'V' },
803 /* Print a usage mesage and exit. */
807 if (status != EXIT_SUCCESS)
808 fprintf (stderr, "Try `%s --help' for more information.\n",
812 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
815 printf ("Merges independent modifications of a ChangeLog style file.\n");
816 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
817 printf ("A-FILE-NAME names the publicly modified file.\n");
818 printf ("B-FILE-NAME names the user-modified file.\n");
819 printf ("Writes the merged file into A-FILE-NAME.\n");
821 printf ("Operation modifiers:\n");
823 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
824 Use this if you have the habit to merge unrelated\n\
825 entries into a single one, separated only by a\n\
826 newline, just because they happened on the same\n\
829 printf ("Informative output:\n");
830 printf (" -h, --help display this help and exit\n");
831 printf (" -V, --version output version information and exit\n");
833 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
841 main (int argc, char *argv[])
846 bool split_merged_entry;
848 /* Set program name for messages. */
849 set_program_name (argv[0]);
851 /* Set default values for variables. */
854 split_merged_entry = false;
856 /* Parse command line options. */
857 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
860 case '\0': /* Long option. */
868 case CHAR_MAX + 1: /* --split-merged-entry */
869 split_merged_entry = true;
872 usage (EXIT_FAILURE);
877 /* Version information is requested. */
878 printf ("%s\n", program_name);
879 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
880 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
881 This is free software: you are free to change and redistribute it.\n\
882 There is NO WARRANTY, to the extent permitted by law.\n\
885 printf ("Written by %s.\n", "Bruno Haible");
891 /* Help is requested. */
892 usage (EXIT_SUCCESS);
895 /* Test argument count. */
896 if (optind + 3 != argc)
897 error (EXIT_FAILURE, 0, "expected three arguments");
900 const char *ancestor_file_name; /* O-FILE-NAME */
901 const char *destination_file_name; /* A-FILE-NAME */
903 const char *other_file_name; /* B-FILE-NAME */
904 const char *mainstream_file_name;
905 const char *modified_file_name;
906 struct changelog_file ancestor_file;
907 struct changelog_file mainstream_file;
908 struct changelog_file modified_file;
909 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
910 ssize_t *index_mapping;
911 /* Mapping from indices in mainstream_file to indices in ancestor_file. */
912 ssize_t *index_mapping_reverse;
913 struct differences diffs;
914 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
915 gl_list_t /* <struct entry *> */ result_entries;
916 gl_list_t /* <struct conflict *> */ result_conflicts;
918 ancestor_file_name = argv[optind];
919 destination_file_name = argv[optind + 1];
920 other_file_name = argv[optind + 2];
922 /* Heuristic to determine whether it's a pull in downstream direction
923 (e.g. pull from a centralized server) or a pull in upstream direction
924 (e.g. "git stash apply").
926 For ChangeLog this distinction is important. The difference between
927 an "upstream" and a "downstream" repository is that more people are
928 looking at the "upstream" repository. They want to be informed about
929 changes and expect them to be shown at the top of the ChangeLog.
930 When a user pulls downstream, on the other hand, he has two options:
931 a) He gets the change entries from the central repository also at the
932 top of his ChangeLog, and his own changes come after them.
933 b) He gets the change entries from the central repository after those
934 he has collected for his branch. His own change entries stay at
935 the top of the ChangeLog file.
936 In the case a) he has to reorder the ChangeLog before he can commit.
937 No one does that. So most people want b).
938 In other words, the order of entries in a ChangeLog should represent
939 the order in which they have flown (or will flow) into the *central*
942 But in git this is fundamentally indistinguishable, because when Linus
943 pulls patches from akpm and akpm pulls patches from Linus, it's not
944 clear which of the two is more "upstream". Also, when you have many
945 branches in a repository and pull from one to another, "git" has no way
946 to know which branch is more "upstream" than the other. The git-tag(1)
947 manual page also says:
948 "One important aspect of git is it is distributed, and being
949 distributed largely means there is no inherent "upstream" or
950 "downstream" in the system."
951 Therefore anyone who attempts to produce a ChangeLog from the merge
954 Here we allow the user to specify the pull direction through an
955 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
956 environment variables are not set, we assume a "simple single user"
957 usage pattern: He manages local changes through stashes and uses
958 "git pull" only to pull downstream.
960 How to distinguish these situation? There are several hints:
961 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
962 a "git pull", it is set to 'pull '. During a "git pull --rebase",
963 it is set to 'pull --rebase'.
964 - During a "git stash apply", there is an environment variable of
965 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
969 var = getenv ("GIT_DOWNSTREAM");
970 if (var != NULL && var[0] != '\0')
974 var = getenv ("GIT_UPSTREAM");
975 if (var != NULL && var[0] != '\0')
979 var = getenv ("GIT_REFLOG_ACTION");
980 #if 0 /* Debugging code */
981 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
984 && ((strncmp (var, "pull", 4) == 0
985 && c_strstr (var, " --rebase") == NULL)
986 || strncmp (var, "merge origin", 12) == 0))
990 /* "git stash apply", "git rebase" and similar. */
997 #if 0 /* Debugging code */
1000 printf ("First line of %%A:\n");
1001 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1002 printf ("First line of %%B:\n");
1003 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1004 printf ("Guessing calling convention: %s\n",
1006 ? "%A = modified by user, %B = upstream"
1007 : "%A = upstream, %B = modified by user");
1013 mainstream_file_name = other_file_name;
1014 modified_file_name = destination_file_name;
1018 mainstream_file_name = destination_file_name;
1019 modified_file_name = other_file_name;
1022 /* Read the three files into memory. */
1023 read_changelog_file (ancestor_file_name, &ancestor_file);
1024 read_changelog_file (mainstream_file_name, &mainstream_file);
1025 read_changelog_file (modified_file_name, &modified_file);
1027 /* Compute correspondence between the entries of ancestor_file and of
1031 compute_mapping (&ancestor_file, &mainstream_file, result);
1032 index_mapping = result[0];
1033 index_mapping_reverse = result[1];
1036 /* Compute differences between the entries of ancestor_file and of
1038 compute_differences (&ancestor_file, &modified_file, &diffs);
1040 /* Compute the result. */
1041 result_entries_pointers =
1042 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1044 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1048 for (k = 0; k < mainstream_file.num_entries; k++)
1049 result_entries_pointers[k] =
1050 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1053 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1056 for (e = 0; e < diffs.num_edits; e++)
1058 struct edit *edit = diffs.edits[e];
1064 /* An addition to the top of modified_file.
1065 Apply it to the top of mainstream_file. */
1067 for (j = edit->j2; j >= edit->j1; j--)
1069 struct entry *added_entry = modified_file.entries[j];
1070 gl_list_add_first (result_entries, added_entry);
1079 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1080 ASSERT (i_before >= 0);
1081 i_after = (edit->j2 + 1 == modified_file.num_entries
1082 ? ancestor_file.num_entries
1083 : diffs.index_mapping_reverse[edit->j2 + 1]);
1084 ASSERT (i_after >= 0);
1085 ASSERT (i_after == i_before + 1);
1086 /* An addition between ancestor_file.entries[i_before] and
1087 ancestor_file.entries[i_after]. See whether these two
1088 entries still exist in mainstream_file and are still
1090 k_before = index_mapping[i_before];
1091 k_after = (i_after == ancestor_file.num_entries
1092 ? mainstream_file.num_entries
1093 : index_mapping[i_after]);
1094 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1096 /* Yes, the entry before and after are still neighbours
1097 in mainstream_file. Apply the addition between
1099 if (k_after == mainstream_file.num_entries)
1102 for (j = edit->j1; j <= edit->j2; j++)
1104 struct entry *added_entry = modified_file.entries[j];
1105 gl_list_add_last (result_entries, added_entry);
1110 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1112 for (j = edit->j1; j <= edit->j2; j++)
1114 struct entry *added_entry = modified_file.entries[j];
1115 gl_list_add_before (result_entries, node_k_after, added_entry);
1121 /* It's not clear where the additions should be applied.
1122 Let the user decide. */
1123 struct conflict *c = XMALLOC (struct conflict);
1125 c->num_old_entries = 0;
1126 c->old_entries = NULL;
1127 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1128 c->modified_entries =
1129 XNMALLOC (c->num_modified_entries, struct entry *);
1130 for (j = edit->j1; j <= edit->j2; j++)
1131 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1132 gl_list_add_last (result_conflicts, c);
1138 /* Apply the removals one by one. */
1140 for (i = edit->i1; i <= edit->i2; i++)
1142 struct entry *removed_entry = ancestor_file.entries[i];
1143 ssize_t k = index_mapping[i];
1145 && entry_equals (removed_entry,
1146 mainstream_file.entries[k]))
1148 /* The entry to be removed still exists in
1149 mainstream_file. Remove it. */
1150 gl_list_node_set_value (result_entries,
1151 result_entries_pointers[k],
1156 /* The entry to be removed was already removed or was
1157 modified. This is a conflict. */
1158 struct conflict *c = XMALLOC (struct conflict);
1159 c->num_old_entries = 1;
1161 XNMALLOC (c->num_old_entries, struct entry *);
1162 c->old_entries[0] = removed_entry;
1163 c->num_modified_entries = 0;
1164 c->modified_entries = NULL;
1165 gl_list_add_last (result_conflicts, c);
1173 /* When the user usually merges entries from the same day,
1174 and this edit is at the top of the file: */
1175 if (split_merged_entry && edit->j1 == 0)
1177 /* Test whether the change is "simple merged", i.e. whether
1178 it consists of additions, followed by an augmentation of
1179 the first changed entry, followed by small changes of the
1192 modified_entry_n. */
1193 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1195 struct entry *split[2];
1196 bool simple_merged =
1197 try_split_merged_entry (ancestor_file.entries[edit->i1],
1198 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1203 for (i = edit->i1 + 1; i <= edit->i2; i++)
1204 if (entry_fstrcmp (ancestor_file.entries[i],
1205 modified_file.entries[i + edit->j2 - edit->i2])
1206 < FSTRCMP_THRESHOLD)
1208 simple_merged = false;
1214 /* Apply the additions at the top of modified_file.
1215 Apply each of the single-entry changes
1217 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1218 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1220 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1221 gl_list_add_first (result_entries, split[0]);
1222 /* The additions. */
1223 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1225 struct entry *added_entry = modified_file.entries[j];
1226 gl_list_add_first (result_entries, added_entry);
1228 /* Now the single-entry changes. */
1229 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1231 struct entry *changed_entry =
1232 (j == edit->j1 + num_added
1234 : modified_file.entries[j]);
1235 size_t i = j + edit->i2 - edit->j2;
1236 ssize_t k = index_mapping[i];
1238 && entry_equals (ancestor_file.entries[i],
1239 mainstream_file.entries[k]))
1241 gl_list_node_set_value (result_entries,
1242 result_entries_pointers[k],
1247 struct conflict *c = XMALLOC (struct conflict);
1248 c->num_old_entries = 1;
1250 XNMALLOC (c->num_old_entries, struct entry *);
1251 c->old_entries[0] = ancestor_file.entries[i];
1252 c->num_modified_entries = 1;
1253 c->modified_entries =
1254 XNMALLOC (c->num_modified_entries, struct entry *);
1255 c->modified_entries[0] = changed_entry;
1256 gl_list_add_last (result_conflicts, c);
1266 /* Test whether the change is "simple", i.e. whether it
1267 consists of small changes to the old ChangeLog entries
1268 and additions before them:
1278 modified_entry_n. */
1279 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1283 for (i = edit->i1; i <= edit->i2; i++)
1284 if (entry_fstrcmp (ancestor_file.entries[i],
1285 modified_file.entries[i + edit->j2 - edit->i2])
1286 < FSTRCMP_THRESHOLD)
1296 /* Apply the additions and each of the single-entry
1297 changes separately. */
1298 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1299 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1302 /* A simple change at the top of modified_file.
1303 Apply it to the top of mainstream_file. */
1305 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1307 struct entry *added_entry = modified_file.entries[j];
1308 gl_list_add_first (result_entries, added_entry);
1310 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1312 struct entry *changed_entry = modified_file.entries[j];
1313 size_t i = j + edit->i2 - edit->j2;
1314 ssize_t k = index_mapping[i];
1316 && entry_equals (ancestor_file.entries[i],
1317 mainstream_file.entries[k]))
1319 gl_list_node_set_value (result_entries,
1320 result_entries_pointers[k],
1325 struct conflict *c = XMALLOC (struct conflict);
1326 c->num_old_entries = 1;
1328 XNMALLOC (c->num_old_entries, struct entry *);
1329 c->old_entries[0] = ancestor_file.entries[i];
1330 c->num_modified_entries = 1;
1331 c->modified_entries =
1332 XNMALLOC (c->num_modified_entries, struct entry *);
1333 c->modified_entries[0] = changed_entry;
1334 gl_list_add_last (result_conflicts, c);
1344 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1345 ASSERT (i_before >= 0);
1346 /* A simple change after ancestor_file.entries[i_before].
1347 See whether this entry and the following num_changed
1348 entries still exist in mainstream_file and are still
1350 k_before = index_mapping[i_before];
1351 linear = (k_before >= 0);
1355 for (i = i_before + 1; i <= i_before + num_changed; i++)
1356 if (index_mapping[i] != k_before + (i - i_before))
1364 gl_list_node_t node_for_insert =
1365 result_entries_pointers[k_before + 1];
1367 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1369 struct entry *added_entry = modified_file.entries[j];
1370 gl_list_add_before (result_entries, node_for_insert, added_entry);
1372 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1374 struct entry *changed_entry = modified_file.entries[j];
1375 size_t i = j + edit->i2 - edit->j2;
1376 ssize_t k = index_mapping[i];
1378 if (entry_equals (ancestor_file.entries[i],
1379 mainstream_file.entries[k]))
1381 gl_list_node_set_value (result_entries,
1382 result_entries_pointers[k],
1387 struct conflict *c = XMALLOC (struct conflict);
1388 c->num_old_entries = 1;
1390 XNMALLOC (c->num_old_entries, struct entry *);
1391 c->old_entries[0] = ancestor_file.entries[i];
1392 c->num_modified_entries = 1;
1393 c->modified_entries =
1394 XNMALLOC (c->num_modified_entries, struct entry *);
1395 c->modified_entries[0] = changed_entry;
1396 gl_list_add_last (result_conflicts, c);
1406 See whether the num_changed entries still exist
1407 unchanged in mainstream_file and are still
1411 bool linear_unchanged;
1413 k_first = index_mapping[i_first];
1416 && entry_equals (ancestor_file.entries[i_first],
1417 mainstream_file.entries[k_first]));
1418 if (linear_unchanged)
1421 for (i = i_first + 1; i <= edit->i2; i++)
1422 if (!(index_mapping[i] == k_first + (i - i_first)
1423 && entry_equals (ancestor_file.entries[i],
1424 mainstream_file.entries[index_mapping[i]])))
1426 linear_unchanged = false;
1430 if (linear_unchanged)
1432 gl_list_node_t node_for_insert =
1433 result_entries_pointers[k_first];
1436 for (j = edit->j2; j >= edit->j1; j--)
1438 struct entry *new_entry = modified_file.entries[j];
1439 gl_list_add_before (result_entries, node_for_insert, new_entry);
1441 for (i = edit->i1; i <= edit->i2; i++)
1443 ssize_t k = index_mapping[i];
1445 ASSERT (entry_equals (ancestor_file.entries[i],
1446 mainstream_file.entries[k]));
1447 gl_list_node_set_value (result_entries,
1448 result_entries_pointers[k],
1457 struct conflict *c = XMALLOC (struct conflict);
1459 c->num_old_entries = edit->i2 - edit->i1 + 1;
1461 XNMALLOC (c->num_old_entries, struct entry *);
1462 for (i = edit->i1; i <= edit->i2; i++)
1463 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1464 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1465 c->modified_entries =
1466 XNMALLOC (c->num_modified_entries, struct entry *);
1467 for (j = edit->j1; j <= edit->j2; j++)
1468 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1469 gl_list_add_last (result_conflicts, c);
1477 /* Output the result. */
1479 FILE *fp = fopen (destination_file_name, "w");
1482 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1483 exit (EXIT_FAILURE);
1486 /* Output the conflicts at the top. */
1488 size_t n = gl_list_size (result_conflicts);
1490 for (i = 0; i < n; i++)
1491 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1494 size_t n = gl_list_size (result_entries);
1496 for (i = 0; i < n; i++)
1497 entry_write (fp, (struct entry *) gl_list_get_at (result_entries, i));
1500 if (fwriteerror (fp))
1502 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1503 exit (EXIT_FAILURE);
1507 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);