1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 effects to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_linked_list.h"
138 #include "xmalloca.h"
141 #include "c-strstr.h"
142 #include "fwriteerror.h"
144 #define ASSERT(expr) \
152 #define FSTRCMP_THRESHOLD 0.6
153 #define FSTRCMP_STRICTER_THRESHOLD 0.8
155 /* Representation of a ChangeLog entry.
156 The string may contain NUL bytes; therefore it is represented as a plain
157 opaque memory region. */
164 /* Compare two entries for equality. */
166 entry_equals (const void *elt1, const void *elt2)
168 const struct entry *entry1 = (const struct entry *) elt1;
169 const struct entry *entry2 = (const struct entry *) elt2;
170 return entry1->length == entry2->length
171 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
174 /* Return a hash code of the contents of a ChangeLog entry. */
176 entry_hashcode (const void *elt)
178 const struct entry *entry = (const struct entry *) elt;
179 /* See http://www.haible.de/bruno/hashfunc.html. */
184 for (s = entry->string, n = entry->length; n > 0; s++, n--)
185 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
190 /* Perform a fuzzy comparison of two ChangeLog entries.
191 Return a similarity measure of the two entries, a value between 0 and 1.
192 0 stands for very distinct, 1 for identical. */
194 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
196 /* fstrcmp works only on NUL terminated strings. */
200 if (memchr (entry1->string, '\0', entry1->length) != NULL)
202 if (memchr (entry2->string, '\0', entry2->length) != NULL)
204 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
207 memcpy (p, entry1->string, entry1->length);
210 memcpy (p, entry2->string, entry2->length);
214 similarity = fstrcmp (memory, memory + entry1->length + 1);
219 /* This structure represents an entire ChangeLog file, after it was read
221 struct changelog_file
223 /* The entries, as a list. */
224 gl_list_t /* <struct entry *> */ entries_list;
225 /* The entries, as a list in opposite direction. */
226 gl_list_t /* <struct entry *> */ entries_reversed;
227 /* The entries, as an array. */
229 struct entry **entries;
232 /* Read a ChangeLog file into memory.
233 Return the contents in *RESULT. */
235 read_changelog_file (const char *filename, struct changelog_file *result)
237 /* Read the file in text mode, otherwise it's hard to recognize empty
240 char *contents = read_file (filename, &length);
241 if (contents == NULL)
243 fprintf (stderr, "could not read file '%s'\n", filename);
247 result->entries_list =
248 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
250 result->entries_reversed =
251 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
253 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
254 at a line following a blank line and that starts with a non-whitespace
255 character, or at the beginning of a file.
256 Split the file contents into entries. */
258 char *contents_end = contents + length;
259 char *start = contents;
260 while (start < contents_end)
262 /* Search the end of the current entry. */
266 while (ptr < contents_end)
268 ptr = memchr (ptr, '\n', contents_end - ptr);
275 if (contents_end - ptr >= 2
277 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
284 curr = XMALLOC (struct entry);
285 curr->string = start;
286 curr->length = ptr - start;
287 gl_list_add_last (result->entries_list, curr);
288 gl_list_add_first (result->entries_reversed, curr);
294 result->num_entries = gl_list_size (result->entries_list);
295 result->entries = XNMALLOC (result->num_entries, struct entry *);
298 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
301 while (gl_list_iterator_next (&iter, &elt, &node))
302 result->entries[index++] = (struct entry *) elt;
303 gl_list_iterator_free (&iter);
304 ASSERT (index == result->num_entries);
308 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
309 Return a set of two arrays:
310 - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
311 from FILE1 is not found in FILE2).
312 - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
313 from FILE2 is not found in FILE1).
314 The correspondence also takes into account small modifications; i.e. the
315 indicated relation is not equality of entries but best-match similarity
318 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
321 /* Mapping from indices in file1 to indices in file2. */
322 ssize_t *index_mapping;
323 /* Mapping from indices in file2 to indices in file1. */
324 ssize_t *index_mapping_reverse;
325 size_t n1 = file1->num_entries;
326 size_t n2 = file2->num_entries;
329 index_mapping = XNMALLOC (n1, ssize_t);
330 for (i = 0; i < n1; i++)
331 index_mapping[i] = -1;
333 index_mapping_reverse = XNMALLOC (n2, ssize_t);
334 for (j = 0; j < n2; j++)
335 index_mapping_reverse[j] = -1;
337 for (i = n1 - 1; i >= 0; i--)
338 /* Take an entry from file1. */
339 if (index_mapping[i] < 0)
341 struct entry *entry = file1->entries[i];
342 /* Search whether it occurs in file2. */
343 j = gl_list_indexof (file2->entries_reversed, entry);
347 /* Found an exact correspondence. */
348 ASSERT (index_mapping_reverse[j] < 0);
349 index_mapping[i] = j;
350 index_mapping_reverse[j] = i;
351 /* Look for more occurrences of the same entry. */
362 gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
367 gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
371 curr_i = n1 - 1 - next_i;
372 curr_j = n2 - 1 - next_j;
373 ASSERT (index_mapping[curr_i] < 0);
374 ASSERT (index_mapping_reverse[curr_j] < 0);
375 index_mapping[curr_i] = curr_j;
376 index_mapping_reverse[curr_j] = curr_i;
382 for (i = n1 - 1; i >= 0; i--)
383 /* Take an entry from file1. */
384 if (index_mapping[i] < 0)
386 struct entry *entry_i = file1->entries[i];
387 /* Search whether it approximately occurs in file2. */
389 double best_j_similarity = 0.0;
390 for (j = n2 - 1; j >= 0; j--)
391 if (index_mapping_reverse[j] < 0)
393 double similarity = entry_fstrcmp (entry_i, file2->entries[j]);
394 if (similarity > best_j_similarity)
397 best_j_similarity = similarity;
400 if (best_j_similarity >= FSTRCMP_THRESHOLD)
402 /* Found a similar entry in file2. */
403 struct entry *entry_j = file2->entries[best_j];
404 /* Search whether it approximately occurs in file1 at index i. */
406 double best_i_similarity = 0.0;
408 for (ii = n1 - 1; ii >= 0; ii--)
409 if (index_mapping[ii] < 0)
412 entry_fstrcmp (file1->entries[ii], entry_j);
413 if (similarity > best_i_similarity)
416 best_i_similarity = similarity;
419 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
421 index_mapping[i] = best_j;
422 index_mapping_reverse[best_j] = i;
427 result[0] = index_mapping;
428 result[1] = index_mapping_reverse;
431 /* An "edit" is a textual modification performed by the user, that needs to
432 be applied to the other file. */
435 /* Some consecutive entries were added. */
437 /* Some consecutive entries were removed; some other consecutive entries
438 were added at the same position. (Not necessarily the same number of
441 /* Some consecutive entries were removed. */
445 /* This structure represents an edit. */
449 /* Range of indices into the entries of FILE1. */
450 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
451 /* Range of indices into the entries of FILE2. */
452 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
455 /* This structure represents the differences from one file, FILE1, to another
459 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
460 from FILE1 is not found in FILE2). */
461 ssize_t *index_mapping;
462 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
463 from FILE2 is not found in FILE1). */
464 ssize_t *index_mapping_reverse;
465 /* The edits that transform FILE1 into FILE2. */
470 /* Import the difference detection algorithm from GNU diff. */
471 #define ELEMENT struct entry *
472 #define EQUAL entry_equals
473 #define OFFSET ssize_t
474 #define EXTRA_CONTEXT_FIELDS \
475 ssize_t *index_mapping; \
476 ssize_t *index_mapping_reverse;
477 #define NOTE_DELETE(ctxt, xoff) \
478 ctxt->index_mapping[xoff] = -1
479 #define NOTE_INSERT(ctxt, yoff) \
480 ctxt->index_mapping_reverse[yoff] = -1
483 /* Compute the differences between the entries of FILE1 and the entries of
486 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
487 struct differences *result)
489 /* Unlike compute_mapping, which mostly ignores the order of the entries and
490 therefore works well when some entries are permuted, here we use the order.
491 I think this is needed in order to distinguish changes from
492 additions+removals; I don't know how to say what is a "change" if the
493 files are considered as unordered sets of entries. */
495 size_t n1 = file1->num_entries;
496 size_t n2 = file2->num_entries;
499 gl_list_t /* <struct edit *> */ edits;
501 ctxt.xvec = file1->entries;
502 ctxt.yvec = file2->entries;
503 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
504 for (i = 0; i < n1; i++)
505 ctxt.index_mapping[i] = 0;
506 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
507 for (j = 0; j < n2; j++)
508 ctxt.index_mapping_reverse[j] = 0;
509 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
510 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
511 ctxt.too_expensive = n1 + n2;
513 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
514 each removed or added entry. */
515 compareseq (0, n1, 0, n2, 0, &ctxt);
517 /* Complete the index_mapping and index_mapping_reverse arrays. */
520 while (i < n1 || j < n2)
522 while (i < n1 && ctxt.index_mapping[i] < 0)
524 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
526 ASSERT ((i < n1) == (j < n2));
527 if (i == n1 && j == n2)
529 ctxt.index_mapping[i] = j;
530 ctxt.index_mapping_reverse[j] = i;
535 /* Create the edits. */
536 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
539 while (i < n1 || j < n2)
545 e = XMALLOC (struct edit);
549 gl_list_add_last (edits, e);
556 e = XMALLOC (struct edit);
560 gl_list_add_last (edits, e);
563 if (ctxt.index_mapping[i] >= 0)
565 if (ctxt.index_mapping_reverse[j] >= 0)
567 ASSERT (ctxt.index_mapping[i] == j);
568 ASSERT (ctxt.index_mapping_reverse[j] == i);
575 ASSERT (ctxt.index_mapping_reverse[j] < 0);
576 e = XMALLOC (struct edit);
581 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
583 gl_list_add_last (edits, e);
588 if (ctxt.index_mapping_reverse[j] >= 0)
591 ASSERT (ctxt.index_mapping[i] < 0);
592 e = XMALLOC (struct edit);
597 while (i < n1 && ctxt.index_mapping[i] < 0);
599 gl_list_add_last (edits, e);
604 ASSERT (ctxt.index_mapping[i] < 0);
605 ASSERT (ctxt.index_mapping_reverse[j] < 0);
606 e = XMALLOC (struct edit);
611 while (i < n1 && ctxt.index_mapping[i] < 0);
616 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
618 gl_list_add_last (edits, e);
623 result->index_mapping = ctxt.index_mapping;
624 result->index_mapping_reverse = ctxt.index_mapping_reverse;
625 result->num_edits = gl_list_size (edits);
626 result->edits = XNMALLOC (result->num_edits, struct edit *);
629 gl_list_iterator_t iter = gl_list_iterator (edits);
632 while (gl_list_iterator_next (&iter, &elt, &node))
633 result->edits[index++] = (struct edit *) elt;
634 gl_list_iterator_free (&iter);
635 ASSERT (index == result->num_edits);
639 /* An empty entry. */
640 static struct entry empty_entry = { NULL, 0 };
642 /* Return the end a paragraph.
644 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
645 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
646 it is the start of a blank line or the end of the entry. */
648 find_paragraph_end (const struct entry *entry, size_t offset)
650 const char *string = entry->string;
651 size_t length = entry->length;
655 const char *nl = memchr (string + offset, '\n', length - offset);
658 offset = (nl - string) + 1;
659 if (offset < length && string[offset] == '\n')
664 /* Split a merged entry.
665 Given an old entry of the form
668 and a new entry of the form
672 where the two titles are the same and BODY and BODY' are very similar,
673 this computes two new entries
680 If the entries don't have this form, it returns false. */
682 try_split_merged_entry (const struct entry *old_entry,
683 const struct entry *new_entry,
684 struct entry *new_split[2])
686 size_t old_title_len = find_paragraph_end (old_entry, 0);
687 size_t new_title_len = find_paragraph_end (new_entry, 0);
688 struct entry old_body;
689 struct entry new_body;
690 size_t best_split_offset;
691 double best_similarity;
695 if (!(old_title_len == new_title_len
696 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
699 old_body.string = old_entry->string + old_title_len;
700 old_body.length = old_entry->length - old_title_len;
702 /* Determine where to split the new entry.
703 This is done by maximizing the similarity between BODY and BODY'. */
704 best_split_offset = split_offset = new_title_len;
705 best_similarity = 0.0;
710 new_body.string = new_entry->string + split_offset;
711 new_body.length = new_entry->length - split_offset;
712 similarity = entry_fstrcmp (&old_body, &new_body);
713 if (similarity > best_similarity)
715 best_split_offset = split_offset;
716 best_similarity = similarity;
718 if (best_similarity == 1.0)
719 /* It cannot get better. */
722 if (split_offset < new_entry->length)
723 split_offset = find_paragraph_end (new_entry, split_offset + 1);
728 /* BODY' should not be empty. */
729 if (best_split_offset == new_entry->length)
731 ASSERT (new_entry->string[best_split_offset] == '\n');
733 /* A certain similarity between BODY and BODY' is required. */
734 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
737 new_split[0] = XMALLOC (struct entry);
738 new_split[0]->string = new_entry->string;
739 new_split[0]->length = best_split_offset + 1;
741 new_split[1] = XMALLOC (struct entry);
743 size_t len1 = new_title_len;
744 size_t len2 = new_entry->length - best_split_offset;
745 char *combined = XNMALLOC (len1 + len2, char);
746 memcpy (combined, new_entry->string, len1);
747 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
748 new_split[1]->string = combined;
749 new_split[1]->length = len1 + len2;
755 /* Write the contents of an entry to the output stream FP. */
757 entry_write (FILE *fp, struct entry *entry)
759 if (entry->length > 0)
760 fwrite (entry->string, 1, entry->length, fp);
763 /* This structure represents a conflict.
764 A conflict can occur for various reasons. */
767 /* Parts from the ancestor file. */
768 size_t num_old_entries;
769 struct entry **old_entries;
770 /* Parts of the modified file. */
771 size_t num_modified_entries;
772 struct entry **modified_entries;
775 /* Write a conflict to the output stream FP, including markers. */
777 conflict_write (FILE *fp, struct conflict *c)
781 /* Use the same syntax as git's default merge driver.
782 Don't indent the contents of the entries (with things like ">" or "-"),
783 otherwise the user needs more textual editing to resolve the conflict. */
784 fputs ("<<<<<<<\n", fp);
785 for (i = 0; i < c->num_old_entries; i++)
786 entry_write (fp, c->old_entries[i]);
787 fputs ("=======\n", fp);
788 for (i = 0; i < c->num_modified_entries; i++)
789 entry_write (fp, c->modified_entries[i]);
790 fputs (">>>>>>>\n", fp);
794 static const struct option long_options[] =
796 { "help", no_argument, NULL, 'h' },
797 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
798 { "version", no_argument, NULL, 'V' },
802 /* Print a usage mesage and exit. */
806 if (status != EXIT_SUCCESS)
807 fprintf (stderr, "Try `%s --help' for more information.\n",
811 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
814 printf ("Merges independent modifications of a ChangeLog style file.\n");
815 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
816 printf ("A-FILE-NAME names the publicly modified file.\n");
817 printf ("B-FILE-NAME names the user-modified file.\n");
818 printf ("Writes the merged file into A-FILE-NAME.\n");
820 printf ("Operation modifiers:\n");
822 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
823 Use this if you have the habit to merge unrelated\n\
824 entries into a single one, separated only by a\n\
825 newline, just because they happened on the same\n\
828 printf ("Informative output:\n");
829 printf (" -h, --help display this help and exit\n");
830 printf (" -V, --version output version information and exit\n");
832 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
840 main (int argc, char *argv[])
845 bool split_merged_entry;
847 /* Set program name for messages. */
848 set_program_name (argv[0]);
850 /* Set default values for variables. */
853 split_merged_entry = false;
855 /* Parse command line options. */
856 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
859 case '\0': /* Long option. */
867 case CHAR_MAX + 1: /* --split-merged-entry */
868 split_merged_entry = true;
871 usage (EXIT_FAILURE);
876 /* Version information is requested. */
877 printf ("%s\n", program_name);
878 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
879 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
880 This is free software: you are free to change and redistribute it.\n\
881 There is NO WARRANTY, to the extent permitted by law.\n\
884 printf ("Written by %s.\n", "Bruno Haible");
890 /* Help is requested. */
891 usage (EXIT_SUCCESS);
894 /* Test argument count. */
895 if (optind + 3 != argc)
896 error (EXIT_FAILURE, 0, "expected three arguments");
899 const char *ancestor_file_name; /* O-FILE-NAME */
900 const char *destination_file_name; /* A-FILE-NAME */
902 const char *other_file_name; /* B-FILE-NAME */
903 const char *mainstream_file_name;
904 const char *modified_file_name;
905 struct changelog_file ancestor_file;
906 struct changelog_file mainstream_file;
907 struct changelog_file modified_file;
908 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
909 ssize_t *index_mapping;
910 /* Mapping from indices in mainstream_file to indices in ancestor_file. */
911 ssize_t *index_mapping_reverse;
912 struct differences diffs;
913 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
914 gl_list_t /* <struct entry *> */ result_entries;
915 gl_list_t /* <struct conflict *> */ result_conflicts;
917 ancestor_file_name = argv[optind];
918 destination_file_name = argv[optind + 1];
919 other_file_name = argv[optind + 2];
921 /* Heuristic to determine whether it's a pull in downstream direction
922 (e.g. pull from a centralized server) or a pull in upstream direction
923 (e.g. "git stash apply").
925 For ChangeLog this distinction is important. The difference between
926 an "upstream" and a "downstream" repository is that more people are
927 looking at the "upstream" repository. They want to be informed about
928 changes and expect them to be shown at the top of the ChangeLog.
929 When a user pulls downstream, on the other hand, he has two options:
930 a) He gets the change entries from the central repository also at the
931 top of his ChangeLog, and his own changes come after them.
932 b) He gets the change entries from the central repository after those
933 he has collected for his branch. His own change entries stay at
934 the top of the ChangeLog file.
935 In the case a) he has to reorder the ChangeLog before he can commit.
936 No one does that. So most people want b).
937 In other words, the order of entries in a ChangeLog should represent
938 the order in which they have flown (or will flow) into the *central*
941 But in git this is fundamentally indistinguishable, because when Linus
942 pulls patches from akpm and akpm pulls patches from Linus, it's not
943 clear which of the two is more "upstream". Also, when you have many
944 branches in a repository and pull from one to another, "git" has no way
945 to know which branch is more "upstream" than the other. The git-tag(1)
946 manual page also says:
947 "One important aspect of git is it is distributed, and being
948 distributed largely means there is no inherent "upstream" or
949 "downstream" in the system."
950 Therefore anyone who attempts to produce a ChangeLog from the merge
953 Here we allow the user to specify the pull direction through an
954 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
955 environment variables are not set, we assume a "simple single user"
956 usage pattern: He manages local changes through stashes and uses
957 "git pull" only to pull downstream.
959 How to distinguish these situation? There are several hints:
960 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
961 a "git pull", it is set to 'pull '. During a "git pull --rebase",
962 it is set to 'pull --rebase'.
963 - During a "git stash apply", there is an environment variable of
964 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
968 var = getenv ("GIT_DOWNSTREAM");
969 if (var != NULL && var[0] != '\0')
973 var = getenv ("GIT_UPSTREAM");
974 if (var != NULL && var[0] != '\0')
978 var = getenv ("GIT_REFLOG_ACTION");
979 #if 0 /* Debugging code */
980 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
983 && ((strncmp (var, "pull", 4) == 0
984 && c_strstr (var, " --rebase") == NULL)
985 || strncmp (var, "merge origin", 12) == 0))
989 /* "git stash apply", "git rebase" and similar. */
996 #if 0 /* Debugging code */
999 printf ("First line of %%A:\n");
1000 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1001 printf ("First line of %%B:\n");
1002 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1003 printf ("Guessing calling convention: %s\n",
1005 ? "%A = modified by user, %B = upstream"
1006 : "%A = upstream, %B = modified by user");
1012 mainstream_file_name = other_file_name;
1013 modified_file_name = destination_file_name;
1017 mainstream_file_name = destination_file_name;
1018 modified_file_name = other_file_name;
1021 /* Read the three files into memory. */
1022 read_changelog_file (ancestor_file_name, &ancestor_file);
1023 read_changelog_file (mainstream_file_name, &mainstream_file);
1024 read_changelog_file (modified_file_name, &modified_file);
1026 /* Compute correspondence between the entries of ancestor_file and of
1030 compute_mapping (&ancestor_file, &mainstream_file, result);
1031 index_mapping = result[0];
1032 index_mapping_reverse = result[1];
1035 /* Compute differences between the entries of ancestor_file and of
1037 compute_differences (&ancestor_file, &modified_file, &diffs);
1039 /* Compute the result. */
1040 result_entries_pointers =
1041 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1043 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1047 for (k = 0; k < mainstream_file.num_entries; k++)
1048 result_entries_pointers[k] =
1049 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1052 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1055 for (e = 0; e < diffs.num_edits; e++)
1057 struct edit *edit = diffs.edits[e];
1063 /* An addition to the top of modified_file.
1064 Apply it to the top of mainstream_file. */
1066 for (j = edit->j2; j >= edit->j1; j--)
1068 struct entry *added_entry = modified_file.entries[j];
1069 gl_list_add_first (result_entries, added_entry);
1078 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1079 ASSERT (i_before >= 0);
1080 i_after = (edit->j2 + 1 == modified_file.num_entries
1081 ? ancestor_file.num_entries
1082 : diffs.index_mapping_reverse[edit->j2 + 1]);
1083 ASSERT (i_after >= 0);
1084 ASSERT (i_after == i_before + 1);
1085 /* An addition between ancestor_file.entries[i_before] and
1086 ancestor_file.entries[i_after]. See whether these two
1087 entries still exist in mainstream_file and are still
1089 k_before = index_mapping[i_before];
1090 k_after = (i_after == ancestor_file.num_entries
1091 ? mainstream_file.num_entries
1092 : index_mapping[i_after]);
1093 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1095 /* Yes, the entry before and after are still neighbours
1096 in mainstream_file. Apply the addition between
1098 if (k_after == mainstream_file.num_entries)
1101 for (j = edit->j1; j <= edit->j2; j++)
1103 struct entry *added_entry = modified_file.entries[j];
1104 gl_list_add_last (result_entries, added_entry);
1109 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1111 for (j = edit->j1; j <= edit->j2; j++)
1113 struct entry *added_entry = modified_file.entries[j];
1114 gl_list_add_before (result_entries, node_k_after, added_entry);
1120 /* It's not clear where the additions should be applied.
1121 Let the user decide. */
1122 struct conflict *c = XMALLOC (struct conflict);
1124 c->num_old_entries = 0;
1125 c->old_entries = NULL;
1126 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1127 c->modified_entries =
1128 XNMALLOC (c->num_modified_entries, struct entry *);
1129 for (j = edit->j1; j <= edit->j2; j++)
1130 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1131 gl_list_add_last (result_conflicts, c);
1137 /* Apply the removals one by one. */
1139 for (i = edit->i1; i <= edit->i2; i++)
1141 struct entry *removed_entry = ancestor_file.entries[i];
1142 ssize_t k = index_mapping[i];
1144 && entry_equals (removed_entry,
1145 mainstream_file.entries[k]))
1147 /* The entry to be removed still exists in
1148 mainstream_file. Remove it. */
1149 gl_list_node_set_value (result_entries,
1150 result_entries_pointers[k],
1155 /* The entry to be removed was already removed or was
1156 modified. This is a conflict. */
1157 struct conflict *c = XMALLOC (struct conflict);
1158 c->num_old_entries = 1;
1160 XNMALLOC (c->num_old_entries, struct entry *);
1161 c->old_entries[0] = removed_entry;
1162 c->num_modified_entries = 0;
1163 c->modified_entries = NULL;
1164 gl_list_add_last (result_conflicts, c);
1172 /* When the user usually merges entries from the same day,
1173 and this edit is at the top of the file: */
1174 if (split_merged_entry && edit->j1 == 0)
1176 /* Test whether the change is "simple merged", i.e. whether
1177 it consists of additions, followed by an augmentation of
1178 the first changed entry, followed by small changes of the
1191 modified_entry_n. */
1192 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1194 struct entry *split[2];
1195 bool simple_merged =
1196 try_split_merged_entry (ancestor_file.entries[edit->i1],
1197 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1202 for (i = edit->i1 + 1; i <= edit->i2; i++)
1203 if (entry_fstrcmp (ancestor_file.entries[i],
1204 modified_file.entries[i + edit->j2 - edit->i2])
1205 < FSTRCMP_THRESHOLD)
1207 simple_merged = false;
1213 /* Apply the additions at the top of modified_file.
1214 Apply each of the single-entry changes
1216 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1217 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1219 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1220 gl_list_add_first (result_entries, split[0]);
1221 /* The additions. */
1222 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1224 struct entry *added_entry = modified_file.entries[j];
1225 gl_list_add_first (result_entries, added_entry);
1227 /* Now the single-entry changes. */
1228 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1230 struct entry *changed_entry =
1231 (j == edit->j1 + num_added
1233 : modified_file.entries[j]);
1234 size_t i = j + edit->i2 - edit->j2;
1235 ssize_t k = index_mapping[i];
1237 && entry_equals (ancestor_file.entries[i],
1238 mainstream_file.entries[k]))
1240 gl_list_node_set_value (result_entries,
1241 result_entries_pointers[k],
1246 struct conflict *c = XMALLOC (struct conflict);
1247 c->num_old_entries = 1;
1249 XNMALLOC (c->num_old_entries, struct entry *);
1250 c->old_entries[0] = ancestor_file.entries[i];
1251 c->num_modified_entries = 1;
1252 c->modified_entries =
1253 XNMALLOC (c->num_modified_entries, struct entry *);
1254 c->modified_entries[0] = changed_entry;
1255 gl_list_add_last (result_conflicts, c);
1265 /* Test whether the change is "simple", i.e. whether it
1266 consists of small changes to the old ChangeLog entries
1267 and additions before them:
1277 modified_entry_n. */
1278 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1282 for (i = edit->i1; i <= edit->i2; i++)
1283 if (entry_fstrcmp (ancestor_file.entries[i],
1284 modified_file.entries[i + edit->j2 - edit->i2])
1285 < FSTRCMP_THRESHOLD)
1295 /* Apply the additions and each of the single-entry
1296 changes separately. */
1297 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1298 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1301 /* A simple change at the top of modified_file.
1302 Apply it to the top of mainstream_file. */
1304 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1306 struct entry *added_entry = modified_file.entries[j];
1307 gl_list_add_first (result_entries, added_entry);
1309 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1311 struct entry *changed_entry = modified_file.entries[j];
1312 size_t i = j + edit->i2 - edit->j2;
1313 ssize_t k = index_mapping[i];
1315 && entry_equals (ancestor_file.entries[i],
1316 mainstream_file.entries[k]))
1318 gl_list_node_set_value (result_entries,
1319 result_entries_pointers[k],
1324 struct conflict *c = XMALLOC (struct conflict);
1325 c->num_old_entries = 1;
1327 XNMALLOC (c->num_old_entries, struct entry *);
1328 c->old_entries[0] = ancestor_file.entries[i];
1329 c->num_modified_entries = 1;
1330 c->modified_entries =
1331 XNMALLOC (c->num_modified_entries, struct entry *);
1332 c->modified_entries[0] = changed_entry;
1333 gl_list_add_last (result_conflicts, c);
1343 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1344 ASSERT (i_before >= 0);
1345 /* A simple change after ancestor_file.entries[i_before].
1346 See whether this entry and the following num_changed
1347 entries still exist in mainstream_file and are still
1349 k_before = index_mapping[i_before];
1350 linear = (k_before >= 0);
1354 for (i = i_before + 1; i <= i_before + num_changed; i++)
1355 if (index_mapping[i] != k_before + (i - i_before))
1363 gl_list_node_t node_for_insert =
1364 result_entries_pointers[k_before + 1];
1366 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1368 struct entry *added_entry = modified_file.entries[j];
1369 gl_list_add_before (result_entries, node_for_insert, added_entry);
1371 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1373 struct entry *changed_entry = modified_file.entries[j];
1374 size_t i = j + edit->i2 - edit->j2;
1375 ssize_t k = index_mapping[i];
1377 if (entry_equals (ancestor_file.entries[i],
1378 mainstream_file.entries[k]))
1380 gl_list_node_set_value (result_entries,
1381 result_entries_pointers[k],
1386 struct conflict *c = XMALLOC (struct conflict);
1387 c->num_old_entries = 1;
1389 XNMALLOC (c->num_old_entries, struct entry *);
1390 c->old_entries[0] = ancestor_file.entries[i];
1391 c->num_modified_entries = 1;
1392 c->modified_entries =
1393 XNMALLOC (c->num_modified_entries, struct entry *);
1394 c->modified_entries[0] = changed_entry;
1395 gl_list_add_last (result_conflicts, c);
1405 See whether the num_changed entries still exist
1406 unchanged in mainstream_file and are still
1410 bool linear_unchanged;
1412 k_first = index_mapping[i_first];
1415 && entry_equals (ancestor_file.entries[i_first],
1416 mainstream_file.entries[k_first]));
1417 if (linear_unchanged)
1420 for (i = i_first + 1; i <= edit->i2; i++)
1421 if (!(index_mapping[i] == k_first + (i - i_first)
1422 && entry_equals (ancestor_file.entries[i],
1423 mainstream_file.entries[index_mapping[i]])))
1425 linear_unchanged = false;
1429 if (linear_unchanged)
1431 gl_list_node_t node_for_insert =
1432 result_entries_pointers[k_first];
1435 for (j = edit->j2; j >= edit->j1; j--)
1437 struct entry *new_entry = modified_file.entries[j];
1438 gl_list_add_before (result_entries, node_for_insert, new_entry);
1440 for (i = edit->i1; i <= edit->i2; i++)
1442 ssize_t k = index_mapping[i];
1444 ASSERT (entry_equals (ancestor_file.entries[i],
1445 mainstream_file.entries[k]));
1446 gl_list_node_set_value (result_entries,
1447 result_entries_pointers[k],
1456 struct conflict *c = XMALLOC (struct conflict);
1458 c->num_old_entries = edit->i2 - edit->i1 + 1;
1460 XNMALLOC (c->num_old_entries, struct entry *);
1461 for (i = edit->i1; i <= edit->i2; i++)
1462 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1463 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1464 c->modified_entries =
1465 XNMALLOC (c->num_modified_entries, struct entry *);
1466 for (j = edit->j1; j <= edit->j2; j++)
1467 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1468 gl_list_add_last (result_conflicts, c);
1476 /* Output the result. */
1478 FILE *fp = fopen (destination_file_name, "w");
1481 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1482 exit (EXIT_FAILURE);
1485 /* Output the conflicts at the top. */
1487 size_t n = gl_list_size (result_conflicts);
1489 for (i = 0; i < n; i++)
1490 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1493 size_t n = gl_list_size (result_entries);
1495 for (i = 0; i < n; i++)
1496 entry_write (fp, (struct entry *) gl_list_get_at (result_entries, i));
1499 if (fwriteerror (fp))
1501 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1502 exit (EXIT_FAILURE);
1506 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);