To: vim_dev@googlegroups.com Subject: Patch 8.2.3921 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.3921 Problem: The way xdiff is used is inefficient. Solution: Use hunk_func instead of the out_line callback. (Lewis Russell, closes #9344) Files: src/diff.c *** ../vim-8.2.3920/src/diff.c 2021-12-27 17:21:38.004449137 +0000 --- src/diff.c 2021-12-28 13:50:11.975517843 +0000 *************** *** 64,69 **** --- 64,77 ---- garray_T dout_ga; // used for internal diff } diffout_T; + // used for recording hunks from xdiff + typedef struct { + linenr_T lnum_orig; + long count_orig; + linenr_T lnum_new; + long count_new; + } diffhunk_T; + // two diff inputs and one result typedef struct { diffin_T dio_orig; // original file input *************** *** 84,95 **** #ifdef FEAT_FOLDING static void diff_fold_update(diff_T *dp, int skip_idx); #endif ! static void diff_read(int idx_orig, int idx_new, diffout_T *fname); static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new); static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp); ! static int parse_diff_ed(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); ! static int parse_diff_unified(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); ! static int xdiff_out(void *priv, mmbuffer_t *mb, int nbuf); #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \ for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next) --- 92,103 ---- #ifdef FEAT_FOLDING static void diff_fold_update(diff_T *dp, int skip_idx); #endif ! static void diff_read(int idx_orig, int idx_new, diffio_T *dio); static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new); static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp); ! static int parse_diff_ed(char_u *line, diffhunk_T *hunk); ! 
static int parse_diff_unified(char_u *line, diffhunk_T *hunk); ! static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv); #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \ for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next) *************** *** 880,886 **** continue; // Read the diff output and add each entry to the diff list. ! diff_read(idx_orig, idx_new, &dio->dio_diff); clear_diffin(&dio->dio_new); clear_diffout(&dio->dio_diff); --- 888,894 ---- continue; // Read the diff output and add each entry to the diff list. ! diff_read(idx_orig, idx_new, dio); clear_diffin(&dio->dio_new); clear_diffout(&dio->dio_diff); *************** *** 1118,1124 **** emit_cfg.ctxlen = 0; // don't need any diff_context here emit_cb.priv = &diffio->dio_diff; ! emit_cb.out_line = xdiff_out; if (xdl_diff(&diffio->dio_orig.din_mmfile, &diffio->dio_new.din_mmfile, &param, &emit_cfg, &emit_cb) < 0) --- 1126,1132 ---- emit_cfg.ctxlen = 0; // don't need any diff_context here emit_cb.priv = &diffio->dio_diff; ! emit_cfg.hunk_func = xdiff_out; if (xdl_diff(&diffio->dio_orig.din_mmfile, &diffio->dio_new.din_mmfile, &param, &emit_cfg, &emit_cb) < 0) *************** *** 1482,1490 **** wp->w_p_crb = TRUE; if (!(diff_flags & DIFF_FOLLOWWRAP)) { ! if (!wp->w_p_diff) wp->w_p_wrap_save = wp->w_p_wrap; ! wp->w_p_wrap = FALSE; } # ifdef FEAT_FOLDING if (!wp->w_p_diff) --- 1490,1498 ---- wp->w_p_crb = TRUE; if (!(diff_flags & DIFF_FOLLOWWRAP)) { ! if (!wp->w_p_diff) wp->w_p_wrap_save = wp->w_p_wrap; ! wp->w_p_wrap = FALSE; } # ifdef FEAT_FOLDING if (!wp->w_p_diff) *************** *** 1549,1555 **** if (!(diff_flags & DIFF_FOLLOWWRAP)) { if (!wp->w_p_wrap) !
wp->w_p_wrap = wp->w_p_wrap_save; } #ifdef FEAT_FOLDING free_string_option(wp->w_p_fdm); *************** *** 1607,1626 **** diff_read( int idx_orig, // idx of original file int idx_new, // idx of new file ! diffout_T *dout) // diff output { FILE *fd = NULL; int line_idx = 0; diff_T *dprev = NULL; diff_T *dp = curtab->tp_first_diff; diff_T *dn, *dpl; char_u linebuf[LBUFLEN]; // only need to hold the diff line char_u *line; long off; int i; - linenr_T lnum_orig, lnum_new; - long count_orig, count_new; int notset = TRUE; // block "*dp" not set yet enum { DIFF_ED, DIFF_UNIFIED, --- 1615,1635 ---- diff_read( int idx_orig, // idx of original file int idx_new, // idx of new file ! diffio_T *dio) // diff output { FILE *fd = NULL; int line_idx = 0; diff_T *dprev = NULL; diff_T *dp = curtab->tp_first_diff; diff_T *dn, *dpl; + diffout_T *dout = &dio->dio_diff; char_u linebuf[LBUFLEN]; // only need to hold the diff line char_u *line; long off; int i; int notset = TRUE; // block "*dp" not set yet + diffhunk_T *hunk; + enum { DIFF_ED, DIFF_UNIFIED, *************** *** 1641,1715 **** } } ! for (;;) { ! if (fd == NULL) { ! if (line_idx >= dout->dout_ga.ga_len) ! break; // did last line ! line = ((char_u **)dout->dout_ga.ga_data)[line_idx++]; ! } ! else ! { ! if (vim_fgets(linebuf, LBUFLEN, fd)) ! break; // end of file ! line = linebuf; ! } ! ! if (diffstyle == DIFF_NONE) ! { ! // Determine diff style. ! // ed like diff looks like this: ! // {first}[,{last}]c{first}[,{last}] ! // {first}a{first}[,{last}] ! // {first}[,{last}]d{first} ! // ! // unified diff looks like this: ! // --- file1 2018-03-20 13:23:35.783153140 +0100 ! // +++ file2 2018-03-20 13:23:41.183156066 +0100 ! // @@ -1,3 +1,5 @@ ! if (isdigit(*line)) ! diffstyle = DIFF_ED; ! else if ((STRNCMP(line, "@@ ", 3) == 0)) ! diffstyle = DIFF_UNIFIED; ! else if ((STRNCMP(line, "--- ", 4) == 0) ! && (vim_fgets(linebuf, LBUFLEN, fd) == 0) ! && (STRNCMP(line, "+++ ", 4) == 0) ! && (vim_fgets(linebuf, LBUFLEN, fd) == 0) ! 
&& (STRNCMP(line, "@@ ", 3) == 0)) ! diffstyle = DIFF_UNIFIED; ! else ! // Format not recognized yet, skip over this line. Cygwin diff ! // may put a warning at the start of the file. ! continue; } ! if (diffstyle == DIFF_ED) ! { ! if (!isdigit(*line)) ! continue; // not the start of a diff block ! if (parse_diff_ed(line, &lnum_orig, &count_orig, ! &lnum_new, &count_new) == FAIL) ! continue; ! } ! else if (diffstyle == DIFF_UNIFIED) { ! if (STRNCMP(line, "@@ ", 3) != 0) ! continue; // not the start of a diff block ! if (parse_diff_unified(line, &lnum_orig, &count_orig, ! &lnum_new, &count_new) == FAIL) ! continue; } else { ! emsg(_("E959: Invalid diff format.")); ! break; } // Go over blocks before the change, for which orig and new are equal. // Copy blocks from orig to new. while (dp != NULL ! && lnum_orig > dp->df_lnum[idx_orig] + dp->df_count[idx_orig]) { if (notset) diff_copy_entry(dprev, dp, idx_orig, idx_new); --- 1650,1743 ---- } } ! if (!dio->dio_internal) { ! hunk = ALLOC_ONE(diffhunk_T); ! if (hunk == NULL) { ! emsg(_("E98: Cannot read diff output")); ! return; } + } ! for (;;) ! { ! if (dio->dio_internal) { ! if (line_idx >= dout->dout_ga.ga_len) { ! break; // did last line ! } ! hunk = ((diffhunk_T **)dout->dout_ga.ga_data)[line_idx++]; } else { ! if (fd == NULL) ! { ! if (line_idx >= dout->dout_ga.ga_len) ! break; // did last line ! line = ((char_u **)dout->dout_ga.ga_data)[line_idx++]; ! } ! else ! { ! if (vim_fgets(linebuf, LBUFLEN, fd)) ! break; // end of file ! line = linebuf; ! } ! ! if (diffstyle == DIFF_NONE) ! { ! // Determine diff style. ! // ed like diff looks like this: ! // {first}[,{last}]c{first}[,{last}] ! // {first}a{first}[,{last}] ! // {first}[,{last}]d{first} ! // ! // unified diff looks like this: ! // --- file1 2018-03-20 13:23:35.783153140 +0100 ! // +++ file2 2018-03-20 13:23:41.183156066 +0100 ! // @@ -1,3 +1,5 @@ ! if (isdigit(*line)) ! diffstyle = DIFF_ED; ! else if ((STRNCMP(line, "@@ ", 3) == 0)) ! 
diffstyle = DIFF_UNIFIED; ! else if ((STRNCMP(line, "--- ", 4) == 0) ! && (vim_fgets(linebuf, LBUFLEN, fd) == 0) ! && (STRNCMP(line, "+++ ", 4) == 0) ! && (vim_fgets(linebuf, LBUFLEN, fd) == 0) ! && (STRNCMP(line, "@@ ", 3) == 0)) ! diffstyle = DIFF_UNIFIED; ! else ! // Format not recognized yet, skip over this line. Cygwin ! // diff may put a warning at the start of the file. ! continue; ! } ! ! if (diffstyle == DIFF_ED) ! { ! if (!isdigit(*line)) ! continue; // not the start of a diff block ! if (parse_diff_ed(line, hunk) == FAIL) ! continue; ! } ! else if (diffstyle == DIFF_UNIFIED) ! { ! if (STRNCMP(line, "@@ ", 3) != 0) ! continue; // not the start of a diff block ! if (parse_diff_unified(line, hunk) == FAIL) ! continue; ! } ! else ! { ! emsg(_("E959: Invalid diff format.")); ! break; ! } } // Go over blocks before the change, for which orig and new are equal. // Copy blocks from orig to new. while (dp != NULL ! && hunk->lnum_orig > dp->df_lnum[idx_orig] ! + dp->df_count[idx_orig]) { if (notset) diff_copy_entry(dprev, dp, idx_orig, idx_new); *************** *** 1719,1759 **** } if (dp != NULL ! && lnum_orig <= dp->df_lnum[idx_orig] + dp->df_count[idx_orig] ! && lnum_orig + count_orig >= dp->df_lnum[idx_orig]) { // New block overlaps with existing block(s). // First find last block that overlaps. for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next) ! if (lnum_orig + count_orig < dpl->df_next->df_lnum[idx_orig]) break; // If the newly found block starts before the old one, set the // start back a number of lines. ! off = dp->df_lnum[idx_orig] - lnum_orig; if (off > 0) { for (i = idx_orig; i < idx_new; ++i) if (curtab->tp_diffbuf[i] != NULL) dp->df_lnum[i] -= off; ! dp->df_lnum[idx_new] = lnum_new; ! dp->df_count[idx_new] = count_new; } else if (notset) { // new block inside existing one, adjust new block ! dp->df_lnum[idx_new] = lnum_new + off; ! dp->df_count[idx_new] = count_new - off; } else // second overlap of new block with existing block ! 
dp->df_count[idx_new] += count_new - count_orig + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig] - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]); // Adjust the size of the block to include all the lines to the // end of the existing block or the new diff, whatever ends last. ! off = (lnum_orig + count_orig) - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]); if (off < 0) { --- 1747,1789 ---- } if (dp != NULL ! && hunk->lnum_orig <= dp->df_lnum[idx_orig] ! + dp->df_count[idx_orig] ! && hunk->lnum_orig + hunk->count_orig >= dp->df_lnum[idx_orig]) { // New block overlaps with existing block(s). // First find last block that overlaps. for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next) ! if (hunk->lnum_orig + hunk->count_orig ! < dpl->df_next->df_lnum[idx_orig]) break; // If the newly found block starts before the old one, set the // start back a number of lines. ! off = dp->df_lnum[idx_orig] - hunk->lnum_orig; if (off > 0) { for (i = idx_orig; i < idx_new; ++i) if (curtab->tp_diffbuf[i] != NULL) dp->df_lnum[i] -= off; ! dp->df_lnum[idx_new] = hunk->lnum_new; ! dp->df_count[idx_new] = hunk->count_new; } else if (notset) { // new block inside existing one, adjust new block ! dp->df_lnum[idx_new] = hunk->lnum_new + off; ! dp->df_count[idx_new] = hunk->count_new - off; } else // second overlap of new block with existing block ! dp->df_count[idx_new] += hunk->count_new - hunk->count_orig + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig] - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]); // Adjust the size of the block to include all the lines to the // end of the existing block or the new diff, whatever ends last. ! off = (hunk->lnum_orig + hunk->count_orig) - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]); if (off < 0) { *************** *** 1785,1794 **** if (dp == NULL) goto done; ! dp->df_lnum[idx_orig] = lnum_orig; ! dp->df_count[idx_orig] = count_orig; ! dp->df_lnum[idx_new] = lnum_new; ! 
dp->df_count[idx_new] = count_new; // Set values for other buffers, these must be equal to the // original buffer, otherwise there would have been a change --- 1815,1824 ---- if (dp == NULL) goto done; ! dp->df_lnum[idx_orig] = hunk->lnum_orig; ! dp->df_count[idx_orig] = hunk->count_orig; ! dp->df_lnum[idx_new] = hunk->lnum_new; ! dp->df_count[idx_new] = hunk->count_new; // Set values for other buffers, these must be equal to the // original buffer, otherwise there would have been a change *************** *** 1811,1816 **** --- 1841,1849 ---- } done: + if (!dio->dio_internal) + vim_free(hunk); + if (fd != NULL) fclose(fd); } *************** *** 3137,3146 **** static int parse_diff_ed( char_u *line, ! linenr_T *lnum_orig, ! long *count_orig, ! linenr_T *lnum_new, ! long *count_new) { char_u *p; long f1, l1, f2, l2; --- 3170,3176 ---- static int parse_diff_ed( char_u *line, ! diffhunk_T *hunk) { char_u *p; long f1, l1, f2, l2; *************** *** 3175,3197 **** if (difftype == 'a') { ! *lnum_orig = f1 + 1; ! *count_orig = 0; } else { ! *lnum_orig = f1; ! *count_orig = l1 - f1 + 1; } if (difftype == 'd') { ! *lnum_new = f2 + 1; ! *count_new = 0; } else { ! *lnum_new = f2; ! *count_new = l2 - f2 + 1; } return OK; } --- 3205,3227 ---- if (difftype == 'a') { ! hunk->lnum_orig = f1 + 1; ! hunk->count_orig = 0; } else { ! hunk->lnum_orig = f1; ! hunk->count_orig = l1 - f1 + 1; } if (difftype == 'd') { ! hunk->lnum_new = f2 + 1; ! hunk->count_new = 0; } else { ! hunk->lnum_new = f2; ! hunk->count_new = l2 - f2 + 1; } return OK; } *************** *** 3203,3212 **** static int parse_diff_unified( char_u *line, ! linenr_T *lnum_orig, ! long *count_orig, ! linenr_T *lnum_new, ! long *count_new) { char_u *p; long oldline, oldcount, newline, newcount; --- 3233,3239 ---- static int parse_diff_unified( char_u *line, ! diffhunk_T *hunk) { char_u *p; long oldline, oldcount, newline, newcount; *************** *** 3245,3254 **** if (newline == 0) newline = 1; ! *lnum_orig = oldline; ! 
*count_orig = oldcount; ! *lnum_new = newline; ! *count_new = newcount; return OK; } --- 3272,3281 ---- if (newline == 0) newline = 1; ! hunk->lnum_orig = oldline; ! hunk->count_orig = oldcount; ! hunk->lnum_new = newline; ! hunk->count_new = newcount; return OK; } *************** *** 3261,3286 **** * Stores the diff output in a grow array. */ static int ! xdiff_out(void *priv, mmbuffer_t *mb, int nbuf) { diffout_T *dout = (diffout_T *)priv; ! char_u *p; ! ! // The header line always comes by itself, text lines in at least two ! // parts. We drop the text part. ! if (nbuf > 1) ! return 0; ! // sanity check ! if (STRNCMP(mb[0].ptr, "@@ ", 3) != 0) ! return 0; if (ga_grow(&dout->dout_ga, 1) == FAIL) return -1; ! p = vim_strnsave((char_u *)mb[0].ptr, mb[0].size); ! if (p == NULL) ! return -1; ! ((char_u **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p; return 0; } --- 3288,3314 ---- * Stores the diff output in a grow array. */ static int ! xdiff_out( ! long start_a, ! long count_a, ! long start_b, ! long count_b, ! void *priv) { diffout_T *dout = (diffout_T *)priv; ! diffhunk_T *p = ALLOC_ONE(diffhunk_T); ! if (p == NULL) ! return -1; if (ga_grow(&dout->dout_ga, 1) == FAIL) return -1; ! ! p->lnum_orig = start_a + 1; ! p->count_orig = count_a; ! p->lnum_new = start_b + 1; ! p->count_new = count_b; ! ((diffhunk_T **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p; return 0; } *** ../vim-8.2.3920/src/version.c 2021-12-28 13:15:02.444896152 +0000 --- src/version.c 2021-12-28 13:51:02.295452062 +0000 *************** *** 751,752 **** --- 751,754 ---- { /* Add new patch number below this line */ + /**/ + 3921, /**/ -- You cannot have a baby in one month by getting nine women pregnant. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// \\\ \\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///