To: vim_dev@googlegroups.com Subject: Patch 8.0.0190 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.0.0190 Problem: Detecting duplicate tags uses a slow linear search. Solution: Use a much faster hash table solution. (James McCoy, closes #1046) But don't add hi_keylen, it makes hash tables 50% bigger. Files: src/tag.c *** ../vim-8.0.0189/src/tag.c 2016-12-01 21:32:28.678025257 +0100 --- src/tag.c 2017-01-15 16:47:11.816668598 +0100 *************** *** 35,43 **** } tagptrs_T; /* ! * The matching tags are first stored in ga_match[]. In which one depends on ! * the priority of the match. ! * At the end, the matches from ga_match[] are concatenated, to make a list * sorted on priority. */ #define MT_ST_CUR 0 /* static match in current file */ --- 35,43 ---- } tagptrs_T; /* ! * The matching tags are first stored in one of the ht_match[] hash tables. In ! * which one depends on the priority of the match. ! * At the end, all the matches from ht_match[] are concatenated, to make a list * sorted on priority. */ #define MT_ST_CUR 0 /* static match in current file */ *************** *** 1341,1352 **** int is_etag; /* current file is emaces style */ #endif ! struct match_found ! { ! int len; /* nr of chars of match[] to be compared */ ! char_u match[1]; /* actually longer */ ! } *mfp, *mfp2; ! garray_T ga_match[MT_COUNT]; int match_count = 0; /* number of matches found */ char_u **matches; int mtt; --- 1341,1349 ---- int is_etag; /* current file is emaces style */ #endif ! char_u *mfp; ! hashtab_T ht_match[MT_COUNT]; ! hash_T hash = 0; int match_count = 0; /* number of matches found */ char_u **matches; int mtt; *************** *** 1402,1417 **** vimconv.vc_type = CONV_NONE; #endif ! /* ! * Allocate memory for the buffers that are used ! */ lbuf = alloc(lbuf_size); tag_fname = alloc(MAXPATHL + 1); #ifdef FEAT_EMACS_TAGS ebuf = alloc(LSIZE); #endif for (mtt = 0; mtt < MT_COUNT; ++mtt) ! ga_init2(&ga_match[mtt], (int)sizeof(struct match_found *), 100); /* check for out of memory situation */ if (lbuf == NULL || tag_fname == NULL --- 1399,1414 ---- vimconv.vc_type = CONV_NONE; #endif ! /* ! * Allocate memory for the buffers that are used ! */ lbuf = alloc(lbuf_size); tag_fname = alloc(MAXPATHL + 1); #ifdef FEAT_EMACS_TAGS ebuf = alloc(LSIZE); #endif for (mtt = 0; mtt < MT_COUNT; ++mtt) ! hash_init(&ht_match[mtt]); /* check for out of memory situation */ if (lbuf == NULL || tag_fname == NULL *************** *** 2206,2215 **** } /* ! * If a match is found, add it to ga_match[]. */ if (match) { #ifdef FEAT_CSCOPE if (use_cscope) { --- 2203,2214 ---- } /* ! * If a match is found, add it to ht_match[]. */ if (match) { + int len = 0; + #ifdef FEAT_CSCOPE if (use_cscope) { *************** *** 2262,2440 **** } /* ! * Add the found match in ga_match[mtt], avoiding duplicates. * Store the info we need later, which depends on the kind of * tags we are dealing with. */ ! if (ga_grow(&ga_match[mtt], 1) == OK) { - int len; - int heuristic; - - if (help_only) - { #ifdef FEAT_MULTI_LANG # define ML_EXTRA 3 #else # define ML_EXTRA 0 #endif ! /* ! * Append the help-heuristic number after the ! * tagname, for sorting it later. ! */ ! *tagp.tagname_end = NUL; ! len = (int)(tagp.tagname_end - tagp.tagname); ! mfp = (struct match_found *) ! alloc((int)sizeof(struct match_found) + len ! + 10 + ML_EXTRA); ! if (mfp != NULL) ! { ! /* "len" includes the language and the NUL, but ! * not the priority. */ ! mfp->len = len + ML_EXTRA + 1; ! #define ML_HELP_LEN 6 ! p = mfp->match; ! STRCPY(p, tagp.tagname); #ifdef FEAT_MULTI_LANG ! p[len] = '@'; ! STRCPY(p + len + 1, help_lang); #endif ! heuristic = help_heuristic(tagp.tagname, ! match_re ? matchoff : 0, !match_no_ic); #ifdef FEAT_MULTI_LANG ! heuristic += help_pri; #endif ! sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", ! heuristic); ! } ! *tagp.tagname_end = TAB; } ! else if (name_only) { ! if (get_it_again) ! { ! char_u *temp_end = tagp.command; ! if (*temp_end == '/') ! while (*temp_end && *temp_end != '\r' ! && *temp_end != '\n' ! && *temp_end != '$') ! temp_end++; ! if (tagp.command + 2 < temp_end) ! { ! len = (int)(temp_end - tagp.command - 2); ! mfp = (struct match_found *)alloc( ! (int)sizeof(struct match_found) + len); ! if (mfp != NULL) ! { ! mfp->len = len + 1; /* include the NUL */ ! p = mfp->match; ! vim_strncpy(p, tagp.command + 2, len); ! } ! } ! else ! mfp = NULL; ! get_it_again = FALSE; ! } ! else { ! len = (int)(tagp.tagname_end - tagp.tagname); ! mfp = (struct match_found *)alloc( ! (int)sizeof(struct match_found) + len); if (mfp != NULL) ! { ! mfp->len = len + 1; /* include the NUL */ ! p = mfp->match; ! vim_strncpy(p, tagp.tagname, len); ! } ! ! /* if wanted, re-read line to get long form too */ ! if (State & INSERT) ! get_it_again = p_sft; } } else { ! /* Save the tag in a buffer. ! * Emacs tag: ! * other tag: ! * without Emacs tags: ! */ ! len = (int)STRLEN(tag_fname) ! + (int)STRLEN(lbuf) + 3; #ifdef FEAT_EMACS_TAGS ! if (is_etag) ! len += (int)STRLEN(ebuf) + 1; ! else ! ++len; #endif ! mfp = (struct match_found *)alloc( ! (int)sizeof(struct match_found) + len); ! if (mfp != NULL) ! { ! mfp->len = len; ! p = mfp->match; ! p[0] = mtt; ! STRCPY(p + 1, tag_fname); #ifdef BACKSLASH_IN_FILENAME ! /* Ignore differences in slashes, avoid adding ! * both path/file and path\file. */ ! slash_adjust(p + 1); #endif ! s = p + 1 + STRLEN(tag_fname) + 1; #ifdef FEAT_EMACS_TAGS ! if (is_etag) ! { ! STRCPY(s, ebuf); ! s += STRLEN(ebuf) + 1; ! } ! else ! *s++ = NUL; ! #endif ! STRCPY(s, lbuf); } } ! if (mfp != NULL) ! { ! /* ! * Don't add identical matches. ! * This can take a lot of time when finding many ! * matches, check for CTRL-C now and then. ! * Add all cscope tags, because they are all listed. ! */ #ifdef FEAT_CSCOPE ! if (use_cscope) ! i = -1; ! else #endif ! for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; ) ! { ! mfp2 = ((struct match_found **) ! (ga_match[mtt].ga_data))[i]; ! if (mfp2->len == mfp->len ! && memcmp(mfp2->match, mfp->match, ! (size_t)mfp->len) == 0) ! break; ! fast_breakcheck(); ! } ! if (i < 0) { ! ((struct match_found **)(ga_match[mtt].ga_data)) ! [ga_match[mtt].ga_len++] = mfp; ! ++match_count; } else ! vim_free(mfp); } ! } ! else /* Out of memory! Just forget about the rest. */ ! { ! retval = OK; ! stop_searching = TRUE; ! break; } } #ifdef FEAT_CSCOPE --- 2261,2429 ---- } /* ! * Add the found match in ht_match[mtt]. * Store the info we need later, which depends on the kind of * tags we are dealing with. */ ! if (help_only) { #ifdef FEAT_MULTI_LANG # define ML_EXTRA 3 #else # define ML_EXTRA 0 #endif ! /* ! * Append the help-heuristic number after the tagname, for ! * sorting it later. The heuristic is ignored for ! * detecting duplicates. ! * The format is {tagname}@{lang}NUL{heuristic}NUL ! */ ! *tagp.tagname_end = NUL; ! len = (int)(tagp.tagname_end - tagp.tagname); ! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 10 + ML_EXTRA + 1); ! if (mfp != NULL) ! { ! int heuristic; ! ! p = mfp; ! STRCPY(p, tagp.tagname); #ifdef FEAT_MULTI_LANG ! p[len] = '@'; ! STRCPY(p + len + 1, help_lang); #endif ! heuristic = help_heuristic(tagp.tagname, ! match_re ? matchoff : 0, !match_no_ic); #ifdef FEAT_MULTI_LANG ! heuristic += help_pri; #endif ! sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", ! heuristic); } ! *tagp.tagname_end = TAB; ! } ! else if (name_only) ! { ! if (get_it_again) { ! char_u *temp_end = tagp.command; ! if (*temp_end == '/') ! while (*temp_end && *temp_end != '\r' ! && *temp_end != '\n' ! && *temp_end != '$') ! temp_end++; ! if (tagp.command + 2 < temp_end) { ! len = (int)(temp_end - tagp.command - 2); ! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1); if (mfp != NULL) ! vim_strncpy(mfp, tagp.command + 2, len); } + else + mfp = NULL; + get_it_again = FALSE; } else { ! len = (int)(tagp.tagname_end - tagp.tagname); ! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1); ! if (mfp != NULL) ! vim_strncpy(mfp, tagp.tagname, len); ! ! /* if wanted, re-read line to get long form too */ ! if (State & INSERT) ! get_it_again = p_sft; ! } ! } ! else ! { ! #define TAG_SEP 0x01 ! size_t tag_fname_len = STRLEN(tag_fname); #ifdef FEAT_EMACS_TAGS ! size_t ebuf_len = 0; #endif ! ! /* Save the tag in a buffer. ! * Use 0x01 to separate fields (Can't use NUL, because the ! * hash key is terminated by NUL). ! * Emacs tag: <0x01><0x01> ! * other tag: <0x01><0x01> ! * without Emacs tags: <0x01> ! */ ! len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3; ! #ifdef FEAT_EMACS_TAGS ! if (is_etag) ! { ! ebuf_len = STRLEN(ebuf); ! len += (int)ebuf_len + 1; ! } ! else ! ++len; ! #endif ! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1); ! if (mfp != NULL) ! { ! p = mfp; ! p[0] = mtt; ! STRCPY(p + 1, tag_fname); #ifdef BACKSLASH_IN_FILENAME ! /* Ignore differences in slashes, avoid adding ! * both path/file and path\file. */ ! slash_adjust(p + 1); #endif ! p[tag_fname_len + 1] = TAG_SEP; ! s = p + 1 + tag_fname_len + 1; #ifdef FEAT_EMACS_TAGS ! if (is_etag) ! { ! STRCPY(s, ebuf); ! s[ebuf_len] = TAG_SEP; ! s += ebuf_len + 1; } + else + *s++ = TAG_SEP; + #endif + STRCPY(s, lbuf); } + } ! if (mfp != NULL) ! { ! hashitem_T *hi; ! ! /* ! * Don't add identical matches. ! * Add all cscope tags, because they are all listed. ! * "mfp" is used as a hash key, there is a NUL byte to end ! * the part matters for comparing, more bytes may follow ! * after it. E.g. help tags store the priority after the ! * NUL. ! */ #ifdef FEAT_CSCOPE ! if (use_cscope) ! hash++; ! else #endif ! hash = hash_hash(mfp); ! hi = hash_lookup(&ht_match[mtt], mfp, hash); ! if (HASHITEM_EMPTY(hi)) ! { ! if (hash_add_item(&ht_match[mtt], hi, mfp, hash) ! == FAIL) { ! /* Out of memory! Just forget about the rest. */ ! retval = OK; ! stop_searching = TRUE; ! break; } else ! ++match_count; } ! else ! /* duplicate tag, drop it */ ! vim_free(mfp); } } #ifdef FEAT_CSCOPE *************** *** 2532,2538 **** #endif /* ! * Move the matches from the ga_match[] arrays into one list of * matches. When retval == FAIL, free the matches. */ if (retval == FAIL) --- 2521,2527 ---- #endif /* ! * Move the matches from the ht_match[] arrays into one list of * matches. When retval == FAIL, free the matches. */ if (retval == FAIL) *************** *** 2546,2567 **** match_count = 0; for (mtt = 0; mtt < MT_COUNT; ++mtt) { ! for (i = 0; i < ga_match[mtt].ga_len; ++i) { ! mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i]; ! if (matches == NULL) ! vim_free(mfp); ! else { ! /* To avoid allocating memory again we turn the struct ! * match_found into a string. For help the priority was not ! * included in the length. */ ! mch_memmove(mfp, mfp->match, ! (size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0))); ! matches[match_count++] = (char_u *)mfp; } } ! ga_clear(&ga_match[mtt]); } *matchesp = matches; --- 2535,2563 ---- match_count = 0; for (mtt = 0; mtt < MT_COUNT; ++mtt) { ! hashitem_T *hi; ! long_u todo; ! ! todo = (long)ht_match[mtt].ht_used; ! for (hi = ht_match[mtt].ht_array; todo > 0; ++hi) { ! if (!HASHITEM_EMPTY(hi)) { ! mfp = hi->hi_key; ! if (matches == NULL) ! vim_free(mfp); ! else ! { ! /* now change the TAG_SEP back to NUL */ ! for (p = mfp; *p != NUL; ++p) ! if (*p == TAG_SEP) ! *p = NUL; ! matches[match_count++] = (char_u *)mfp; ! } ! todo--; } } ! hash_clear(&ht_match[mtt]); } *matchesp = matches; *** ../vim-8.0.0189/src/version.c 2017-01-15 15:22:14.162467173 +0100 --- src/version.c 2017-01-15 15:51:01.027012795 +0100 *************** *** 766,767 **** --- 766,769 ---- { /* Add new patch number below this line */ + /**/ + 190, /**/ -- This is an airconditioned room, do not open Windows. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///