To: vim_dev@googlegroups.com Subject: Patch 8.2.4562 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.4562 Problem: Linear tag search is not optimal. Solution: Improve linear tag search performance. (Yegappan Lakshmanan, closes #9944) Files: src/tag.c *** ../vim-8.2.4561/src/tag.c 2022-03-12 17:38:26.354365193 +0000 --- src/tag.c 2022-03-13 19:22:12.280550180 +0000 *************** *** 1642,1675 **** typedef struct { tagsearch_state_T state; // tag search state char_u *tag_fname; // name of the tag file FILE *fp; // current tags file pointer - pat_T orgpat; // holds unconverted pattern info int flags; // flags used for tag search int tag_file_sorted; // !_TAG_FILE_SORTED value int get_searchpat; // used for 'showfulltag' int help_only; // only search for help tags - vimconv_T vimconv; - #ifdef FEAT_MULTI_LANG - char_u help_lang[3]; // lang of current tags file - int help_pri; // help language priority - char_u *help_lang_find; // lang to be found - int is_txt; // flag of file extension - #endif int did_open; // did open a tag file int mincount; // MAXCOL: find all matches // other: minimal number of matches int linear; // do a linear search ! char_u *lbuf; // line buffer ! int lbuf_size; // length of lbuf #ifdef FEAT_EMACS_TAGS int is_etag; // current file is emacs style char_u *ebuf; // additional buffer for etag fname #endif int match_count; // number of matches found garray_T ga_match[MT_COUNT]; // stores matches in sequence hashtab_T ht_match[MT_COUNT]; // stores matches by key - int stop_searching; // stop when match found or error } findtags_state_T; /* --- 1642,1675 ---- typedef struct { tagsearch_state_T state; // tag search state + int stop_searching; // stop when match found or error + pat_T *orgpat; // holds unconverted pattern info + char_u *lbuf; // line buffer + int lbuf_size; // length of lbuf char_u *tag_fname; // name of the tag file FILE *fp; // current tags file pointer int flags; // flags used for tag search int tag_file_sorted; // !_TAG_FILE_SORTED value int get_searchpat; // used for 'showfulltag' int help_only; // only search for help tags int did_open; // did open a tag file int mincount; // MAXCOL: find all matches // other: minimal number of matches int linear; // do a linear search ! vimconv_T vimconv; #ifdef FEAT_EMACS_TAGS int is_etag; // current file is emacs style char_u *ebuf; // additional buffer for etag fname #endif + #ifdef FEAT_MULTI_LANG + char_u help_lang[3]; // lang of current tags file + int help_pri; // help language priority + char_u *help_lang_find; // lang to be found + int is_txt; // flag of file extension + #endif int match_count; // number of matches found garray_T ga_match[MT_COUNT]; // stores matches in sequence hashtab_T ht_match[MT_COUNT]; // stores matches by key } findtags_state_T; /* *************** *** 1687,1695 **** st->tag_fname = alloc(MAXPATHL + 1); st->fp = NULL; ! st->orgpat.pat = pat; ! st->orgpat.len = (int)STRLEN(pat); ! st->orgpat.regmatch.regprog = NULL; st->flags = flags; st->tag_file_sorted = NUL; st->help_only = (flags & TAG_HELP); --- 1687,1696 ---- st->tag_fname = alloc(MAXPATHL + 1); st->fp = NULL; ! st->orgpat = ALLOC_ONE(pat_T); ! st->orgpat->pat = pat; ! st->orgpat->len = (int)STRLEN(pat); ! st->orgpat->regmatch.regprog = NULL; st->flags = flags; st->tag_file_sorted = NUL; st->help_only = (flags & TAG_HELP); *************** *** 1736,1742 **** { vim_free(st->tag_fname); vim_free(st->lbuf); ! vim_regfree(st->orgpat.regmatch.regprog); #ifdef FEAT_EMACS_TAGS vim_free(st->ebuf); #endif --- 1737,1744 ---- { vim_free(st->tag_fname); vim_free(st->lbuf); ! vim_regfree(st->orgpat->regmatch.regprog); ! vim_free(st->orgpat); #ifdef FEAT_EMACS_TAGS vim_free(st->ebuf); #endif *************** *** 1940,1945 **** --- 1942,1948 ---- st->fp = incstack[incstack_idx].fp; STRCPY(st->tag_fname, incstack[incstack_idx].etag_fname); vim_free(incstack[incstack_idx].etag_fname); + st->is_etag = TRUE; // (only etags can include) return TRUE; } *************** *** 2099,2110 **** { #ifdef FEAT_EMACS_TAGS if (emacs_tags_file_eof(st) == TRUE) - { // an included tags file. Continue processing the parent // tags file. - st->is_etag = TRUE; // (only etags can include) return TAGS_READ_IGNORE; - } #endif return TAGS_READ_EOF; } --- 2102,2110 ---- *************** *** 2193,2204 **** { st->state = TS_BINARY; *sortic = TRUE; ! st->orgpat.regmatch.rm_ic = (p_ic || !noic); } else st->state = TS_LINEAR; ! if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic) { // Binary search won't work for ignoring case, use linear // search. --- 2193,2204 ---- { st->state = TS_BINARY; *sortic = TRUE; ! st->orgpat->regmatch.rm_ic = (p_ic || !noic); } else st->state = TS_LINEAR; ! if (st->state == TS_BINARY && st->orgpat->regmatch.rm_ic && !*sortic) { // Binary search won't work for ignoring case, use linear // search. *************** *** 2241,2254 **** * Returns FAIL if a format error is encountered. */ static int ! findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp) { int status; // Figure out where the different strings are in this line. // For "normal" tags: Do a quick check if the tag matches. // This speeds up tag searching a lot! ! if (st->orgpat.headlen #ifdef FEAT_EMACS_TAGS && !st->is_etag #endif --- 2241,2261 ---- * Returns FAIL if a format error is encountered. */ static int ! findtags_parse_line( ! findtags_state_T *st, ! tagptrs_T *tagpp, ! findtags_match_args_T *margs, ! tagsearch_info_T *sinfo_p) { int status; + int i; + int cmplen; + int tagcmp; // Figure out where the different strings are in this line. // For "normal" tags: Do a quick check if the tag matches. // This speeds up tag searching a lot! ! if (st->orgpat->headlen #ifdef FEAT_EMACS_TAGS && !st->is_etag #endif *************** *** 2258,2346 **** tagpp->tagname = st->lbuf; tagpp->tagname_end = vim_strchr(st->lbuf, TAB); if (tagpp->tagname_end == NULL) - { // Corrupted tag line. ! return FAIL; ! } ! ! // Can be a matching tag, isolate the file name and command. ! tagpp->fname = tagpp->tagname_end + 1; ! tagpp->fname_end = vim_strchr(tagpp->fname, TAB); ! if (tagpp->fname_end == NULL) ! status = FAIL; ! else ! { ! tagpp->command = tagpp->fname_end + 1; ! status = OK; ! } ! } ! else ! status = parse_tag_line(st->lbuf, ! #ifdef FEAT_EMACS_TAGS ! st->is_etag, ! #endif ! tagpp); ! ! if (status == FAIL) ! return FAIL; ! ! #ifdef FEAT_EMACS_TAGS ! if (st->is_etag) ! tagpp->fname = st->ebuf; ! #endif ! ! return OK; ! } ! ! /* ! * Initialize the structure used for tag matching. ! */ ! static void ! findtags_matchargs_init(findtags_match_args_T *margs, int flags) ! { ! margs->matchoff = 0; // match offset ! margs->match_re = FALSE; // match with regexp ! margs->match_no_ic = FALSE; // matches with case ! margs->has_re = (flags & TAG_REGEXP); // regexp used ! margs->sortic = FALSE; // tag file sorted in nocase ! margs->sort_error = FALSE; // tags file not sorted ! } ! ! /* ! * Compares the tag name in 'tagpp->tagname' with a search pattern in ! * 'st->orgpat.head'. ! * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag ! * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a ! * binary search) and TAG_MATCH_STOP if all the tags are processed without a ! * match. Uses the values in 'margs' for doing the comparison. ! */ ! static tagmatch_status_T ! findtags_match_tag( ! findtags_state_T *st, ! tagptrs_T *tagpp, ! findtags_match_args_T *margs, ! tagsearch_info_T *sinfo_p) ! { ! int match = FALSE; ! int cmplen; ! int i; ! int tagcmp; ! // Skip this line if the length of the tag is different and ! // there is no regexp, or the tag is too short. ! if (st->orgpat.headlen ! #ifdef FEAT_EMACS_TAGS ! && !st->is_etag ! #endif ! ) ! { cmplen = (int)(tagpp->tagname_end - tagpp->tagname); if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; ! if (margs->has_re && st->orgpat.headlen < cmplen) ! cmplen = st->orgpat.headlen; ! else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen) ! return TAG_MATCH_FAIL; if (st->state == TS_BINARY) { --- 2265,2282 ---- tagpp->tagname = st->lbuf; tagpp->tagname_end = vim_strchr(st->lbuf, TAB); if (tagpp->tagname_end == NULL) // Corrupted tag line. ! return TAG_MATCH_FAIL; ! // Skip this line if the length of the tag is different and ! // there is no regexp, or the tag is too short. cmplen = (int)(tagpp->tagname_end - tagpp->tagname); if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; ! if ((st->flags & TAG_REGEXP) && st->orgpat->headlen < cmplen) ! cmplen = st->orgpat->headlen; ! else if (st->state == TS_LINEAR && st->orgpat->headlen != cmplen) ! return TAG_MATCH_NEXT; if (st->state == TS_BINARY) { *************** *** 2353,2370 **** // Compare the current tag with the searched tag. if (margs->sortic) ! tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat.head, (size_t)cmplen); else ! tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen); // A match with a shorter tag means to search forward. // A match with a longer tag means to search backward. if (tagcmp == 0) { ! if (cmplen < st->orgpat.headlen) tagcmp = -1; ! else if (cmplen > st->orgpat.headlen) tagcmp = 1; } --- 2289,2306 ---- // Compare the current tag with the searched tag. if (margs->sortic) ! tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat->head, (size_t)cmplen); else ! tagcmp = STRNCMP(tagpp->tagname, st->orgpat->head, cmplen); // A match with a shorter tag means to search forward. // A match with a longer tag means to search backward. if (tagcmp == 0) { ! if (cmplen < st->orgpat->headlen) tagcmp = -1; ! else if (cmplen > st->orgpat->headlen) tagcmp = 1; } *************** *** 2404,2410 **** } else if (st->state == TS_SKIP_BACK) { ! if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) st->state = TS_STEP_FORWARD; else // Have to skip back more. Restore the curr_offset --- 2340,2346 ---- } else if (st->state == TS_SKIP_BACK) { ! if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) st->state = TS_STEP_FORWARD; else // Have to skip back more. Restore the curr_offset *************** *** 2414,2420 **** } else if (st->state == TS_STEP_FORWARD) { ! if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) { if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset) return TAG_MATCH_STOP; // past last match --- 2350,2356 ---- } else if (st->state == TS_STEP_FORWARD) { ! if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) { if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset) return TAG_MATCH_STOP; // past last match *************** *** 2424,2432 **** } else // skip this match if it can't match ! if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) ! return TAG_MATCH_FAIL; } // First try matching with the pattern literally (also when it is // a regexp). --- 2360,2427 ---- } else // skip this match if it can't match ! if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) ! return TAG_MATCH_NEXT; ! ! // Can be a matching tag, isolate the file name and command. ! tagpp->fname = tagpp->tagname_end + 1; ! tagpp->fname_end = vim_strchr(tagpp->fname, TAB); ! if (tagpp->fname_end == NULL) ! status = FAIL; ! else ! { ! tagpp->command = tagpp->fname_end + 1; ! status = OK; ! } } + else + status = parse_tag_line(st->lbuf, + #ifdef FEAT_EMACS_TAGS + st->is_etag, + #endif + tagpp); + + if (status == FAIL) + return TAG_MATCH_FAIL; + + #ifdef FEAT_EMACS_TAGS + if (st->is_etag) + tagpp->fname = st->ebuf; + #endif + + return TAG_MATCH_SUCCESS; + } + + /* + * Initialize the structure used for tag matching. + */ + static void + findtags_matchargs_init(findtags_match_args_T *margs, int flags) + { + margs->matchoff = 0; // match offset + margs->match_re = FALSE; // match with regexp + margs->match_no_ic = FALSE; // matches with case + margs->has_re = (flags & TAG_REGEXP); // regexp used + margs->sortic = FALSE; // tag file sorted in nocase + margs->sort_error = FALSE; // tags file not sorted + } + + /* + * Compares the tag name in 'tagpp->tagname' with a search pattern in + * 'st->orgpat.head'. + * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag + * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a + * binary search) and TAG_MATCH_STOP if all the tags are processed without a + * match. Uses the values in 'margs' for doing the comparison. + */ + static tagmatch_status_T + findtags_match_tag( + findtags_state_T *st, + tagptrs_T *tagpp, + findtags_match_args_T *margs) + { + int match = FALSE; + int cmplen; // First try matching with the pattern literally (also when it is // a regexp). *************** *** 2434,2474 **** if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; // if tag length does not match, don't try comparing ! if (st->orgpat.len != cmplen) match = FALSE; else { ! if (st->orgpat.regmatch.rm_ic) { match = ! (MB_STRNICMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); if (match) margs->match_no_ic = ! (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); } else ! match = (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); } // Has a regexp: Also find tags matching regexp. margs->match_re = FALSE; ! if (!match && st->orgpat.regmatch.regprog != NULL) { int cc; cc = *tagpp->tagname_end; *tagpp->tagname_end = NUL; ! match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0); if (match) { ! margs->matchoff = (int)(st->orgpat.regmatch.startp[0] - tagpp->tagname); ! if (st->orgpat.regmatch.rm_ic) { ! st->orgpat.regmatch.rm_ic = FALSE; ! margs->match_no_ic = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0); ! st->orgpat.regmatch.rm_ic = TRUE; } } *tagpp->tagname_end = cc; --- 2429,2469 ---- if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; // if tag length does not match, don't try comparing ! if (st->orgpat->len != cmplen) match = FALSE; else { ! if (st->orgpat->regmatch.rm_ic) { match = ! (MB_STRNICMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); if (match) margs->match_no_ic = ! (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); } else ! match = (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); } // Has a regexp: Also find tags matching regexp. margs->match_re = FALSE; ! if (!match && st->orgpat->regmatch.regprog != NULL) { int cc; cc = *tagpp->tagname_end; *tagpp->tagname_end = NUL; ! match = vim_regexec(&st->orgpat->regmatch, tagpp->tagname, (colnr_T)0); if (match) { ! margs->matchoff = (int)(st->orgpat->regmatch.startp[0] - tagpp->tagname); ! if (st->orgpat->regmatch.rm_ic) { ! st->orgpat->regmatch.rm_ic = FALSE; ! margs->match_no_ic = vim_regexec(&st->orgpat->regmatch, tagpp->tagname, (colnr_T)0); ! st->orgpat->regmatch.rm_ic = TRUE; } } *tagpp->tagname_end = cc; *************** *** 2571,2577 **** else mtt = MT_GL_OTH; } ! if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic) mtt += MT_IC_OFF; if (margs->match_re) mtt += MT_RE_OFF; --- 2566,2572 ---- else mtt = MT_GL_OTH; } ! if (st->orgpat->regmatch.rm_ic && !margs->match_no_ic) mtt += MT_IC_OFF; if (margs->match_re) mtt += MT_RE_OFF; *************** *** 2857,2863 **** continue; } ! if (findtags_parse_line(st, &tagp) == FAIL) { semsg(_(e_format_error_in_tags_file_str), st->tag_fname); #ifdef FEAT_CSCOPE --- 2852,2863 ---- continue; } ! retval = findtags_parse_line(st, &tagp, margs, &search_info); ! if (retval == TAG_MATCH_NEXT) ! continue; ! if (retval == TAG_MATCH_STOP) ! break; ! if (retval == TAG_MATCH_FAIL) { semsg(_(e_format_error_in_tags_file_str), st->tag_fname); #ifdef FEAT_CSCOPE *************** *** 2868,2874 **** return; } ! retval = findtags_match_tag(st, &tagp, margs, &search_info); if (retval == TAG_MATCH_NEXT) continue; if (retval == TAG_MATCH_STOP) --- 2868,2874 ---- return; } ! retval = findtags_match_tag(st, &tagp, margs); if (retval == TAG_MATCH_NEXT) continue; if (retval == TAG_MATCH_STOP) *************** *** 2895,2901 **** { findtags_match_args_T margs; #ifdef FEAT_CSCOPE ! int use_cscope = FALSE; #endif st->vimconv.vc_type = CONV_NONE; --- 2895,2901 ---- { findtags_match_args_T margs; #ifdef FEAT_CSCOPE ! int use_cscope = (st->flags & TAG_CSCOPE); #endif st->vimconv.vc_type = CONV_NONE; *************** *** 2906,2912 **** // A file that doesn't exist is silently ignored. Only when not a // single file is found, an error message is given (further on). #ifdef FEAT_CSCOPE - use_cscope = (st->flags & TAG_CSCOPE); if (use_cscope) st->fp = NULL; // avoid GCC warning else --- 2906,2911 ---- *************** *** 3111,3138 **** { // When "@ab" is specified use only the "ab" language, otherwise // search all languages. ! if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@' ! && ASCII_ISALPHA(pat[st.orgpat.len - 2]) ! && ASCII_ISALPHA(pat[st.orgpat.len - 1])) { ! saved_pat = vim_strnsave(pat, st.orgpat.len - 3); if (saved_pat != NULL) { ! st.help_lang_find = &pat[st.orgpat.len - 2]; ! st.orgpat.pat = saved_pat; ! st.orgpat.len -= 3; } } } #endif ! if (p_tl != 0 && st.orgpat.len > p_tl) // adjust for 'taglength' ! st.orgpat.len = p_tl; save_emsg_off = emsg_off; emsg_off = TRUE; // don't want error for invalid RE here ! prepare_pats(&st.orgpat, has_re); emsg_off = save_emsg_off; ! if (has_re && st.orgpat.regmatch.regprog == NULL) goto findtag_end; #ifdef FEAT_EVAL --- 3110,3137 ---- { // When "@ab" is specified use only the "ab" language, otherwise // search all languages. ! if (st.orgpat->len > 3 && pat[st.orgpat->len - 3] == '@' ! && ASCII_ISALPHA(pat[st.orgpat->len - 2]) ! && ASCII_ISALPHA(pat[st.orgpat->len - 1])) { ! saved_pat = vim_strnsave(pat, st.orgpat->len - 3); if (saved_pat != NULL) { ! st.help_lang_find = &pat[st.orgpat->len - 2]; ! st.orgpat->pat = saved_pat; ! st.orgpat->len -= 3; } } } #endif ! if (p_tl != 0 && st.orgpat->len > p_tl) // adjust for 'taglength' ! st.orgpat->len = p_tl; save_emsg_off = emsg_off; emsg_off = TRUE; // don't want error for invalid RE here ! prepare_pats(st.orgpat, has_re); emsg_off = save_emsg_off; ! if (has_re && st.orgpat->regmatch.regprog == NULL) goto findtag_end; #ifdef FEAT_EVAL *************** *** 3164,3174 **** * When the tag file is case-fold sorted, it is either one or the other. * Only ignore case when TAG_NOIC not used or 'ignorecase' set. */ ! st.orgpat.regmatch.rm_ic = ((p_ic || !noic) ! && (findall || st.orgpat.headlen == 0 || !p_tbs)); for (round = 1; round <= 2; ++round) { ! st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2); /* * Try tag file names from tags option one by one. --- 3163,3173 ---- * When the tag file is case-fold sorted, it is either one or the other. * Only ignore case when TAG_NOIC not used or 'ignorecase' set. */ ! st.orgpat->regmatch.rm_ic = ((p_ic || !noic) ! && (findall || st.orgpat->headlen == 0 || !p_tbs)); for (round = 1; round <= 2; ++round) { ! st.linear = (st.orgpat->headlen == 0 || !p_tbs || round == 2); /* * Try tag file names from tags option one by one. *************** *** 3200,3206 **** // stop searching when already did a linear search, or when TAG_NOIC // used, and 'ignorecase' not set or already did case-ignore search if (st.stop_searching || st.linear || (!p_ic && noic) || ! st.orgpat.regmatch.rm_ic) break; # ifdef FEAT_CSCOPE if (use_cscope) --- 3199,3205 ---- // stop searching when already did a linear search, or when TAG_NOIC // used, and 'ignorecase' not set or already did case-ignore search if (st.stop_searching || st.linear || (!p_ic && noic) || ! st.orgpat->regmatch.rm_ic) break; # ifdef FEAT_CSCOPE if (use_cscope) *************** *** 3208,3214 **** # endif // try another time while ignoring case ! st.orgpat.regmatch.rm_ic = TRUE; } if (!st.stop_searching) --- 3207,3213 ---- # endif // try another time while ignoring case ! st.orgpat->regmatch.rm_ic = TRUE; } if (!st.stop_searching) *** ../vim-8.2.4561/src/version.c 2022-03-13 19:08:44.685537723 +0000 --- src/version.c 2022-03-13 19:23:35.912344047 +0000 *************** *** 752,753 **** --- 752,755 ---- { /* Add new patch number below this line */ + /**/ + 4562, /**/ -- hundred-and-one symptoms of being an internet addict: 254. You wake up daily with your keyboard printed on your forehead. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// \\\ \\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///