To: vim_dev@googlegroups.com Subject: Patch 7.3.1169 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1169 Problem: New regexp engine: some work is done while executing a pattern, even though the result is predictable. Solution: Do the work while compiling the pattern. Files: src/regexp_nfa.c *** ../vim-7.3.1168/src/regexp_nfa.c 2013-06-11 18:42:28.000000000 +0200 --- src/regexp_nfa.c 2013-06-11 22:40:12.000000000 +0200 *************** *** 64,72 **** --- 64,76 ---- NFA_NOPEN, /* Start of subexpression marked with \%( */ NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ NFA_START_INVISIBLE, + NFA_START_INVISIBLE_FIRST, NFA_START_INVISIBLE_NEG, + NFA_START_INVISIBLE_NEG_FIRST, NFA_START_INVISIBLE_BEFORE, + NFA_START_INVISIBLE_BEFORE_FIRST, NFA_START_INVISIBLE_BEFORE_NEG, + NFA_START_INVISIBLE_BEFORE_NEG_FIRST, NFA_START_PATTERN, NFA_END_INVISIBLE, NFA_END_INVISIBLE_NEG, *************** *** 286,291 **** --- 290,296 ---- static int *re2post __ARGS((void)); static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1)); static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); + static void nfa_postprocess __ARGS((nfa_regprog_T *prog)); static int check_char_class __ARGS((int class, int c)); static void st_error __ARGS((int *postfix, int *end, int *p)); static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list)); *************** *** 297,302 **** --- 302,309 ---- static void nfa_regfree __ARGS((regprog_T *prog)); static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); + static int match_follows __ARGS((nfa_state_T *startstate, int depth)); + static int failure_chance __ARGS((nfa_state_T *state, int depth)); /* helper functions used when doing re2post() ... regatom() parsing */ #define EMIT(c) do { \ *************** *** 2040,2051 **** --- 2047,2066 ---- case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; + case NFA_START_INVISIBLE_FIRST: + STRCPY(code, "NFA_START_INVISIBLE_FIRST"); break; case NFA_START_INVISIBLE_NEG: STRCPY(code, "NFA_START_INVISIBLE_NEG"); break; + case NFA_START_INVISIBLE_NEG_FIRST: + STRCPY(code, "NFA_START_INVISIBLE_NEG_FIRST"); break; case NFA_START_INVISIBLE_BEFORE: STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; + case NFA_START_INVISIBLE_BEFORE_FIRST: + STRCPY(code, "NFA_START_INVISIBLE_BEFORE_FIRST"); break; case NFA_START_INVISIBLE_BEFORE_NEG: STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break; + case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: + STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG_FIRST"); break; case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break; *************** *** 3318,3323 **** --- 3333,3395 ---- #undef PUSH } + /* + * After building the NFA program, inspect it to add optimization hints. + */ + static void + nfa_postprocess(prog) + nfa_regprog_T *prog; + { + int i; + int c; + + for (i = 0; i < prog->nstate; ++i) + { + c = prog->state[i].c; + if (c == NFA_START_INVISIBLE + || c == NFA_START_INVISIBLE_NEG + || c == NFA_START_INVISIBLE_BEFORE + || c == NFA_START_INVISIBLE_BEFORE_NEG) + { + int directly; + + /* Do it directly when what follows is possibly the end of the + * match. */ + if (match_follows(prog->state[i].out1->out, 0)) + directly = TRUE; + else + { + int ch_invisible = failure_chance(prog->state[i].out, 0); + int ch_follows = failure_chance(prog->state[i].out1->out, 0); + + /* Postpone when the invisible match is expensive or has a + * lower chance of failing. */ + if (c == NFA_START_INVISIBLE_BEFORE + || c == NFA_START_INVISIBLE_BEFORE_NEG) + { + /* "before" matches are very expensive when + * unbounded, always prefer what follows then, + * unless what follows will always match. + * Otherwise strongly prefer what follows. */ + if (prog->state[i].val <= 0 && ch_follows > 0) + directly = FALSE; + else + directly = ch_follows * 10 < ch_invisible; + } + else + { + /* normal invisible, first do the one with the + * highest failure chance */ + directly = ch_follows < ch_invisible; + } + } + if (directly) + /* switch to the _FIRST state */ + ++prog->state[i].c; + } + } + } + /**************************************************************** * NFA execution code. ****************************************************************/ *************** *** 3457,3463 **** static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from)); static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs)); - static int match_follows __ARGS((nfa_state_T *startstate, int depth)); static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs)); static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int off)); static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip)); --- 3529,3534 ---- *************** *** 3703,3711 **** --- 3774,3786 ---- || match_follows(state->out1, depth + 1); case NFA_START_INVISIBLE: + case NFA_START_INVISIBLE_FIRST: case NFA_START_INVISIBLE_BEFORE: + case NFA_START_INVISIBLE_BEFORE_FIRST: case NFA_START_INVISIBLE_NEG: + case NFA_START_INVISIBLE_NEG_FIRST: case NFA_START_INVISIBLE_BEFORE_NEG: + case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: case NFA_COMPOSING: /* skip ahead to next state */ state = state->out1->out; *************** *** 4440,4446 **** } if (state->c == NFA_START_INVISIBLE_BEFORE ! || state->c == NFA_START_INVISIBLE_BEFORE_NEG) { /* The recursive match must end at the current position. When "pim" is * not NULL it specifies the current position. */ --- 4515,4523 ---- } if (state->c == NFA_START_INVISIBLE_BEFORE ! || state->c == NFA_START_INVISIBLE_BEFORE_FIRST ! || state->c == NFA_START_INVISIBLE_BEFORE_NEG ! || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST) { /* The recursive match must end at the current position. When "pim" is * not NULL it specifies the current position. */ *************** *** 4581,4587 **** return result; } - static int failure_chance __ARGS((nfa_state_T *state, int depth)); static int skip_to_start __ARGS((int c, colnr_T *colp)); static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text)); --- 4658,4663 ---- *************** *** 5093,5142 **** break; case NFA_START_INVISIBLE: case NFA_START_INVISIBLE_NEG: case NFA_START_INVISIBLE_BEFORE: case NFA_START_INVISIBLE_BEFORE_NEG: { - int directly = FALSE; - #ifdef ENABLE_LOG fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n", failure_chance(t->state->out, 0), failure_chance(t->state->out1->out, 0)); #endif ! /* Do it directly when what follows is possibly the end of ! * the match. ! * Do it directly if there already is a PIM. ! * Postpone when the invisible match is expensive or has a ! * lower chance of failing. */ ! if (match_follows(t->state->out1->out, 0) ! || t->pim.result != NFA_PIM_UNUSED) ! directly = TRUE; ! else ! { ! int ch_invisible = failure_chance(t->state->out, 0); ! int ch_follows = failure_chance(t->state->out1->out, 0); ! ! if (t->state->c == NFA_START_INVISIBLE_BEFORE ! || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG) ! { ! /* "before" matches are very expensive when ! * unbounded, always prefer what follows then, ! * unless what follows will always match. ! * Otherwise strongly prefer what follows. */ ! if (t->state->val <= 0 && ch_follows > 0) ! directly = FALSE; ! else ! directly = ch_follows * 10 < ch_invisible; ! } ! else ! { ! /* normal invisible, first do the one with the ! * highest failure chance */ ! directly = ch_follows < ch_invisible; ! } ! } ! if (directly) { /* * First try matching the invisible match, then what --- 5169,5194 ---- break; case NFA_START_INVISIBLE: + case NFA_START_INVISIBLE_FIRST: case NFA_START_INVISIBLE_NEG: + case NFA_START_INVISIBLE_NEG_FIRST: case NFA_START_INVISIBLE_BEFORE: + case NFA_START_INVISIBLE_BEFORE_FIRST: case NFA_START_INVISIBLE_BEFORE_NEG: + case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: { #ifdef ENABLE_LOG fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n", failure_chance(t->state->out, 0), failure_chance(t->state->out1->out, 0)); #endif ! /* Do it directly if there already is a PIM or when ! * nfa_postprocess() detected it will work better. */ ! if (t->pim.result != NFA_PIM_UNUSED ! || t->state->c == NFA_START_INVISIBLE_FIRST ! || t->state->c == NFA_START_INVISIBLE_NEG_FIRST ! || t->state->c == NFA_START_INVISIBLE_BEFORE_FIRST ! || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST) { /* * First try matching the invisible match, then what *************** *** 5148,5155 **** /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || t->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); --- 5200,5210 ---- /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || t->state->c == NFA_START_INVISIBLE_NEG_FIRST ! || t->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG ! || t->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); *************** *** 5920,5927 **** /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&pim->subs.norm, &m->norm); --- 5975,5985 ---- /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { /* Copy submatch info from the recursive call */ copy_sub_off(&pim->subs.norm, &m->norm); *************** *** 5944,5951 **** /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &pim->subs.norm); --- 6002,6012 ---- /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG ! || pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &pim->subs.norm); *************** *** 6413,6421 **** prog->has_backref = nfa_has_backref; prog->nsubexp = regnpar; prog->reganch = nfa_get_reganch(prog->start, 0); prog->regstart = nfa_get_regstart(prog->start, 0); - prog->match_text = nfa_get_match_text(prog->start); #ifdef ENABLE_LOG --- 6474,6483 ---- prog->has_backref = nfa_has_backref; prog->nsubexp = regnpar; + nfa_postprocess(prog); + prog->reganch = nfa_get_reganch(prog->start, 0); prog->regstart = nfa_get_regstart(prog->start, 0); prog->match_text = nfa_get_match_text(prog->start); #ifdef ENABLE_LOG *** ../vim-7.3.1168/src/version.c 2013-06-11 20:53:24.000000000 +0200 --- src/version.c 2013-06-11 22:43:27.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1169, /**/ -- hundred-and-one symptoms of being an internet addict: 156. You forget your friend's name but not her e-mail address. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///