To: vim_dev@googlegroups.com Subject: Patch 8.2.2278 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.2278 Problem: Falling back to old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572) Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test_regexp_utf8.vim *** ../vim-8.2.2277/src/regexp.c 2020-09-07 18:53:18.383974577 +0200 --- src/regexp.c 2021-01-02 17:34:22.627683939 +0100 *************** *** 294,299 **** --- 294,300 ---- static char_u *regparse; // Input-scan pointer. static int regnpar; // () count. + static int wants_nfa; // regex should use NFA engine #ifdef FEAT_SYN_HL static int regnzpar; // \z() count. static int re_has_z; // \z item detected *************** *** 381,386 **** --- 382,390 ---- static char_u *cstrchr(char_u *, int); static int re_mult_next(char *what); static int reg_iswordc(int); + #ifdef FEAT_EVAL + static void report_re_switch(char_u *pat); + #endif static regengine_T bt_regengine; static regengine_T nfa_regengine; *************** *** 2662,2668 **** if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG ! if (regexp_engine != BACKTRACKING_ENGINE) // debugging log for NFA { FILE *f; f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); --- 2666,2672 ---- if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG ! 
if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine { FILE *f; f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); *************** *** 2686,2691 **** --- 2690,2698 ---- && called_emsg == called_emsg_before) { regexp_engine = BACKTRACKING_ENGINE; + #ifdef FEAT_EVAL + report_re_switch(expr); + #endif prog = bt_regengine.regcomp(expr, re_flags); } } *** ../vim-8.2.2277/src/regexp_nfa.c 2020-12-21 18:23:56.687287184 +0100 --- src/regexp_nfa.c 2021-01-02 17:40:10.658093030 +0100 *************** *** 253,258 **** --- 253,264 ---- static int *post_start; // holds the postfix form of r.e. static int *post_end; static int *post_ptr; + + // Set when the pattern should use the NFA engine. + // E.g. [[:upper:]] only allows 8bit characters for BT engine, + // while NFA engine handles multibyte characters correctly. + static int wants_nfa; + static int nstate; // Number of states in the NFA. static int istate; // Index in the state vector, used in alloc_state() *************** *** 306,311 **** --- 312,318 ---- return FAIL; post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = FALSE; rex.nfa_has_zend = FALSE; rex.nfa_has_backref = FALSE; *************** *** 1707,1712 **** --- 1714,1720 ---- EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = TRUE; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: *************** *** 1719,1724 **** --- 1727,1733 ---- EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = TRUE; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: *************** *** 2137,2145 **** // The engine is very inefficient (uses too many states) when the // maximum is much larger than the minimum and when the maximum is ! // large. Bail out if we can use the other engine. if ((nfa_re_flags & RE_AUTO) ! 
&& (maxval > 500 || maxval > minval + 200)) return FAIL; // Ignore previous call to nfa_regatom() --- 2146,2160 ---- // The engine is very inefficient (uses too many states) when the // maximum is much larger than the minimum and when the maximum is ! // large. However, when maxval is MAX_LIMIT, it is okay, as this ! // will emit NFA_STAR. ! // Bail out if we can use the other engine, but only, when the ! // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\} ! // does not work with characters > 8 bit with the BT engine) if ((nfa_re_flags & RE_AUTO) ! && (maxval > 500 || maxval > minval + 200) ! && (maxval != MAX_LIMIT && minval < 200) ! && !wants_nfa) return FAIL; // Ignore previous call to nfa_regatom() *** ../vim-8.2.2277/src/testdir/test_regexp_utf8.vim 2020-12-21 14:54:28.844116987 +0100 --- src/testdir/test_regexp_utf8.vim 2021-01-02 17:34:22.627683939 +0100 *************** *** 510,515 **** --- 510,561 ---- bwipe! endfunc + " Check that [[:upper:]] matches for automatic engine + func Test_match_char_class_upper() + new + let _engine=&regexpengine + " Test 1: [[:upper:]]\{2,\} + set regexpengine=0 + call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...']) + call cursor(1,1) + let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>" + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 1') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + + " Test 2: [[:upper:]].\+ + let search_cmd='norm /\<[[:upper:]].\+\>' .. 
"\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + set regexpengine=1 + exe search_cmd + call assert_equal(1, searchcount().total, 'TEST 2') + set regexpengine=2 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + + " Test 3: [[:lower:]]\+ + let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 3 lower') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + + " clean up + let &regexpengine=_engine + bwipe! + endfunc " vim: shiftwidth=2 sts=2 expandtab *** ../vim-8.2.2277/src/version.c 2021-01-02 17:06:12.538765972 +0100 --- src/version.c 2021-01-02 17:35:54.359249438 +0100 *************** *** 752,753 **** --- 752,755 ---- { /* Add new patch number below this line */ + /**/ + 2278, /**/ -- How many light bulbs does it take to change a person? /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///