To: vim_dev@googlegroups.com Subject: Patch 8.2.1536 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.1536 Problem: Cannot get the class of a character; emoji widths are wrong in some environments. Solution: Add charclass(). Update some emoji widths. Add script to check emoji widths. Files: Filelist, runtime/doc/eval.txt, runtime/doc/usr_41.txt, src/evalfunc.c, src/mbyte.c, src/proto/mbyte.pro, src/testdir/emoji_list.vim, src/testdir/test_functions.vim *** ../vim-8.2.1535/Filelist 2020-08-13 22:47:20.369992748 +0200 --- Filelist 2020-08-28 22:02:41.425978440 +0200 *************** *** 197,202 **** --- 197,203 ---- src/testdir/samples/*.txt \ src/testdir/samples/test000 \ src/testdir/color_ramp.vim \ + src/testdir/emoji_list.vim \ src/testdir/silent.wav \ src/testdir/popupbounce.vim \ src/proto.h \ *** ../vim-8.2.1535/runtime/doc/eval.txt 2020-08-28 21:04:20.498881375 +0200 --- runtime/doc/eval.txt 2020-08-28 22:07:17.464773040 +0200 *************** *** 2413,2418 **** --- 2426,2432 ---- String status of channel {handle} changenr() Number current change number char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr} + charclass({string}) Number character class of {string} chdir({dir}) String change current working directory cindent({lnum}) Number C indent for line {lnum} clearmatches([{win}]) none clear all matches *************** *** 3504,3509 **** --- 3522,3539 ---- Can also be used as a |method|: > GetChar()->char2nr() + + charclass({string}) *charclass()* + Return the character class of the first character in {string}. + The character class is one of: + 0 blank + 1 punctuation + 2 word character + 3 emoji + other specific Unicode class + The class is used in patterns and word motions. + + chdir({dir}) *chdir()* Change the current working directory to {dir}. The scope of the directory change depends on the directory of the current *** ../vim-8.2.1535/runtime/doc/usr_41.txt 2020-08-28 21:04:20.498881375 +0200 --- runtime/doc/usr_41.txt 2020-08-28 22:08:37.648420528 +0200 *************** *** 592,597 **** --- 600,606 ---- strtrans() translate a string to make it printable tolower() turn a string to lowercase toupper() turn a string to uppercase + charclass() class of a character match() position where a pattern matches in a string matchend() position where a pattern match ends in a string matchstr() match of a pattern in a string *** ../vim-8.2.1535/src/evalfunc.c 2020-08-28 21:04:20.502881357 +0200 --- src/evalfunc.c 2020-08-28 21:18:30.577820602 +0200 *************** *** 564,569 **** --- 564,570 ---- {"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)}, {"changenr", 0, 0, 0, ret_number, f_changenr}, {"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr}, + {"charclass", 1, 1, FEARG_1, ret_number, f_charclass}, {"chdir", 1, 1, FEARG_1, ret_string, f_chdir}, {"cindent", 1, 1, FEARG_1, ret_number, f_cindent}, {"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches}, *** ../vim-8.2.1535/src/mbyte.c 2020-08-28 21:04:20.502881357 +0200 --- src/mbyte.c 2020-08-28 22:14:14.950930035 +0200 *************** *** 132,138 **** --- 132,140 ---- static int dbcs_ptr2cells_len(char_u *p, int size); static int dbcs_ptr2char(char_u *p); static int dbcs_head_off(char_u *base, char_u *p); + #ifdef FEAT_EVAL static int cw_value(int c); + #endif /* * Lookup table to quickly get the length in bytes of a UTF-8 character from *************** *** 1388,1395 **** {0x26ce, 0x26ce}, {0x26d4, 0x26d4}, {0x26ea, 0x26ea}, ! {0x26f2, 0x26f3}, ! {0x26f5, 0x26f5}, {0x26fa, 0x26fa}, {0x26fd, 0x26fd}, {0x2705, 0x2705}, --- 1390,1396 ---- {0x26ce, 0x26ce}, {0x26d4, 0x26d4}, {0x26ea, 0x26ea}, ! {0x26f2, 0x26f5}, {0x26fa, 0x26fa}, {0x26fd, 0x26fd}, {0x2705, 0x2705}, *************** *** 1490,1495 **** --- 1491,1511 ---- // based on http://unicode.org/emoji/charts/emoji-list.html static struct interval emoji_wide[] = { + {0x23ed, 0x23ef}, + {0x23f1, 0x23f2}, + {0x23f8, 0x23fa}, + {0x24c2, 0x24c2}, + {0x261d, 0x261d}, + {0x26c8, 0x26c8}, + {0x26cf, 0x26cf}, + {0x26d1, 0x26d1}, + {0x26d3, 0x26d3}, + {0x26e9, 0x26e9}, + {0x26f0, 0x26f1}, + {0x26f7, 0x26f9}, + {0x270c, 0x270d}, + {0x2934, 0x2935}, + {0x1f170, 0x1f189}, {0x1f1e6, 0x1f1ff}, {0x1f321, 0x1f321}, {0x1f324, 0x1f32c}, *************** *** 1533,1543 **** --- 1549,1563 ---- if (c >= 0x100) { + #if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS) int n; + #endif + #ifdef FEAT_EVAL n = cw_value(c); if (n != 0) return n; + #endif #ifdef USE_WCHAR_FUNCTIONS /* *************** *** 2667,2674 **** {0x3299, 0x3299}, {0x1f004, 0x1f004}, {0x1f0cf, 0x1f0cf}, ! {0x1f170, 0x1f171}, ! {0x1f17e, 0x1f17f}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a}, {0x1f1e6, 0x1f1ff}, --- 2687,2693 ---- {0x3299, 0x3299}, {0x1f004, 0x1f004}, {0x1f0cf, 0x1f0cf}, ! {0x1f170, 0x1f189}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a}, {0x1f1e6, 0x1f1ff}, *************** *** 2835,2840 **** --- 2854,2863 ---- return 1; // punctuation } + // emoji + if (intable(emoji_all, sizeof(emoji_all), c)) + return 3; + // binary search in table while (top >= bot) { *************** *** 2847,2856 **** return (int)classes[mid].class; } - // emoji - if (intable(emoji_all, sizeof(emoji_all), c)) - return 3; - // most other characters are "word" characters return 2; } --- 2870,2875 ---- *************** *** 5352,5357 **** --- 5371,5378 ---- return retval; } + #if defined(FEAT_EVAL) || defined(PROTO) + /* * Table set by setcellwidths(). */ *************** *** 5525,5527 **** --- 5546,5562 ---- cw_table = table; cw_table_size = l->lv_len; } + + void + f_charclass(typval_T *argvars, typval_T *rettv UNUSED) + { + if (argvars[0].v_type != VAR_STRING + || argvars[0].vval.v_string == NULL + || *argvars[0].vval.v_string == NUL) + { + emsg(_(e_stringreq)); + return; + } + rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string); + } + #endif *** ../vim-8.2.1535/src/proto/mbyte.pro 2020-08-28 21:04:20.502881357 +0200 --- src/proto/mbyte.pro 2020-08-28 21:23:20.604288455 +0200 *************** *** 85,88 **** --- 85,89 ---- char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp); char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp); void f_setcellwidths(typval_T *argvars, typval_T *rettv); + void f_charclass(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */ *** ../vim-8.2.1535/src/testdir/emoji_list.vim 2020-08-28 22:16:31.414375282 +0200 --- src/testdir/emoji_list.vim 2020-08-28 21:39:21.151554959 +0200 *************** *** 0 **** --- 1,22 ---- + " Script to fill the window with emoji characters, one per line. + + if &modified + new + else + enew + endif + + " Use a compiled Vim9 function for speed + def DoIt() + let lnum = 1 + for c in range(0x100, 0x1ffff) + let cs = nr2char(c) + if charclass(cs) == 3 + setline(lnum, '|' .. cs .. '| ' .. strwidth(cs)) + lnum += 1 + endif + endfor + enddef + + call DoIt() + set nomodified *** ../vim-8.2.1535/src/testdir/test_functions.vim 2020-08-23 17:33:43.773458055 +0200 --- src/testdir/test_functions.vim 2020-08-28 22:22:47.260947940 +0200 *************** *** 2077,2082 **** --- 2077,2089 ---- set encoding=utf-8 endfunc + func Test_charclass() + call assert_equal(0, charclass(' ')) + call assert_equal(1, charclass('.')) + call assert_equal(2, charclass('x')) + call assert_equal(3, charclass("\u203c")) + endfunc + func Test_eventhandler() call assert_equal(0, eventhandler()) endfunc *** ../vim-8.2.1535/src/version.c 2020-08-28 21:04:20.502881357 +0200 --- src/version.c 2020-08-28 21:20:42.189114372 +0200 *************** *** 756,757 **** --- 756,759 ---- { /* Add new patch number below this line */ + /**/ + 1536, /**/ -- DINGO: You must spank her well and after you have spanked her you may deal with her as you like and then ... spank me. AMAZING: And spank me! STUNNER: And me. LOVELY: And me. "Monty Python and the Holy Grail" PYTHON (MONTY) PICTURES LTD /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///