diff --git a/contrib/remind-conf-mode/remind-conf-mode.el b/contrib/remind-conf-mode/remind-conf-mode.el index 2983b5bd..d8db0103 100644 --- a/contrib/remind-conf-mode/remind-conf-mode.el +++ b/contrib/remind-conf-mode/remind-conf-mode.el @@ -166,13 +166,13 @@ (defconst remind-builtin-functions (sort (list "_" "abs" "access" "adawn" "adusk" "ampm" "ansicolor" "args" "asc" - "baseyr" "catch" "catcherr" "char" "choose" "coerce" "columns" "const" "current" "date" + "baseyr" "catch" "catcherr" "char" "choose" "codepoint" "coerce" "columns" "const" "current" "date" "datepart" "datetime" "dawn" "day" "daysinmon" "defined" "dosubst" "dusk" "easterdate" "escape" "eval" "evaltrig" "filedate" "filedatetime" "filedir" "filename" "getenv" "hebdate" "hebday" "hebmon" "hebyear" "hour" "htmlescape" "htmlstriptags" "iif" "index" "isany" "isconst" "isdst" "isleap" "isomitted" "language" "localtoutc" "lower" "max" - "mbindex" "mbstrlen" "mbsubstr" "min" + "mbasc" "mbindex" "mbstrlen" "mbsubstr" "min" "minsfromutc" "minute" "mon" "monnum" "moondate" "moondatetime" "moonphase" "moonrise" "moonrisedir" "moonset" "moonsetdir" "moontime" "multitrig" "ndawn" "ndusk" "nonconst" "nonomitted" "now" "ord" "orthodoxeaster" diff --git a/man/remind.1.in b/man/remind.1.in index 1f9247fd..f9cf1bd6 100644 --- a/man/remind.1.in +++ b/man/remind.1.in @@ -3651,12 +3651,17 @@ function has been defined previously. The \fBargs()\fR function is available only in versions of \fBRemind\fR from 03.00.04 and up. .TP .B asc(s_string) -Returns an \fBINT\fR that is the ASCII code of the first character +Returns an \fBINT\fR that is the ASCII code of the first byte in \fIstring\fR. As a special case, \fBasc("")\fR returns 0. For UTF-8 strings, this will return the UTF-8 byte with which the string begins, which is not likely to be very useful (and may indeed be negative on machines where \fBchar\fR is a signed type.) 
.TP +.B codepoint(s_string) +Returns an \fBINT\fR that is the code point of the first character +in \fIstring\fR, treating multi-byte characters correctly. As a special case, +\fBcodepoint("")\fR returns 0. +.TP .B baseyr() Returns the "base year" that was compiled into \fBRemind\fR (normally 1990.) All dates are stored internally as the number of days since @@ -3712,6 +3717,14 @@ It is easy to create invalid UTF-8 sequences; \fBchar\fR does not check for this. Note that none of the arguments can be 0, unless there is only one argument. As a special case, \fBchar(0)\fR returns "". .TP +.B mbchar(i_i1 [,i_i2...]) +This function can take any number of \fBINT\fR arguments. It returns +a \fBSTRING\fR consisting of the characters specified by the +arguments. Any codepoint may be supplied and a correct multi-byte +character string will be returned. Note that none of the arguments +can be 0, unless there is only one argument. As a special case, +\fBmbchar(0)\fR returns "". +.TP .B choose(i_index, x_arg1 [,x_arg2...]) \fBChoose\fR must take at least two arguments, the first of which is an \fBINT\fR. 
If \fIindex\fR is \fIn\fR, then the \fIn\fRth subsequent diff --git a/src/funcs.c b/src/funcs.c index 27fb59c1..4180f95f 100644 --- a/src/funcs.c +++ b/src/funcs.c @@ -87,6 +87,7 @@ static int FCatch (expr_node *, Value *, Value *, int *); static int FCatchErr (func_info *); static int FChar (func_info *); static int FChoose (expr_node *, Value *, Value *, int *); +static int FCodepoint (func_info *); static int FCoerce (func_info *); static int FColumns (func_info *); static int FCurrent (func_info *); @@ -126,6 +127,7 @@ static int FLanguage (func_info *); static int FLocalToUTC (func_info *); static int FLower (func_info *); static int FMax (func_info *); +static int FMbchar (func_info *); static int FMbindex (func_info *); static int FMbstrlen (func_info *); static int FMbsubstr (func_info *); @@ -268,6 +270,7 @@ BuiltinFunc Func[] = { { "catcherr", 0, 0, 0, FCatchErr, NULL }, { "char", 1, NO_MAX, 1, FChar, NULL }, { "choose", 2, NO_MAX, 1, NULL, FChoose }, /*NEW-STYLE*/ + { "codepoint", 1, 1, 1, FCodepoint, NULL }, { "coerce", 2, 2, 1, FCoerce, NULL }, { "columns", 0, 1, 0, FColumns, NULL }, { "const", 1, 1, 1, FNonconst, NULL }, @@ -308,6 +311,7 @@ BuiltinFunc Func[] = { { "localtoutc", 1, 1, 1, FLocalToUTC, NULL }, { "lower", 1, 1, 1, FLower, NULL }, { "max", 1, NO_MAX, 1, FMax, NULL }, + { "mbchar", 1, NO_MAX, 1, FMbchar, NULL }, { "mbindex", 2, 3, 1, FMbindex, NULL }, { "mbstrlen", 1, 1, 1, FMbstrlen, NULL }, { "mbsubstr", 2, 3, 1, FMbsubstr, NULL }, @@ -730,6 +734,28 @@ static int FAsc(func_info *info) return OK; } +/***************************************************************/ +/* */ +/* FCodepoint - wide-character codepoint of start of str */ +/* */ +/***************************************************************/ +static int FCodepoint(func_info *info) +{ + wchar_t arr[2]; + size_t len; + + ASSERT_TYPE(0, STR_TYPE); + + len = mbstowcs(arr, ARGSTR(0), sizeof(arr) / sizeof(arr[0])); + if (len == (size_t) -1) { + return E_BAD_MB_SEQ; + } + + 
RetVal.type = INT_TYPE; + RETVAL = (int) arr[0]; + return OK; +} + /***************************************************************/ /* */ /* FChar - build a string from ASCII values */ @@ -778,6 +804,62 @@ static int FChar(func_info *info) *(RetVal.v.str + Nargs) = 0; return OK; } + +/***************************************************************/ +/* */ +/* FMbchar - build a string from wide character code points */ +/* */ +/***************************************************************/ +static int FMbchar(func_info *info) +{ + + int i; + size_t len; + wchar_t *arr; + char *s; + + for (i=0; i "🙂🙂🙂🙂🙂xyzçççéfoo" substr("🙂🙂🙂🙂🙂xyzçççéfoo", 2) => "Ÿ™‚🙂🙂🙂🙂xyzçççéfoo" faces => "🙂🙂🙂🙂🙂xyzçççéfoo" substr("🙂🙂🙂🙂🙂xyzçççéfoo", 2, 9) => "Ÿ™‚🙂ð" +faces => "🙂🙂🙂🙂🙂xyzçççéfoo" +codepoint("🙂🙂🙂🙂🙂xyzçççéfoo") => 128578 +mbchar(128578, 162, 122) => "🙂¢z" +bad => "ÿ" +codepoint("ÿ") => Invalid multibyte sequence +../tests/test.rem(1762): codepoint(): Invalid multibyte sequence +codepoint("") => 0 +mbchar(0) => "" +mbchar(0, 120) => Number too low +../tests/test.rem(1765): mbchar(): Number too low +mbchar(120, 0) => Number too low +../tests/test.rem(1766): mbchar(): Number too low Variable hash table statistics: Entries: 100146; Buckets: 87719; Non-empty Buckets: 66303 Maxlen: 5; Minlen: 0; Avglen: 1.142; Stddev: 0.878; Avg nonempty len: 1.510 @@ -24592,6 +24604,7 @@ catch catcherr char choose +codepoint coerce columns const @@ -24632,6 +24645,7 @@ language localtoutc lower max +mbchar mbindex mbstrlen mbsubstr diff --git a/tests/test.rem b/tests/test.rem index fad6abb6..7d99a420 100644 --- a/tests/test.rem +++ b/tests/test.rem @@ -1757,6 +1757,14 @@ set a mbsubstr(bad, 1, 20) set a substr(faces, 2) set a substr(faces, 2, 9) +set a codepoint(faces) +set a mbchar(128578, 162, 122) +set a codepoint(bad) +set a codepoint("") +set a mbchar(0) +set a mbchar(0, 120) +set a mbchar(120, 0) + DEBUG -x # Don't want Remind to queue reminders EXIT