Add codepoint() and mbchar() functions.

This commit is contained in:
Dianne Skoll
2025-09-10 17:33:57 -04:00
parent 3c6191ba61
commit 55975154b1
5 changed files with 120 additions and 3 deletions

View File

@@ -166,13 +166,13 @@
(defconst remind-builtin-functions
(sort
(list "_" "abs" "access" "adawn" "adusk" "ampm" "ansicolor" "args" "asc"
"baseyr" "catch" "catcherr" "char" "choose" "coerce" "columns" "const" "current" "date"
"baseyr" "catch" "catcherr" "char" "choose" "codepoint" "coerce" "columns" "const" "current" "date"
"datepart" "datetime" "dawn" "day" "daysinmon" "defined" "dosubst"
"dusk" "easterdate" "escape" "eval" "evaltrig" "filedate" "filedatetime"
"filedir" "filename" "getenv" "hebdate" "hebday" "hebmon" "hebyear"
"hour" "htmlescape" "htmlstriptags" "iif" "index" "isany" "isconst" "isdst"
"isleap" "isomitted" "language" "localtoutc" "lower" "max"
"mbindex" "mbstrlen" "mbsubstr" "min"
"mbasc" "mbindex" "mbstrlen" "mbsubstr" "min"
"minsfromutc" "minute" "mon" "monnum" "moondate" "moondatetime"
"moonphase" "moonrise" "moonrisedir" "moonset" "moonsetdir" "moontime"
"multitrig" "ndawn" "ndusk" "nonconst" "nonomitted" "now" "ord" "orthodoxeaster"

View File

@@ -3651,12 +3651,17 @@ function has been defined previously. The \fBargs()\fR function is
available only in versions of \fBRemind\fR from 03.00.04 and up.
.TP
.B asc(s_string)
Returns an \fBINT\fR that is the ASCII code of the first character
Returns an \fBINT\fR that is the ASCII code of the first byte
in \fIstring\fR. As a special case, \fBasc("")\fR returns 0. For UTF-8
strings, this will return the UTF-8 byte with which the string
begins, which is not likely to be very useful (and may indeed be negative
on machines where \fBchar\fR is a signed type.)
.TP
.B codepoint(s_string)
Returns an \fBINT\fR that is the code point of the first character
in \fIstring\fR, treating multi-byte characters correctly. As a special case,
\fBcodepoint("")\fR returns 0.
.TP
.B baseyr()
Returns the "base year" that was compiled into \fBRemind\fR (normally
1990.) All dates are stored internally as the number of days since
@@ -3712,6 +3717,14 @@ It is easy to create invalid UTF-8 sequences; \fBchar\fR does not check
for this. Note that none of the arguments can be 0, unless there is only one
argument. As a special case, \fBchar(0)\fR returns "".
.TP
.B mbchar(i_i1 [,i_i2...])
This function can take any number of \fBINT\fR arguments. It returns
a \fBSTRING\fR consisting of the characters specified by the
arguments. Any codepoint may be supplied and a correct multi-byte
character string will be returned. Note that none of the arguments
can be 0, unless there is only one argument. As a special case,
\fBmbchar(0)\fR returns "".
.TP
.B choose(i_index, x_arg1 [,x_arg2...])
\fBChoose\fR must take at least two arguments, the first of which is
an \fBINT\fR. If \fIindex\fR is \fIn\fR, then the \fIn\fRth subsequent

View File

@@ -87,6 +87,7 @@ static int FCatch (expr_node *, Value *, Value *, int *);
static int FCatchErr (func_info *);
static int FChar (func_info *);
static int FChoose (expr_node *, Value *, Value *, int *);
static int FCodepoint (func_info *);
static int FCoerce (func_info *);
static int FColumns (func_info *);
static int FCurrent (func_info *);
@@ -126,6 +127,7 @@ static int FLanguage (func_info *);
static int FLocalToUTC (func_info *);
static int FLower (func_info *);
static int FMax (func_info *);
static int FMbchar (func_info *);
static int FMbindex (func_info *);
static int FMbstrlen (func_info *);
static int FMbsubstr (func_info *);
@@ -268,6 +270,7 @@ BuiltinFunc Func[] = {
{ "catcherr", 0, 0, 0, FCatchErr, NULL },
{ "char", 1, NO_MAX, 1, FChar, NULL },
{ "choose", 2, NO_MAX, 1, NULL, FChoose }, /*NEW-STYLE*/
{ "codepoint", 1, 1, 1, FCodepoint, NULL },
{ "coerce", 2, 2, 1, FCoerce, NULL },
{ "columns", 0, 1, 0, FColumns, NULL },
{ "const", 1, 1, 1, FNonconst, NULL },
@@ -308,6 +311,7 @@ BuiltinFunc Func[] = {
{ "localtoutc", 1, 1, 1, FLocalToUTC, NULL },
{ "lower", 1, 1, 1, FLower, NULL },
{ "max", 1, NO_MAX, 1, FMax, NULL },
{ "mbchar", 1, NO_MAX, 1, FMbchar, NULL },
{ "mbindex", 2, 3, 1, FMbindex, NULL },
{ "mbstrlen", 1, 1, 1, FMbstrlen, NULL },
{ "mbsubstr", 2, 3, 1, FMbsubstr, NULL },
@@ -730,6 +734,28 @@ static int FAsc(func_info *info)
return OK;
}
/***************************************************************/
/*                                                             */
/*  FCodepoint - code point of the first character of a string */
/*                                                             */
/*  Takes one STR argument and returns an INT holding the      */
/*  wide-character value of its first multi-byte character,    */
/*  decoded according to the current locale.  An empty string  */
/*  yields 0.  Returns E_BAD_MB_SEQ if the string does not     */
/*  begin with a valid multi-byte character.                   */
/*                                                             */
/***************************************************************/
static int FCodepoint(func_info *info)
{
    wchar_t wc = 0;
    int nbytes;

    ASSERT_TYPE(0, STR_TYPE);

    /* Reset mbtowc's internal conversion state so that a previous
       failed or partial conversion cannot affect this one. */
    (void) mbtowc(NULL, NULL, 0);

    /* Decode only the first character.  Converting more than that
       (as mbstowcs would) makes the call fail on garbage that
       follows a perfectly valid first character.  A NUL byte can
       never be part of another multi-byte character, so decoding
       stops at the string terminator at the latest. */
    nbytes = mbtowc(&wc, ARGSTR(0), MB_CUR_MAX);
    if (nbytes < 0) {
        return E_BAD_MB_SEQ;
    }

    RetVal.type = INT_TYPE;
    /* mbtowc returns 0 when the string is empty */
    RETVAL = (nbytes == 0) ? 0 : (int) wc;
    return OK;
}
/***************************************************************/
/* */
/* FChar - build a string from ASCII values */
@@ -778,6 +804,62 @@ static int FChar(func_info *info)
*(RetVal.v.str + Nargs) = 0;
return OK;
}
/***************************************************************/
/*                                                             */
/*  FMbchar - build a string from wide character code points   */
/*                                                             */
/*  Takes one or more INT arguments, each a code point, and    */
/*  returns a STRING holding the corresponding multi-byte      */
/*  characters in the current locale.                          */
/*                                                             */
/*  A single argument of 0 yields the empty string.  Otherwise */
/*  every argument must be positive, or E_2LOW is returned.    */
/*  Returns E_BAD_MB_SEQ if a code point cannot be converted   */
/*  and E_NO_MEM if memory cannot be allocated.                */
/*                                                             */
/***************************************************************/
static int FMbchar(func_info *info)
{
    int i;
    size_t len;
    wchar_t *arr;
    char *s;

    for (i=0; i<Nargs; i++) {
        ASSERT_TYPE(i, INT_TYPE);
    }

    /* Special case of one arg - if given value 0, create empty string */
    if (Nargs == 1 && ARGV(0) == 0) {
        return RetStrVal("", info);
    }

    /* Validate every argument BEFORE allocating anything, so that
       the E_2LOW error return cannot leak the wide-char buffer. */
    for (i=0; i<Nargs; i++) {
        if (ARGV(i) <= 0) {
            return E_2LOW;
        }
    }

    /* One extra slot for the terminating wide NUL */
    arr = calloc(Nargs+1, sizeof(wchar_t));
    if (!arr) {
        return E_NO_MEM;
    }

    for (i=0; i<Nargs; i++) {
        arr[i] = (wchar_t) ARGV(i);
    }
    arr[Nargs] = (wchar_t) 0;

    /* First pass with a NULL destination: just measure how many
       bytes the multi-byte form needs (not counting the NUL). */
    len = wcstombs(NULL, arr, 0);
    if (len == (size_t) -1) {
        free( (void *) arr);
        return E_BAD_MB_SEQ;
    }

    s = malloc(len+1);
    if (!s) {
        free( (void *) arr);
        return E_NO_MEM;
    }

    /* Second pass: do the conversion; len+1 leaves room for the NUL */
    (void) wcstombs(s, arr, len+1);
    free( (void *) arr);

    RetVal.type = STR_TYPE;
    RetVal.v.str = s;
    return OK;
}
/***************************************************************/
/* */
/* Functions for extracting the components of a date. */

View File

@@ -16635,6 +16635,18 @@ faces => "🙂🙂🙂🙂🙂xyzçççéfoo"
substr("<22><EFBFBD><EFBFBD><EFBFBD><EFBFBD>凾xyzテァテァテァテゥfoo", 2) => "泗を泗を泗を泗を泗yzテァテァテァテゥfoo"
faces => "<22><EFBFBD><EFBFBD><EFBFBD><EFBFBD>凾xyzテァテァテァテゥfoo"
substr("<22><EFBFBD><EFBFBD><EFBFBD><EFBFBD>凾xyzテァテァテァテゥfoo", 2, 9) => "泗を泗を"
faces => "ðŸ™ðŸ™ðŸ™ðŸ™ðŸ™xyzçççéfoo"
codepoint("ðŸ™ðŸ™ðŸ™ðŸ™ðŸ™xyzçççéfoo") => 128578
mbchar(128578, 162, 122) => "ðŸ™Â¢z"
bad => "ÿ"
codepoint("ÿ") => Invalid multibyte sequence
../tests/test.rem(1762): codepoint(): Invalid multibyte sequence
codepoint("") => 0
mbchar(0) => ""
mbchar(0, 120) => Number too low
../tests/test.rem(1765): mbchar(): Number too low
mbchar(120, 0) => Number too low
../tests/test.rem(1766): mbchar(): Number too low
Variable hash table statistics:
Entries: 100146; Buckets: 87719; Non-empty Buckets: 66303
Maxlen: 5; Minlen: 0; Avglen: 1.142; Stddev: 0.878; Avg nonempty len: 1.510
@@ -24592,6 +24604,7 @@ catch
catcherr
char
choose
codepoint
coerce
columns
const
@@ -24632,6 +24645,7 @@ language
localtoutc
lower
max
mbchar
mbindex
mbstrlen
mbsubstr

View File

@@ -1757,6 +1757,14 @@ set a mbsubstr(bad, 1, 20)
set a substr(faces, 2)
set a substr(faces, 2, 9)
set a codepoint(faces)
set a mbchar(128578, 162, 122)
set a codepoint(bad)
set a codepoint("")
set a mbchar(0)
set a mbchar(0, 120)
set a mbchar(120, 0)
DEBUG -x
# Don't want Remind to queue reminders
EXIT