commit 4d85a682220a55d83e8c6460808329e72becca36
Author: Christoph Lohmann <20h@r-36.net>
Date: Mon, 21 May 2012 17:35:22 +0200
Initial commit.
Diffstat:
LICENSE | | | 21 | +++++++++++++++++++++ |
Makefile | | | 56 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
arg.h | | | 41 | +++++++++++++++++++++++++++++++++++++++++ |
config.mk | | | 23 | +++++++++++++++++++++++ |
utf8expr.1 | | | 41 | +++++++++++++++++++++++++++++++++++++++++ |
utf8expr.c | | | 173 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 355 insertions(+), 0 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT/X Consortium License
+
+© 2012 Christoph Lohmann <20h@r-36.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,56 @@
+# utf8expr – expr(1) for utf8
+# See LICENSE file for copyright and license details.
+
+# NAME, VERSION, install paths, compiler and flags are set in config.mk.
+include config.mk
+
+SRC = ${NAME}.c
+OBJ = ${SRC:.c=.o}
+
+all: options ${NAME}
+
+# Show the effective build settings before anything is compiled.
+options:
+	@echo ${NAME} build options:
+	@echo "CFLAGS = ${CFLAGS}"
+	@echo "LDFLAGS = ${LDFLAGS}"
+	@echo "CC = ${CC}"
+
+.c.o:
+	@echo CC $<
+	@${CC} -c ${CFLAGS} $<
+
+# Rebuild every object when the configuration changes.
+${OBJ}: config.mk
+
+${NAME}: ${OBJ}
+	@echo CC -o $@
+	@${CC} -o $@ ${OBJ} ${LDFLAGS}
+
+clean:
+	@echo cleaning
+	@rm -f ${NAME} ${OBJ} ${NAME}-${VERSION}.tar.gz
+
+# The manual page lives in section 1 (${NAME}.1), matching what the
+# install target copies; there is no ${NAME}.8 in the tree.
+dist: clean
+	@echo creating dist tarball
+	@mkdir -p ${NAME}-${VERSION}
+	@cp -R LICENSE Makefile config.mk \
+		${SRC} ${NAME}.1 *.h ${NAME}-${VERSION}
+	@tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
+	@gzip ${NAME}-${VERSION}.tar
+	@rm -rf ${NAME}-${VERSION}
+
+install: all
+	@echo installing executable file to ${DESTDIR}${PREFIX}/bin
+	@mkdir -p ${DESTDIR}${PREFIX}/bin
+	@cp -f ${NAME} ${DESTDIR}${PREFIX}/bin
+	@chmod 755 ${DESTDIR}${PREFIX}/bin/${NAME}
+	@echo installing manual page to ${DESTDIR}${MANPREFIX}/man1
+	@mkdir -p ${DESTDIR}${MANPREFIX}/man1
+	@cp -f ${NAME}.1 ${DESTDIR}${MANPREFIX}/man1
+	@chmod 644 ${DESTDIR}${MANPREFIX}/man1/${NAME}.1
+
+# The message names the same directory the rm below actually touches.
+uninstall:
+	@echo removing executable file from ${DESTDIR}${PREFIX}/bin
+	@rm -f ${DESTDIR}${PREFIX}/bin/${NAME}
+	@echo removing manual page from ${DESTDIR}${MANPREFIX}/man1
+	@rm -f ${DESTDIR}${MANPREFIX}/man1/${NAME}.1
+
+.PHONY: all options clean dist install uninstall
diff --git a/arg.h b/arg.h
@@ -0,0 +1,41 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#ifndef __ARG_H__
+#define __ARG_H__
+
+/* Program name; the including source file must define it. ARGBEGIN sets it. */
+extern char *argv0;
+
+/* Evaluate x and discard the value, so the compiler sees x as used. */
+#define USED(x) ((void)(x))
+
+/*
+ * Option parsing loop. Usage, with argc and argv in scope:
+ *
+ *	ARGBEGIN {
+ *	case 'x':
+ *		...
+ *		break;
+ *	default:
+ *		...
+ *	} ARGEND;
+ *
+ * The macro stores argv[0] in argv0 and steps past it, then consumes
+ * every following word that starts with '-' and has at least one more
+ * character (a lone "-" therefore ends parsing and is left in argv).
+ * The word "--" is consumed and ends parsing. Each character after the
+ * '-' is handed to the switch body in turn, so grouped flags such as
+ * "-ab" work. If a case advances argv (for example through EARGF), the
+ * rest of the current word is not scanned.
+ *
+ * The leading '-' is tested before the second character so that an
+ * empty-string argument is never read past its terminating NUL.
+ */
+#define ARGBEGIN for (argv0 = *argv, argv++, argc--;\
+			argv[0] && argv[0][0] == '-'\
+			&& argv[0][1];\
+			argc--, argv++) {\
+		char _argc;\
+		char **_argv;\
+		if (argv[0][1] == '-' && argv[0][2] == '\0') {\
+			argv++;\
+			argc--;\
+			break;\
+		}\
+		for (argv[0]++, _argv = argv; argv[0][0];\
+				argv[0]++) {\
+			if (_argv != argv)\
+				break;\
+			_argc = argv[0][0];\
+			switch (_argc)
+
+/* Closes the loops opened by ARGBEGIN; argv/argc then describe the operands. */
+#define ARGEND }\
+		USED(_argc);\
+	}\
+	USED(argv);\
+	USED(argc);
+
+/*
+ * Fetch the argument of the current option from the next argv word and
+ * advance past it. If there is no next word, x is evaluated and abort()
+ * is called, so the includer needs <stdlib.h>. Only the separate form
+ * ("-o value") is handled; an attached value ("-ovalue") is not.
+ */
+#define EARGF(x) ((argv[1] == NULL)? ((x), abort(), (char *)0) :\
+		(argc--, argv++, argv[0]))
+
+#endif
+
diff --git a/config.mk b/config.mk
@@ -0,0 +1,23 @@
+# utf8expr metadata
+NAME = utf8expr
+VERSION = 0.8
+
+# Customize below to fit your system
+
+# paths (the Makefile installs below ${DESTDIR}${PREFIX} and ${DESTDIR}${MANPREFIX})
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# includes and libs
+INCS = -I. -I/usr/include
+LIBS = -L/usr/lib -lc
+
+# flags (VERSION is passed to the compiler as a string-literal macro)
+CPPFLAGS = -DVERSION=\"${VERSION}\"
+CFLAGS = -g -std=c99 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
+LDFLAGS = -static -g ${LIBS}
+#LDFLAGS = -s ${LIBS}
+
+# compiler and linker
+CC = cc
+
diff --git a/utf8expr.1 b/utf8expr.1
@@ -0,0 +1,41 @@
+.Dd May 21, 2012
+.Dt UTF8EXPR 1
+.Os
+.
+.Sh NAME
+.Nm utf8expr
+.Nd evaluate UTF-8 expressions
+.
+.Sh SYNOPSIS
+.Nm
+.Bk -words
+EXPRESSION
+.Ek
+.
+.Sh DESCRIPTION
+.Bd -filled
+.Nm
+evaluates a subset of the expr(1) syntax, treating each UTF-8 encoded
+character as a single unit.
+.Ed
+.
+.Sh EXPRESSIONS
+.Pp
+.Bl -tag -width ".Fl test Ao Ar string Ac"
+.
+.It substr STRING POS LENGTH
+substring of STRING, POS counted from 1
+.
+.It index STRING CHARS
+index in STRING where any of CHARS is found, or 0
+.
+.It length STRING
+length of STRING in characters
+.El
+.
+.Sh AUTHORS
+See the LICENSE file for the authors of this software.
+.
+.Sh LICENSE
+.Nm
+is released under the MIT/X Consortium License.
+.
+
diff --git a/utf8expr.c b/utf8expr.c
@@ -0,0 +1,173 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <libgen.h>
+
+#include "arg.h"
+
+char *argv0;
+
+/*
+ * Return the number of characters in the UTF-8 string s. Every byte
+ * that is not a continuation byte (bit pattern 10xxxxxx) starts a new
+ * character, so counting those bytes counts the characters.
+ *
+ * Idea taken from:
+ * http://canonical.org/~kragen/strlen-utf8.html
+ */
+size_t
+utf8strlen(char *s)
+{
+	size_t count;
+	char *p;
+
+	count = 0;
+	for (p = s; *p != '\0'; p++)
+		count += ((*p & 0xc0) != 0x80);
+
+	return count;
+}
+
+/*
+ * Locate the single UTF-8 character c inside the string s.
+ *
+ * s is walked one character at a time (a lead byte plus the 10xxxxxx
+ * continuation bytes that follow it) and each character is compared
+ * with c as a whole, so a byte sequence that merely occurs inside a
+ * longer character never matches.
+ *
+ * Returns a pointer to the first matching character in s, or NULL when
+ * c is empty, c does not occur, or s contains a character longer than
+ * 6 bytes.
+ *
+ * The scan only looks forward from s; it never reads bytes in front of
+ * the string.
+ */
+char *
+utf8strchr(char *s, char *c)
+{
+	size_t cl, n;
+
+	cl = strlen(c);
+	if (cl == 0)
+		return NULL;
+
+	while (s[0] != '\0') {
+		/* n = byte length of the character starting at s */
+		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
+			if (n >= 6)
+				return NULL;
+		}
+
+		if (n == cl && !memcmp(s, c, cl))
+			return s;
+		s += n;
+	}
+
+	return NULL;
+}
+
+/*
+ * Select a run of characters from the UTF-8 string s.
+ *
+ * s       string to cut from
+ * pos     1-based index of the first character wanted
+ * length  in: maximum number of characters wanted
+ *         out: number of BYTES in the selected run
+ *
+ * Returns a pointer into s at the first selected character; the run is
+ * not NUL-terminated at its end, so callers must honour *length.
+ * Returns NULL when pos or *length is smaller than 1 (*length is left
+ * untouched), when pos lies beyond the last character (*length is set
+ * to 0), or when s contains a character longer than 6 bytes (*length
+ * is then unspecified).
+ *
+ * pos == 0 is rejected explicitly: there is no character in front of
+ * the string to point at.
+ */
+char *
+utf8substr(char *s, size_t pos, size_t *length)
+{
+	size_t i, n, rl;
+	char *ret;
+
+	if (pos < 1 || *length < 1)
+		return NULL;
+
+	ret = NULL;
+	rl = 0;
+	for (i = 1; s[0] != '\0' && *length > 0; s += n, i++) {
+		/* n = byte length of the character starting at s */
+		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
+			if (n >= 6)
+				return NULL;
+		}
+
+		if (i < pos)
+			continue;
+
+		if (ret == NULL)
+			ret = s;
+		rl += n;
+		(*length)--;
+	}
+
+	*length = rl;
+	return ret;
+}
+
+/*
+ * Return the 1-based character index of the first character of s that
+ * also occurs in chars, or 0 when there is none.
+ *
+ * Each character of s is copied into a small NUL-terminated buffer and
+ * looked up in chars with utf8strchr(). A character longer than 6 bytes
+ * would not fit the buffer together with its terminator, so such input
+ * ends the search with 0.
+ *
+ * The scan only looks forward from s; it never reads bytes in front of
+ * the string.
+ */
+size_t
+utf8index(char *s, char *chars)
+{
+	size_t i, n;
+	char c[7];
+
+	for (i = 1; s[0] != '\0'; s += n, i++) {
+		/* n = byte length of the character starting at s */
+		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
+			if (n >= sizeof(c) - 1)
+				return 0;
+		}
+
+		memset(c, 0, sizeof(c));
+		memcpy(c, s, n);
+		if (utf8strchr(chars, c))
+			return i;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the command synopsis to stderr, using the base name of argv0
+ * as the program name, and terminate with exit status 1.
+ */
+void
+usage(void)
+{
+	char *prog;
+
+	prog = basename(argv0);
+	fprintf(stderr, "usage: %s [substr|index|length] str [args ...]\n", prog);
+	exit(1);
+}
+
+/*
+ * Entry point: utf8expr COMMAND STRING [ARGS ...]
+ *
+ * Only the first letter of COMMAND is inspected:
+ *   i  index STRING CHARS       print the 1-based index, or 0
+ *   l  length STRING            print the number of characters
+ *   s  substr STRING POS LENGTH print the selected substring
+ *
+ * POS and LENGTH are parsed with atoi(), so non-numeric text counts as
+ * 0 and negative values wrap when converted to size_t.
+ *
+ * Returns 0 on success and -1 when substr selects nothing; wrong usage
+ * exits through usage().
+ */
+int
+main(int argc, char *argv[])
+{
+	char *s;
+	size_t len;
+
+	argv0 = argv[0];
+
+	if (argc < 3)
+		usage();
+
+	switch (argv[1][0]) {
+	case 'i':
+		if (argc < 4)
+			usage();
+		/* size_t must be printed with %zu, not %ld */
+		printf("%zu\n", utf8index(argv[2], argv[3]));
+		break;
+	case 'l':
+		printf("%zu\n", utf8strlen(argv[2]));
+		break;
+	case 's':
+		if (argc < 5)
+			usage();
+		len = atoi(argv[4]);
+		s = utf8substr(argv[2], atoi(argv[3]), &len);
+		if (s == NULL)
+			return -1;
+		/* s is not terminated at the substring end; len holds its bytes */
+		printf("%.*s\n", (int)len, s);
+		break;
+	default:
+		usage();
+	}
+
+	return 0;
+}
+