Commit fa64b459 authored by Rusty Russell's avatar Rusty Russell

str_talloc: strreg

Useful wrapper for extended POSIX regular expressions.
parent 053f9398
......@@ -44,6 +44,7 @@ int main(int argc, char *argv[])
return 1;
if (strcmp(argv[1], "depends") == 0) {
printf("ccan/str\n");
printf("ccan/talloc\n");
printf("ccan/noerr\n");
return 0;
......
......@@ -4,7 +4,12 @@
#include <limits.h>
#include <stdlib.h>
#include "str_talloc.h"
#include <sys/types.h>
#include <regex.h>
#include <stdarg.h>
#include <unistd.h>
#include <ccan/talloc/talloc.h>
#include <ccan/str/str.h>
char **strsplit(const void *ctx, const char *string, const char *delims,
unsigned int *nump)
......@@ -41,3 +46,46 @@ char *strjoin(const void *ctx, char *strings[], const char *delim)
}
return ret;
}
/*
 * strreg - match @string against the extended POSIX regex @regex and
 * copy each parenthesized subexpression into the char ** arguments
 * that follow @regex (a NULL argument skips that subexpression).
 *
 * Returns true on a match.  Returns false if the regex does not
 * compile, the string does not match, or an allocation fails; on
 * allocation failure every string already extracted by this call is
 * freed and its pointer reset to NULL, so the caller owns nothing.
 */
bool strreg(const void *ctx, const char *string, const char *regex, ...)
{
	regex_t r;
	regmatch_t *matches = NULL;
	size_t nmatch, i, failed;
	va_list ap;
	bool ret = false;

	/* Compile first: nothing has been allocated yet, so a malformed
	 * regex can simply return without any cleanup. */
	if (regcomp(&r, regex, REG_EXTENDED) != 0)
		return false;

	/* Ask the compiled regex how many subexpressions it has rather
	 * than counting '(' characters: "\\(" and "[(]" are literals, and
	 * over-counting would make us va_arg() past the caller's args. */
	nmatch = 1 + r.re_nsub;
	matches = talloc_array(ctx, regmatch_t, nmatch);
	if (!matches)
		goto out;

	if (regexec(&r, string, nmatch, matches, 0) != 0)
		goto out;

	ret = true;
	va_start(ap, regex);
	/* matches[0] is the whole match; subexpressions start at 1. */
	for (i = 1; i < nmatch; i++) {
		char **arg = va_arg(ap, char **);

		if (!arg)
			continue;

		/* eg. ([a-z])? can give "no match". */
		if (matches[i].rm_so == -1) {
			*arg = NULL;
			continue;
		}

		*arg = talloc_strndup(ctx, string + matches[i].rm_so,
				      matches[i].rm_eo - matches[i].rm_so);
		if (!*arg) {
			ret = false;
			break;
		}
	}
	va_end(ap);

	if (!ret) {
		/* Allocation failed at index i: walk the arguments again
		 * and release everything handed out before the failure. */
		failed = i;
		va_start(ap, regex);
		for (i = 1; i < failed; i++) {
			char **arg = va_arg(ap, char **);

			if (arg && *arg) {
				talloc_free(*arg);
				*arg = NULL;
			}
		}
		va_end(ap);
	}

out:
	if (matches)
		talloc_free(matches);
	regfree(&r);
	return ret;
}
......@@ -63,4 +63,47 @@ char **strsplit(const void *ctx, const char *string, const char *delims,
* }
*/
char *strjoin(const void *ctx, char *strings[], const char *delim);
/**
* strreg - match and extract from a string via (extended) regular expressions.
* @ctx: the context to tallocate from (often NULL)
* @string: the string to try to match.
* @regex: the regular expression to match.
* ...: pointers to strings to allocate for subexpressions.
*
* Returns true if we matched, in which case any parenthesized
* expressions in @regex are tallocated off @ctx and placed in the
* char ** arguments following @regex.  Pass one argument per
* parenthesized expression; a NULL argument means that match is not
* saved.  The order of the strings is the order of opening
* parentheses in the expression: in the case of repeated
* expressions (eg "([a-z])*") the last one is saved, in the case of
* non-existent matches (eg "([a-z]*)?") the pointer is set to NULL.
*
* Allocation failures or malformed regular expressions return false.
*
* See Also:
* regcomp(3), regex(3).
*
* Example:
* // Given 'My name is Rusty' outputs 'Hello Rusty!'
* // Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!'
* // Given 'My name isnt Rusty Russell' outputs 'Hello there!'
* int main(int argc, char *argv[])
* {
* char *person, *input;
*
* // Join args and trim trailing space.
* input = strjoin(NULL, argv+1, " ");
* if (strlen(input) != 0)
* input[strlen(input)-1] = '\0';
*
* if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)",
* NULL, &person))
* printf("Hello %s!\n", person);
* else
* printf("Hello there!\n");
* return 0;
* }
*/
bool strreg(const void *ctx, const char *string, const char *regex, ...);
#endif /* CCAN_STR_TALLOC_H */
#include <ccan/str_talloc/str_talloc.h>
#include <ccan/str_talloc/str_talloc.c>
#include <ccan/tap/tap.h>
/*
 * TAP test driver for strreg(): checks plain matching, subexpression
 * extraction and ordering, NULL (not interested) arguments, optional
 * groups that do not participate in the match, and that nothing is
 * left allocated on the test context afterwards.
 */
int main(int argc, char *argv[])
{
	void *ctx = talloc_init("toplevel");
	unsigned int top_blocks = talloc_total_blocks(ctx);
	char *a, *b;
	/* If it accesses this, it will crash. */
	char **invalid = (char **)1L;

	plan_tests(25);

	/* Simple matching. */
	ok1(strreg(ctx, "hello world!", "hello") == true);
	ok1(strreg(ctx, "hello world!", "hi") == false);

	/* No parentheses means we don't use any extra args. */
	ok1(strreg(ctx, "hello world!", "hello", invalid) == true);
	ok1(strreg(ctx, "hello world!", "hi", invalid) == false);
	ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true);

	ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true);
	/* Found string */
	ok1(streq(a, "hello"));
	/* Allocated off ctx */
	ok1(talloc_find_parent_byname(a, "toplevel") == ctx);
	talloc_free(a);

	ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)",
		   &a, &b, invalid) == true);
	ok1(streq(a, "hello"));
	ok1(streq(b, "world"));
	ok1(talloc_find_parent_byname(a, "toplevel") == ctx);
	ok1(talloc_find_parent_byname(b, "toplevel") == ctx);
	talloc_free(a);
	talloc_free(b);

	/* * after parentheses returns last match. */
	ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)",
		   &a, &b, invalid) == true);
	ok1(streq(a, "o"));
	ok1(streq(b, "world"));
	talloc_free(a);
	talloc_free(b);

	/* Nested parentheses are ordered by open brace. */
	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
		   &a, &b, invalid) == true);
	ok1(streq(a, "hello world"));
	ok1(streq(b, "hello"));
	talloc_free(a);
	talloc_free(b);

	/* An optional group which takes no part in the match gives NULL. */
	ok1(strreg(ctx, "hello world!", "([a-z]+)( [0-9]+)? world",
		   &a, &b, invalid) == true);
	ok1(streq(a, "hello"));
	ok1(b == NULL);
	talloc_free(a);

	/* NULL means we're not interested. */
	ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)",
		   &a, NULL, invalid) == true);
	ok1(streq(a, "hello world"));
	talloc_free(a);

	/* No leaks! */
	ok1(talloc_total_blocks(ctx) == top_blocks);
	talloc_free(ctx);
	talloc_disable_null_tracking();

	return exit_status();
}
ALL_TOOLS = tools/configurator/configurator tools/ccan_depends tools/doc_extract tools/namespacize tools/ccanlint/ccanlint
DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o
DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/str/str.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o
.PHONY: tools
tools: $(ALL_TOOLS)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment