341 lines
7.5 KiB
C
341 lines
7.5 KiB
C
/* lrexlib.c - POSIX & PCRE regular expression library */
|
|
/* POSIX regexes can use Spencer extensions for matching NULs if available
   (REG_BASIC) */
|
|
/* Reuben Thomas nov00-06oct03 */
|
|
|
|
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lua.h"
#include "lauxlib.h"
#include "lrexlib.h"
|
|
|
|
|
|
/* Sanity check */
|
|
#if !defined(WITH_POSIX) && !defined(WITH_PCRE)
|
|
#error Define WITH_POSIX or WITH_PCRE, otherwise this library is useless!
|
|
#endif
|
|
|
|
|
|
/* POSIX regex methods */
|
|
|
|
#ifdef WITH_POSIX
|
|
|
|
#include <regex.h>
|
|
|
|
/*
 * Lua: newPOSIX(pattern) -> regex userdata
 *
 * Compiles `pattern` as a POSIX extended regular expression into a new
 * userdata tagged with the "regex_t" metatable.  When REG_BASIC is defined
 * (used by this file as the marker for the Spencer extensions) the pattern
 * is compiled with REG_PEND so that it may contain embedded NULs.
 *
 * On a compile failure the regerror() message is raised as a Lua error.
 */
static int rex_comp(lua_State *L)
{
  size_t l;
  const char *pattern = luaL_checklstring(L, 1, &l);
  regex_t *pr = (regex_t *)lua_newuserdata(L, sizeof(regex_t));
  int res;

#ifdef REG_BASIC
  /* Use the length already returned by luaL_checklstring rather than the
     legacy lua_strlen() macro. */
  pr->re_endp = pattern + l;
  res = regcomp(pr, pattern, REG_EXTENDED | REG_PEND);
#else
  (void)l; /* length is only needed for REG_PEND */
  res = regcomp(pr, pattern, REG_EXTENDED);
#endif
  if (res) {
    /* First call asks for the required size; the buffer is a Lua userdata
       so it is reclaimed by the collector after lua_error() unwinds. */
    size_t sz = regerror(res, pr, NULL, 0);
    char *errbuf = (char *)lua_newuserdata(L, sz);
    regerror(res, pr, errbuf, sz);
    lua_pushstring(L, errbuf);
    return lua_error(L);
  }
  /* The metatable (and with it __gc) is attached only after a successful
     compile. */
  luaL_getmetatable(L, "regex_t");
  lua_setmetatable(L, -2);
  return 1;
}
|
|
|
|
/*
 * Shared argument handling for the POSIX match/gmatch methods.
 *
 *   arg 1: compiled regex userdata (must carry the "regex_t" metatable)
 *   arg 2: subject string
 *
 * Outputs:
 *   *len    subject length (written only when REG_BASIC is defined; callers
 *           pass NULL otherwise)
 *   *ncapt  number of capturing sub-expressions (re_nsub)
 *   *text   subject string
 *   *pr     the compiled regex
 *   *match  freshly malloc'ed array of (*ncapt + 1) regmatch_t; the caller
 *           owns it and must free() it
 *
 * Raises a Lua error on a bad argument or if the array cannot be allocated.
 */
static void rex_getargs(lua_State *L, size_t *len, size_t *ncapt,
                        const char **text, regex_t **pr, regmatch_t **match)
{
  /* luaL_checkudata rejects non-userdata and foreign userdata, which a
     plain lua_touserdata would hand back as NULL or a wrong-typed pointer. */
  *pr = (regex_t *)luaL_checkudata(L, 1, "regex_t");
#ifdef REG_BASIC
  *text = luaL_checklstring(L, 2, len);
#else
  (void)len;
  *text = luaL_checklstring(L, 2, NULL);
#endif
  *ncapt = (*pr)->re_nsub;
  luaL_checkstack(L, (int)(*ncapt + 2), "too many captures");
  *match = malloc((*ncapt + 1) * sizeof(regmatch_t));
  if (*match == NULL)
    luaL_error(L, "not enough memory");
}
|
|
|
|
/*
 * Pushes a new table onto the Lua stack and stores, at integer key i, the
 * text of capture i (1..ncapt) for every capture whose start offset is
 * non-negative.  Captures with a negative start offset leave their slot
 * unset.  Offsets in `match` are relative to `text`.
 */
static void rex_push_matches(lua_State *L, const char *text, regmatch_t *match,
                             size_t ncapt)
{
  size_t cap;

  lua_newtable(L);
  for (cap = 1; cap <= ncapt; ++cap) {
    const regmatch_t *sub = match + cap;

    if (sub->rm_so < 0)
      continue; /* capture has no offsets recorded */
    lua_pushlstring(L, text + sub->rm_so, sub->rm_eo - sub->rm_so);
    lua_rawseti(L, -2, cap);
  }
}
|
|
|
|
/*
 * Lua: r:match(text) -> start, end, captures | (nothing)
 *
 * Runs the compiled POSIX regex against `text`.  On a match returns the
 * 1-based start index, the end index, and a table of captures whose "n"
 * field holds the number of sub-expressions.  Returns no values when the
 * regex does not match.
 *
 * The regmatch_t array obtained from rex_getargs() is released before
 * returning on both paths.
 */
static int rex_match(lua_State *L)
{
  int res;
  int nresults = 0;
#ifdef REG_BASIC
  size_t len;
#endif
  size_t ncapt;
  const char *text;
  regex_t *pr;
  regmatch_t *match;

  rex_getargs(L,
#ifdef REG_BASIC
              &len,
#else
              NULL,
#endif
              &ncapt, &text, &pr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");
#ifdef REG_BASIC
  /* REG_STARTEND: the subject is delimited by offsets, so NULs are allowed */
  match[0].rm_so = 0;
  match[0].rm_eo = (regoff_t)len;
  res = regexec(pr, text, ncapt + 1, match, REG_STARTEND);
#else
  res = regexec(pr, text, ncapt + 1, match, 0);
#endif
  if (res == 0) {
    lua_pushnumber(L, match[0].rm_so + 1); /* convert to 1-based index */
    lua_pushnumber(L, match[0].rm_eo);
    rex_push_matches(L, text, match, ncapt);
    lua_pushstring(L, "n");
    lua_pushnumber(L, ncapt);
    lua_rawset(L, -3);
    nresults = 3;
  }
  free(match);
  return nresults;
}
|
|
|
|
/*
 * Lua: r:gmatch(text, func [, maxmatch]) -> number of matches
 *
 * Repeatedly matches the compiled POSIX regex against `text`, calling
 * func(matched_text, captures) for each match, and returns how many matches
 * were found.  If `maxmatch` is given, at most that many matches are
 * processed (a non-positive value means none).
 *
 * Notes on behaviour:
 *  - Arguments 3 and 4 are validated before the match array is allocated so
 *    that an argument error cannot leak it.
 *  - After the first match REG_NOTBOL is passed so that '^' does not match
 *    at every restart position.
 *  - A zero-length match advances the scan by one character (or ends the
 *    scan at the end of the subject) so the loop always terminates.
 *  - The callback runs under lua_pcall so the match array can be freed
 *    before any error it raises is propagated.
 */
static int rex_gmatch(lua_State *L)
{
  int res;
  int eflags = 0;
  int limit = 0;
#ifdef REG_BASIC
  size_t len;
#endif
  size_t ncapt, nmatch = 0, maxmatch = 0;
  const char *text;
  regex_t *pr;
  regmatch_t *match;

  luaL_checktype(L, 3, LUA_TFUNCTION);
  if (lua_gettop(L) > 3) {
    lua_Number n = luaL_checknumber(L, 4);
    /* converting a negative number to size_t is undefined; clamp to 0 */
    maxmatch = (n > 0) ? (size_t)n : 0;
    limit = 1;
  }
  rex_getargs(L,
#ifdef REG_BASIC
              &len,
#else
              NULL,
#endif
              &ncapt, &text, &pr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");

  while (!limit || nmatch < maxmatch) {
    size_t consumed;

#ifdef REG_BASIC
    match[0].rm_so = 0;
    match[0].rm_eo = (regoff_t)len;
    res = regexec(pr, text, ncapt + 1, match, eflags | REG_STARTEND);
#else
    res = regexec(pr, text, ncapt + 1, match, eflags);
#endif
    if (res != 0)
      break;

    lua_pushvalue(L, 3);
    lua_pushlstring(L, text + match[0].rm_so, match[0].rm_eo - match[0].rm_so);
    rex_push_matches(L, text, match, ncapt);
    if (lua_pcall(L, 2, 0, 0) != 0) {
      free(match);
      return lua_error(L); /* re-raise the callback's error object */
    }
    nmatch++;

    consumed = (size_t)match[0].rm_eo;
    if (match[0].rm_eo == match[0].rm_so) {
      /* Empty match: stop at the end of the subject, else skip one char. */
#ifdef REG_BASIC
      if (consumed >= len)
        break;
#else
      if (text[consumed] == '\0')
        break;
#endif
      consumed++;
    }
    text += consumed;
#ifdef REG_BASIC
    len -= consumed;
#endif
    eflags |= REG_NOTBOL; /* no longer at the beginning of the subject */
  }
  free(match);
  lua_pushnumber(L, nmatch);
  return 1;
}
|
|
|
|
/*
 * __gc metamethod for "regex_t" userdata: releases the storage that
 * regcomp() attached to the compiled expression.  Returns nothing to Lua.
 */
static int rex_gc (lua_State *L)
{
  regex_t *compiled = (regex_t *)luaL_checkudata(L, 1, "regex_t");

  if (compiled == NULL)
    return 0;
  regfree(compiled);
  return 0;
}
|
|
|
|
static const luaL_Reg rexmeta[] = {
|
|
{"match", rex_match},
|
|
{"gmatch", rex_gmatch},
|
|
{"__gc", rex_gc},
|
|
{NULL, NULL}
|
|
};
|
|
|
|
#endif /* WITH_POSIX */
|
|
|
|
|
|
/* PCRE methods */
|
|
|
|
#ifdef WITH_PCRE
|
|
|
|
#include <pcre/pcre.h>
|
|
|
|
/*
 * Lua: newPCRE(pattern) -> pcre userdata
 *
 * Compiles `pattern` with pcre_compile() (default options, default
 * character tables) and returns a userdata that holds the resulting
 * `pcre *`, tagged with the "pcre" metatable.
 *
 * On a compile failure the PCRE error message is raised as a Lua error.
 */
static int pcre_comp(lua_State *L)
{
  const char *pattern = luaL_checkstring(L, 1);
  const char *error;
  int erroffset;
  /* The userdata stores one `pcre *`, so that is the size to request. */
  pcre **ppr = (pcre **)lua_newuserdata(L, sizeof(pcre *));
  pcre *pr;

  *ppr = NULL; /* never leave the slot uninitialised */
  pr = pcre_compile(pattern, 0, &error, &erroffset, NULL);
  if (!pr) {
    lua_pushstring(L, error);
    return lua_error(L);
  }
  *ppr = pr;
  /* The metatable (and with it __gc) is attached only after a successful
     compile. */
  luaL_getmetatable(L, "pcre");
  lua_setmetatable(L, -2);
  return 1;
}
|
|
|
|
static void pcre_getargs(lua_State *L, int *len, int *ncapt, const char **text,
|
|
pcre ***ppr, int **match)
|
|
{
|
|
luaL_checkany(L, 1);
|
|
*ppr = (pcre **)lua_touserdata(L, 1);
|
|
*text = luaL_checklstring(L, 2, len);
|
|
pcre_fullinfo(**ppr, NULL, PCRE_INFO_CAPTURECOUNT, ncapt);
|
|
luaL_checkstack(L, *ncapt + 2, "too many captures");
|
|
/* need (2 ints per capture, plus one for substring match) * 3/2 */
|
|
*match = malloc((*ncapt + 1) * 3 * sizeof(int));
|
|
}
|
|
|
|
/*
 * Pushes a new table onto the Lua stack and stores, at integer key i, the
 * text of capture i (1..ncapt) for every capture whose start offset is
 * non-negative.  `match` is laid out as consecutive (start, end) int pairs,
 * pair 0 being the whole match; offsets are relative to `text`.
 */
static void pcre_push_matches(lua_State *L, const char *text, int *match,
                              int ncapt)
{
  int cap;

  lua_newtable(L);
  for (cap = 1; cap <= ncapt; ++cap) {
    const int *pair = match + cap * 2;

    if (pair[0] < 0)
      continue; /* capture has no offsets recorded */
    lua_pushlstring(L, text + pair[0], pair[1] - pair[0]);
    lua_rawseti(L, -2, cap);
  }
}
|
|
|
|
/*
 * Lua: r:match(text) -> start, end, captures | (nothing)
 *
 * Runs the compiled PCRE pattern against `text`.  On a match returns the
 * 1-based start index, the end index, and a table of captures whose "n"
 * field holds the number of capturing sub-patterns.  Returns no values when
 * pcre_exec() reports a negative result.
 *
 * The offsets vector obtained from pcre_getargs() is released before
 * returning on both paths.
 */
static int pcre_match(lua_State *L)
{
  int res;
  int nresults = 0;
  const char *text;
  pcre **ppr;
  int *match;
  int ncapt;
  int len;

  pcre_getargs(L, &len, &ncapt, &text, &ppr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");
  res = pcre_exec(*ppr, NULL, text, len, 0, 0, match, (ncapt + 1) * 3);
  if (res >= 0) {
    lua_pushnumber(L, match[0] + 1); /* convert to 1-based index */
    lua_pushnumber(L, match[1]);
    pcre_push_matches(L, text, match, ncapt);
    lua_pushstring(L, "n");
    lua_pushnumber(L, ncapt);
    lua_rawset(L, -3);
    nresults = 3;
  }
  free(match);
  return nresults;
}
|
|
|
|
/*
 * Lua: r:gmatch(text, func [, maxmatch]) -> number of matches
 *
 * Repeatedly matches the compiled PCRE pattern against `text`, calling
 * func(matched_text, captures) for each match, and returns how many matches
 * were found.  If `maxmatch` is given, at most that many matches are
 * processed.
 *
 * Notes on behaviour:
 *  - Success is any non-negative pcre_exec() result, the same test that
 *    pcre_match() uses.
 *  - Arguments 3 and 4 are validated before the offsets vector is
 *    allocated so that an argument error cannot leak it.
 *  - The scan advances through pcre_exec()'s start-offset argument rather
 *    than by slicing the subject, so offsets stay relative to `text`.
 *  - A zero-length match advances the start offset by one so the loop
 *    always terminates.
 *  - The callback runs under lua_pcall so the offsets vector can be freed
 *    before any error it raises is propagated.
 */
static int pcre_gmatch(lua_State *L)
{
  int res;
  const char *text;
  int limit = 0;
  int ncapt, nmatch = 0, maxmatch = 0;
  pcre **ppr;
  int *match;
  int len;
  int start = 0;

  luaL_checktype(L, 3, LUA_TFUNCTION);
  if (lua_gettop(L) > 3) {
    maxmatch = (int)luaL_checknumber(L, 4);
    limit = 1;
  }
  pcre_getargs(L, &len, &ncapt, &text, &ppr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");

  while ((!limit || nmatch < maxmatch) && start <= len) {
    res = pcre_exec(*ppr, NULL, text, len, start, 0, match, (ncapt + 1) * 3);
    if (res < 0)
      break;

    lua_pushvalue(L, 3);
    lua_pushlstring(L, text + match[0], match[1] - match[0]);
    pcre_push_matches(L, text, match, ncapt);
    if (lua_pcall(L, 2, 0, 0) != 0) {
      free(match);
      return lua_error(L); /* re-raise the callback's error object */
    }
    nmatch++;

    /* Resume after the match; step one byte past an empty match. */
    start = (match[1] == match[0]) ? match[1] + 1 : match[1];
  }
  free(match);
  lua_pushnumber(L, nmatch);
  return 1;
}
|
|
|
|
/*
 * __gc metamethod for "pcre" userdata: passes the stored `pcre *` to
 * pcre_free().  Returns nothing to Lua.
 */
static int pcre_gc (lua_State *L)
{
  pcre **slot = (pcre **)luaL_checkudata(L, 1, "pcre");

  if (slot == NULL)
    return 0;
  pcre_free(*slot);
  return 0;
}
|
|
|
|
static const luaL_reg pcremeta[] = {
|
|
{"match", pcre_match},
|
|
{"gmatch", pcre_gmatch},
|
|
{"__gc", pcre_gc},
|
|
{NULL, NULL}
|
|
};
|
|
|
|
#endif /* defined(WITH_PCRE) */
|
|
|
|
|
|
/* Open the library */
|
|
|
|
static const luaL_Reg rexlib[] = {
|
|
#ifdef WITH_POSIX
|
|
{"newPOSIX", rex_comp},
|
|
#endif
|
|
#ifdef WITH_PCRE
|
|
{"newPCRE", pcre_comp},
|
|
#endif
|
|
{NULL, NULL}
|
|
};
|
|
|
|
/*
 * Creates (or fetches) the registry metatable called `name`, makes it its
 * own __index so that methods stored in it are found on the userdata, and
 * leaves the metatable on top of the stack.
 */
static void createmeta(lua_State *L, const char *name)
{
  luaL_newmetatable(L, name);   /* stack: mt */
  lua_pushstring(L, "__index"); /* stack: mt, "__index" */
  lua_pushvalue(L, -2);         /* stack: mt, "__index", mt */
  lua_rawset(L, -3);            /* mt.__index = mt; stack: mt */
}
|
|
|
|
/*
 * Library entry point.  For every backend compiled in, builds its
 * metatable, fills it with that backend's methods and drops it from the
 * stack; then creates the library table from `rexlib` and returns it.
 */
LUALIB_API int luaopen_rex(lua_State *L)
{
#ifdef WITH_POSIX
  /* "regex_t" metatable with the POSIX methods */
  createmeta(L, "regex_t");
  luaL_setfuncs(L, rexmeta, 0);
  lua_settop(L, -2); /* pop the metatable */
#endif
#ifdef WITH_PCRE
  /* "pcre" metatable with the PCRE methods */
  createmeta(L, "pcre");
  luaL_setfuncs(L, pcremeta, 0);
  lua_settop(L, -2); /* pop the metatable */
#endif
  luaL_newlib(L, rexlib);
  return 1;
}
|