/* rpm/luaext/lrexlib.c (341 lines, 7.5 KiB, C) */

/* lrexlib.c - POSIX & PCRE regular expression library */
/* POSIX regexes can use Spencer's extensions for matching NULs, if available
(detected via REG_BASIC) */
/* Reuben Thomas nov00-06oct03 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lua.h"
#include "lauxlib.h"
#include "lrexlib.h"
/* Sanity check */
#if !defined(WITH_POSIX) && !defined(WITH_PCRE)
#error Define WITH_POSIX or WITH_PCRE, otherwise this library is useless!
#endif
/* POSIX regex methods */
#ifdef WITH_POSIX
#include <regex.h>
/*
 * newPOSIX(pattern): compile `pattern` as a POSIX extended regular expression.
 *
 * Pushes a userdata holding the compiled regex_t, tagged with the "regex_t"
 * metatable, and returns it.  If regcomp() fails, raises a Lua error whose
 * message is the text produced by regerror().
 */
static int rex_comp(lua_State *L)
{
  size_t l;
  const char *pattern;
  int res;
  regex_t *pr = (regex_t *)lua_newuserdata(L, sizeof(regex_t));

  pattern = luaL_checklstring(L, 1, &l);
#ifdef REG_BASIC
  /* With REG_PEND the pattern ends at re_endp rather than at the first NUL.
     Use the length luaL_checklstring already gave us instead of the legacy
     lua_strlen(), which is not available in newer Lua versions. */
  pr->re_endp = pattern + l;
  res = regcomp(pr, pattern, REG_EXTENDED | REG_PEND);
#else
  (void)l; /* length is only needed for the REG_PEND variant */
  res = regcomp(pr, pattern, REG_EXTENDED);
#endif
  if (res) {
    /* First call asks for the required buffer size, second fills it. */
    size_t sz = regerror(res, pr, NULL, 0);
    char errbuf[sz];
    regerror(res, pr, errbuf, sz);
    lua_pushstring(L, errbuf);
    return lua_error(L);
  }
  /* Only a successfully compiled regex gets the metatable (and thus __gc). */
  luaL_getmetatable(L, "regex_t");
  lua_setmetatable(L, -2);
  return 1;
}
/*
 * Fetch the common arguments of the POSIX match functions.
 *
 *   L      Lua state; argument 1 must be a "regex_t" userdata, argument 2 the
 *          subject string.
 *   len    out: subject length (only written when REG_BASIC is defined;
 *          callers pass NULL otherwise).
 *   ncapt  out: number of capture groups (re_nsub of the compiled regex).
 *   text   out: subject string.
 *   pr     out: the compiled regex.
 *   match  out: malloc()ed array of ncapt + 1 regmatch_t; the caller owns it
 *          and must free() it.
 *
 * Raises a Lua error on a wrong argument type, too many captures, or
 * allocation failure.  All checks that can raise run before the allocation,
 * so nothing is leaked on those paths.
 */
static void rex_getargs(lua_State *L, size_t *len, size_t *ncapt,
                        const char **text, regex_t **pr, regmatch_t **match)
{
  /* luaL_checkudata verifies the metatable, so an arbitrary userdata (or a
     non-userdata value) is rejected instead of being treated as a regex_t. */
  *pr = (regex_t *)luaL_checkudata(L, 1, "regex_t");
#ifdef REG_BASIC
  *text = luaL_checklstring(L, 2, len);
#else
  (void)len;
  *text = luaL_checklstring(L, 2, NULL);
#endif
  *ncapt = (*pr)->re_nsub;
  luaL_checkstack(L, *ncapt + 2, "too many captures");
  *match = malloc((*ncapt + 1) * sizeof **match);
  if (*match == NULL)
    luaL_error(L, "not enough memory");
}
/*
 * Push a new table holding the captured substrings of a POSIX match.
 * Capture k (1..ncapt) is stored at index k; captures that did not
 * participate in the match (rm_so < 0) are left unset.
 */
static void rex_push_matches(lua_State *L, const char *text, regmatch_t *match,
                             size_t ncapt)
{
  size_t idx;

  lua_newtable(L);
  for (idx = 1; idx <= ncapt; idx++) {
    const regmatch_t *cap = &match[idx];

    if (cap->rm_so < 0)
      continue; /* group did not take part in the match */
    lua_pushlstring(L, text + cap->rm_so, cap->rm_eo - cap->rm_so);
    lua_rawseti(L, -2, idx);
  }
}
/*
 * r:match(text): run the compiled POSIX regex against `text`.
 *
 * On a match returns three values: the 1-based start index, the end index,
 * and a table of captures whose field "n" holds the number of capture groups.
 * Returns nothing when the regex does not match.
 */
static int rex_match(lua_State *L)
{
  int res;
#ifdef REG_BASIC
  size_t len;
#endif
  size_t ncapt;
  const char *text;
  regex_t *pr;
  regmatch_t *match;

  rex_getargs(L,
#ifdef REG_BASIC
              &len,
#else
              NULL,
#endif
              &ncapt, &text, &pr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");
#ifdef REG_BASIC
  /* REG_STARTEND: the subject is delimited by match[0], not by a NUL. */
  match[0].rm_so = 0;
  match[0].rm_eo = len;
  res = regexec(pr, text, ncapt + 1, match, REG_STARTEND);
#else
  res = regexec(pr, text, ncapt + 1, match, 0);
#endif
  if (res != 0) {
    free(match);
    return 0;
  }
  lua_pushnumber(L, match[0].rm_so + 1);
  lua_pushnumber(L, match[0].rm_eo);
  rex_push_matches(L, text, match, ncapt);
  lua_pushstring(L, "n");
  lua_pushnumber(L, ncapt);
  lua_rawset(L, -3);
  /* The match array is owned by this function; release it once the results
     have been copied onto the Lua stack.
     NOTE(review): a Lua memory error raised while pushing would still skip
     this free. */
  free(match);
  return 3;
}
/*
 * r:gmatch(text, func [, maxmatch]): call `func` for successive matches.
 *
 * For every match, `func` is called with the matched substring and a table of
 * captures.  Matching resumes after the end of the previous match.  If
 * `maxmatch` is given, at most that many matches are reported.  Returns the
 * number of matches found.
 */
static int rex_gmatch(lua_State *L)
{
  int res;
#ifdef REG_BASIC
  size_t len;
#endif
  size_t ncapt, nmatch = 0, maxmatch = 0;
  int limit = 0;
  const char *text;
  regex_t *pr;
  regmatch_t *match;

  /* Validate the trailing arguments before rex_getargs() allocates the match
     array, so an argument error cannot leak it. */
  luaL_checktype(L, 3, LUA_TFUNCTION);
  if (lua_gettop(L) > 3) {
    lua_Number n = luaL_checknumber(L, 4);

    /* Converting a negative, NaN or huge value to size_t is undefined;
       clamp explicitly instead. */
    if (!(n > 0))
      maxmatch = 0;
    else if (n >= (lua_Number)(size_t)-1)
      maxmatch = (size_t)-1;
    else
      maxmatch = (size_t)n;
    limit = 1;
  }
  rex_getargs(L,
#ifdef REG_BASIC
              &len,
#else
              NULL,
#endif
              &ncapt, &text, &pr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");

  while (!limit || nmatch < maxmatch) {
    size_t advance;

#ifdef REG_BASIC
    match[0].rm_so = 0;
    match[0].rm_eo = len;
    res = regexec(pr, text, ncapt + 1, match, REG_STARTEND);
#else
    res = regexec(pr, text, ncapt + 1, match, 0);
#endif
    if (res != 0)
      break;

    lua_pushvalue(L, 3);
    lua_pushlstring(L, text + match[0].rm_so, match[0].rm_eo - match[0].rm_so);
    rex_push_matches(L, text, match, ncapt);
    /* NOTE(review): an error raised by the callback propagates from here and
       skips the free() below. */
    lua_call(L, 2, 0);
    nmatch++;

    advance = (size_t)match[0].rm_eo;
    if (match[0].rm_so == match[0].rm_eo) {
      /* Empty match: resuming at the same position would find the same empty
         match forever.  Stop at the end of the subject, otherwise step one
         character past it. */
#ifdef REG_BASIC
      if (advance >= len)
        break;
#else
      if (text[advance] == '\0')
        break;
#endif
      advance++;
    }
    text += advance;
#ifdef REG_BASIC
    len -= advance;
#endif
  }
  free(match);
  lua_pushnumber(L, nmatch);
  return 1;
}
/*
 * __gc metamethod for "regex_t" userdata: releases the storage held by the
 * compiled regex.
 */
static int rex_gc (lua_State *L)
{
  regex_t *compiled = (regex_t *)luaL_checkudata(L, 1, "regex_t");

  if (compiled != NULL) {
    regfree(compiled);
  }
  return 0;
}
/* Methods installed in the "regex_t" metatable (see luaopen_rex):
   match/gmatch are the user-visible methods, __gc frees the compiled regex. */
static const luaL_Reg rexmeta[] = {
{"match", rex_match},
{"gmatch", rex_gmatch},
{"__gc", rex_gc},
{NULL, NULL}
};
#endif /* WITH_POSIX */
/* PCRE methods */
#ifdef WITH_PCRE
#include <pcre/pcre.h>
/*
 * newPCRE(pattern): compile `pattern` with pcre_compile() (default options).
 *
 * Pushes a userdata that stores the pointer to the compiled pattern, tagged
 * with the "pcre" metatable, and returns it.  If compilation fails, raises a
 * Lua error with the message reported by pcre_compile().
 */
static int pcre_comp(lua_State *L)
{
  const char *pattern;
  const char *error;
  int erroffset;
  /* The userdata holds one `pcre *`, so size it from the pointee. */
  pcre **ppr = (pcre **)lua_newuserdata(L, sizeof *ppr);
  pcre *pr;

  pattern = luaL_checkstring(L, 1);
  pr = pcre_compile(pattern, 0, &error, &erroffset, NULL);
  if (!pr) {
    lua_pushstring(L, error);
    return lua_error(L);
  }
  *ppr = pr;
  /* Only a successfully compiled pattern gets the metatable (and __gc). */
  luaL_getmetatable(L, "pcre");
  lua_setmetatable(L, -2);
  return 1;
}
/*
 * Fetch the common arguments of the PCRE match functions.
 *
 *   L      Lua state; argument 1 must be a "pcre" userdata, argument 2 the
 *          subject string.
 *   len    out: subject length in bytes.
 *   ncapt  out: number of capture groups (PCRE_INFO_CAPTURECOUNT).
 *   text   out: subject string.
 *   ppr    out: pointer to the userdata slot holding the compiled pattern.
 *   match  out: malloc()ed offset vector of (ncapt + 1) * 3 ints; the caller
 *          owns it and must free() it.
 *
 * Raises a Lua error on a wrong argument type, an over-long subject, too many
 * captures, or allocation failure.  All checks that can raise run before the
 * allocation, so nothing is leaked on those paths.
 */
static void pcre_getargs(lua_State *L, int *len, int *ncapt, const char **text,
                         pcre ***ppr, int **match)
{
  size_t textlen;

  /* Verify the metatable so that only userdata created by pcre_comp is
     dereferenced as a compiled pattern. */
  *ppr = (pcre **)luaL_checkudata(L, 1, "pcre");

  /* luaL_checklstring stores a size_t; writing it through an `int *` would
     overrun the caller's int where size_t is wider. */
  *text = luaL_checklstring(L, 2, &textlen);
  *len = (int)textlen;
  if (*len < 0 || (size_t)*len != textlen)
    luaL_error(L, "subject string too long");

  *ncapt = 0;
  if (pcre_fullinfo(**ppr, NULL, PCRE_INFO_CAPTURECOUNT, ncapt) != 0)
    luaL_error(L, "invalid compiled pattern");
  luaL_checkstack(L, *ncapt + 2, "too many captures");
  /* need (2 ints per capture, plus one for substring match) * 3/2 */
  *match = malloc((size_t)(*ncapt + 1) * 3 * sizeof **match);
  if (*match == NULL)
    luaL_error(L, "not enough memory");
}
/*
 * Push a new table holding the captured substrings of a PCRE match.
 * `match` is the offset vector filled by pcre_exec(): capture k occupies the
 * pair match[2k], match[2k + 1].  Captures with a negative start offset are
 * left unset in the table.
 */
static void pcre_push_matches(lua_State *L, const char *text, int *match,
                              int ncapt)
{
  int k = 1;

  lua_newtable(L);
  while (k <= ncapt) {
    const int start = match[k * 2];
    const int end = match[k * 2 + 1];

    if (start >= 0) {
      lua_pushlstring(L, text + start, end - start);
      lua_rawseti(L, -2, k);
    }
    k++;
  }
}
/*
 * r:match(text): run the compiled PCRE pattern against `text`.
 *
 * On a match returns three values: the 1-based start index, the end index,
 * and a table of captures whose field "n" holds the number of capture groups.
 * Returns nothing when pcre_exec() reports no match (or an error).
 */
static int pcre_match(lua_State *L)
{
  int res;
  const char *text;
  pcre **ppr;
  int *match;
  int ncapt;
  int len;

  pcre_getargs(L, &len, &ncapt, &text, &ppr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");
  res = pcre_exec(*ppr, NULL, text, len, 0, 0, match, (ncapt + 1) * 3);
  if (res < 0) {
    free(match);
    return 0;
  }
  lua_pushnumber(L, match[0] + 1);
  lua_pushnumber(L, match[1]);
  pcre_push_matches(L, text, match, ncapt);
  lua_pushstring(L, "n");
  lua_pushnumber(L, ncapt);
  lua_rawset(L, -3);
  /* The offset vector is owned by this function; release it once the results
     have been copied onto the Lua stack.
     NOTE(review): a Lua memory error raised while pushing would still skip
     this free. */
  free(match);
  return 3;
}
/*
 * r:gmatch(text, func [, maxmatch]): call `func` for successive PCRE matches.
 *
 * For every match, `func` is called with the matched substring and a table of
 * captures.  Matching resumes after the end of the previous match.  If
 * `maxmatch` is given, at most that many matches are reported.  Returns the
 * number of matches found.
 */
static int pcre_gmatch(lua_State *L)
{
  int res;
  const char *text;
  int limit = 0;
  int ncapt, nmatch = 0, maxmatch = 0;
  pcre **ppr;
  int *match;
  int len;

  /* Validate the trailing arguments before pcre_getargs() allocates the
     offset vector, so an argument error cannot leak it. */
  luaL_checktype(L, 3, LUA_TFUNCTION);
  if (lua_gettop(L) > 3) {
    maxmatch = (int)luaL_checknumber(L, 4);
    limit = 1;
  }
  pcre_getargs(L, &len, &ncapt, &text, &ppr, &match);
  if (match == NULL)
    return luaL_error(L, "not enough memory");

  while (!limit || nmatch < maxmatch) {
    int advance;

    res = pcre_exec(*ppr, NULL, text, len, 0, 0, match, (ncapt + 1) * 3);
    /* pcre_exec() returns a negative code for "no match"/errors and a
       non-negative value on success (same test as pcre_match); comparing
       against exactly 0 would reject every ordinary match. */
    if (res < 0)
      break;

    lua_pushvalue(L, 3);
    lua_pushlstring(L, text + match[0], match[1] - match[0]);
    pcre_push_matches(L, text, match, ncapt);
    /* NOTE(review): an error raised by the callback propagates from here and
       skips the free() below. */
    lua_call(L, 2, 0);
    nmatch++;

    advance = match[1];
    if (match[0] == match[1]) {
      /* Empty match: resuming at the same position would find the same empty
         match forever.  Stop at the end of the subject, otherwise step one
         byte past it. */
      if (advance >= len)
        break;
      advance++;
    }
    text += advance;
    len -= advance;
  }
  free(match);
  lua_pushnumber(L, nmatch);
  return 1;
}
/*
 * __gc metamethod for "pcre" userdata: frees the compiled pattern whose
 * pointer is stored in the userdata.
 */
static int pcre_gc (lua_State *L)
{
  pcre **slot = (pcre **)luaL_checkudata(L, 1, "pcre");

  if (slot != NULL) {
    pcre_free(*slot);
  }
  return 0;
}
static const luaL_reg pcremeta[] = {
{"match", pcre_match},
{"gmatch", pcre_gmatch},
{"__gc", pcre_gc},
{NULL, NULL}
};
#endif /* defined(WITH_PCRE) */
/* Open the library */
/* Library-level functions: one constructor per regex flavour that was
   enabled at build time (WITH_POSIX and/or WITH_PCRE). */
static const luaL_Reg rexlib[] = {
#ifdef WITH_POSIX
{"newPOSIX", rex_comp},
#endif
#ifdef WITH_PCRE
{"newPCRE", pcre_comp},
#endif
{NULL, NULL}
};
/*
 * Create (or fetch) the registry metatable called `name` and make it its own
 * __index, so methods stored in it are found on userdata that use it.
 * Leaves the metatable on top of the stack.
 */
static void createmeta(lua_State *L, const char *name)
{
  luaL_newmetatable(L, name);   /* stack: mt */
  lua_pushstring(L, "__index"); /* stack: mt, "__index" */
  lua_pushvalue(L, -2);         /* stack: mt, "__index", mt */
  lua_rawset(L, -3);            /* mt.__index = mt; stack: mt */
}
/*
 * Library entry point.  For each enabled flavour, creates its metatable,
 * fills it with the flavour's methods and pops it again; then builds the
 * library table from rexlib and returns it.
 */
LUALIB_API int luaopen_rex(lua_State *L)
{
#ifdef WITH_POSIX
/* createmeta leaves the metatable on the stack; luaL_setfuncs fills it. */
createmeta(L, "regex_t");
luaL_setfuncs(L, rexmeta, 0);
lua_pop(L, 1);
#endif
#ifdef WITH_PCRE
createmeta(L, "pcre");
luaL_setfuncs(L, pcremeta, 0);
lua_pop(L, 1);
#endif
/* The new library table is the single return value. */
luaL_newlib(L, rexlib);
return 1;
}