common: Add a lightweight OCaml binding for PCRE.

This uses the gnulib TLS macros.
This commit is contained in:
Richard W.M. Jones
2017-08-01 12:58:33 +01:00
parent ff83a88b4c
commit f33db99edf
12 changed files with 577 additions and 0 deletions

2
.gitignore vendored
View File

@@ -123,6 +123,8 @@ Makefile.in
/common/errnostring/errnostring-gperf.gperf
/common/errnostring/errnostring.h
/common/miniexpect/miniexpect.3
/common/mlpcre/.depend
/common/mlpcre/pcre_tests
/common/mlprogress/.depend
/common/mlstdutils/.depend
/common/mlstdutils/bytes.ml

View File

@@ -156,6 +156,7 @@ SUBDIRS += csharp
# OCaml tools. Note 'common/ml*', 'mllib' and 'customize' contain
# shared code used by other OCaml tools, so these must come first.
if HAVE_OCAML
SUBDIRS += common/mlpcre
SUBDIRS += common/mlprogress
SUBDIRS += common/mlvisit
SUBDIRS += common/mlxml

View File

@@ -95,6 +95,7 @@ symlinkat
sys_select
sys_types
sys_wait
tls
vasprintf
vc-list-files
warnings

142
common/mlpcre/Makefile.am Normal file
View File

@@ -0,0 +1,142 @@
# Bindings for Perl-compatible Regular Expressions.
# Copyright (C) 2017 Red Hat Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
include $(top_srcdir)/subdir-rules.mk
EXTRA_DIST = \
$(SOURCES_MLI) \
$(SOURCES_ML) \
$(SOURCES_C) \
pcre_tests.ml
SOURCES_MLI = \
PCRE.mli
SOURCES_ML = \
PCRE.ml
SOURCES_C = \
pcre-c.c
if HAVE_OCAML
# We pretend that we're building a C library. automake handles the
# compilation of the C sources for us. At the end we take the C
# objects and OCaml objects and link them into the OCaml library.
# This C library is never used.
noinst_LIBRARIES = libmlpcre.a
if !HAVE_OCAMLOPT
MLPCRE_CMA = mlpcre.cma
else
MLPCRE_CMA = mlpcre.cmxa
endif
noinst_DATA = $(MLPCRE_CMA)
libmlpcre_a_SOURCES = $(SOURCES_C)
libmlpcre_a_CPPFLAGS = \
-I. \
-I$(top_builddir) \
-I$(top_srcdir)/gnulib/lib -I$(top_builddir)/gnulib/lib \
-I$(top_srcdir)/common/utils -I$(top_builddir)/common/utils \
-I$(shell $(OCAMLC) -where)
libmlpcre_a_CFLAGS = \
$(WARN_CFLAGS) $(WERROR_CFLAGS) \
-fPIC
BOBJECTS = $(SOURCES_ML:.ml=.cmo)
XOBJECTS = $(BOBJECTS:.cmo=.cmx)
# -I $(top_builddir)/lib/.libs is a hack which forces corresponding -L
# option to be passed to gcc, so we don't try linking against an
# installed copy of libguestfs.
OCAMLPACKAGES = \
-I $(top_builddir)/gnulib/lib/.libs \
-I $(top_builddir)/common/utils/.libs \
-I $(builddir)
OCAMLPACKAGES_TESTS = $(MLPCRE_CMA)
OCAMLFLAGS = $(OCAML_FLAGS) $(OCAML_WARN_ERROR)
if !HAVE_OCAMLOPT
OBJECTS = $(BOBJECTS)
else
OBJECTS = $(XOBJECTS)
endif
libmlpcre_a_DEPENDENCIES = $(OBJECTS)
$(MLPCRE_CMA): $(OBJECTS) libmlpcre.a
$(OCAMLFIND) mklib $(OCAMLPACKAGES) \
$(OBJECTS) $(libmlpcre_a_OBJECTS) -cclib -lpcre -o mlpcre
# Tests.
pcre_tests_SOURCES = dummy.c
pcre_tests_BOBJECTS = pcre_tests.cmo
pcre_tests_XOBJECTS = $(pcre_tests_BOBJECTS:.cmo=.cmx)
# Can't call the following as <test>_OBJECTS because automake gets confused.
if !HAVE_OCAMLOPT
pcre_tests_THEOBJECTS = $(pcre_tests_BOBJECTS)
pcre_tests.cmo: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS)
else
pcre_tests_THEOBJECTS = $(pcre_tests_XOBJECTS)
pcre_tests.cmx: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS)
endif
OCAMLLINKFLAGS = $(LINK_CUSTOM_OCAMLC_ONLY)
pcre_tests_DEPENDENCIES = \
$(pcre_tests_THEOBJECTS) \
$(MLPCRE_CMA) \
$(top_srcdir)/ocaml-link.sh
pcre_tests_LINK = \
$(top_srcdir)/ocaml-link.sh \
-cclib '-lutils -lpcre -lgnu' -- \
$(OCAMLFIND) $(BEST) $(OCAMLFLAGS) $(OCAMLLINKFLAGS) \
$(OCAMLPACKAGES) $(OCAMLPACKAGES_TESTS) \
$(pcre_tests_THEOBJECTS) -o $@
TESTS_ENVIRONMENT = $(top_builddir)/run --test
LOG_COMPILER = $(VG)
check_PROGRAMS = pcre_tests
TESTS = pcre_tests
check-valgrind:
$(MAKE) VG="@VG@" check
# Dependencies.
depend: .depend
.depend: $(wildcard $(abs_srcdir)/*.mli) $(wildcard $(abs_srcdir)/*.ml)
rm -f $@ $@-t
$(OCAMLFIND) ocamldep -I ../../ocaml -I $(abs_srcdir) $^ | \
$(SED) 's/ *$$//' | \
$(SED) -e :a -e '/ *\\$$/N; s/ *\\\n */ /; ta' | \
$(SED) -e 's,$(abs_srcdir)/,$(builddir)/,g' | \
sort > $@-t
mv $@-t $@
-include .depend
endif
.PHONY: depend docs

32
common/mlpcre/PCRE.ml Normal file
View File

@@ -0,0 +1,32 @@
(* Bindings for Perl-compatible Regular Expressions.
* Copyright (C) 2017 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*)
(* Lightweight bindings for the PCRE library. *)
exception Error of string * int
type regexp
external compile : string -> regexp = "guestfs_int_pcre_compile"
external matches : regexp -> string -> bool = "guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
let () =
Callback.register_exception "PCRE.Error" (Error ("", 0))

79
common/mlpcre/PCRE.mli Normal file
View File

@@ -0,0 +1,79 @@
(* Bindings for Perl-compatible Regular Expressions.
* Copyright (C) 2017 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*)
(** Lightweight bindings for the PCRE library.
Note this is {i not} Markus Mottl's ocaml-pcre, and doesn't
work like that library.
To match a regular expression:
{v
let re = PCRE.compile "(a+)b"
...
if PCRE.matches re "ccaaaabb" then (
let whole = PCRE.sub 0 in (* returns "aaaab" *)
let first = PCRE.sub 1 in (* returns "aaaa" *)
...
)
v}
Note that there is implicit global state stored between the
call to {!matches} and {!sub}. This is stored in thread
local storage so it is safe provided there are no other calls
to {!matches} in the same thread.
*)
exception Error of string * int
(** PCRE error raised by various functions.
The string is the printable error message.
The integer is one of the negative [PCRE_*] error codes
(see pcreapi(3) for a full list), {i or} one of the positive
error codes from [pcre_compile2]. It may also be 0 if there
was no error code information. *)
type regexp
(** The type of a compiled regular expression. *)
val compile : string -> regexp
(** Compile a regular expression. This can raise {!Error}. *)
val matches : regexp -> string -> bool
(** Test whether the regular expression matches the string. This
returns true if the regexp matches or false otherwise.
This also saves any matched substrings in thread-local storage
until either the next call to {!matches} in the current thread
or the thread/program exits. You can call {!sub} to return
these substrings.
This can raise {!Error} if PCRE returns an error. *)
val sub : int -> string
(** Return the nth substring (capture) matched by the previous call
to {!matches} in the current thread.
If [n == 0] it returns the whole matching part of the string.
If [n >= 1] it returns the nth substring.
If there was no nth substring then this raises [Not_found].
This can also raise {!Error} for other PCRE-related errors. *)

2
common/mlpcre/dummy.c Normal file
View File

@@ -0,0 +1,2 @@
/* Dummy source, to be used for OCaml-based tools with no C sources. */
enum { foo = 1 };

224
common/mlpcre/pcre-c.c Normal file
View File

@@ -0,0 +1,224 @@
/* Bindings for Perl-compatible Regular Expressions.
* Copyright (C) 2017 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <pcre.h>
#include <caml/alloc.h>
#include <caml/callback.h>
#include <caml/custom.h>
#include <caml/fail.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include "cleanups.h"
#include "glthread/tls.h"
#pragma GCC diagnostic ignored "-Wmissing-prototypes"
/* Data on the most recent match is stored in this thread-local
* variable. It is freed either by the next call to PCRE.matches or
* by (clean) thread exit.
*/
static gl_tls_key_t last_match;
struct last_match {
char *subject; /* subject string */
int *vec; /* vector containing match offsets */
int r; /* value returned by pcre_exec */
};
static void
free_last_match (struct last_match *data)
{
if (data) {
free (data->subject);
free (data->vec);
free (data);
}
}
static void init (void) __attribute__((constructor));
static void
init (void)
{
gl_tls_key_init (last_match, (void (*) (void *))free_last_match);
}
/* Raises PCRE.error (msg, errcode). */
static void
raise_pcre_error (const char *msg, int errcode)
{
value *exn = caml_named_value ("PCRE.Error");
value args[2];
args[0] = caml_copy_string (msg);
args[1] = Val_int (errcode);
caml_raise_with_args (*exn, 2, args);
}
/* Wrap and unwrap pcre regular expression handles, with a finalizer. */
#define Regexp_val(rv) (*(pcre **)Data_custom_val(rv))
static void
regexp_finalize (value rev)
{
pcre *re = Regexp_val (rev);
if (re) pcre_free (re);
}
static struct custom_operations custom_operations = {
(char *) "pcre_custom_operations",
regexp_finalize,
custom_compare_default,
custom_hash_default,
custom_serialize_default,
custom_deserialize_default
};
static value
Val_regexp (pcre *re)
{
CAMLparam0 ();
CAMLlocal1 (rv);
rv = caml_alloc_custom (&custom_operations, sizeof (pcre *), 0, 1);
Regexp_val (rv) = re;
CAMLreturn (rv);
}
value
guestfs_int_pcre_compile (value pattv)
{
CAMLparam1 (pattv);
pcre *re;
int errcode = 0;
const char *err;
int offset;
re = pcre_compile2 (String_val (pattv), 0, &errcode, &err, &offset, NULL);
if (re == NULL)
raise_pcre_error (err, errcode);
CAMLreturn (Val_regexp (re));
}
value
guestfs_int_pcre_matches (value rev, value strv)
{
CAMLparam2 (rev, strv);
pcre *re = Regexp_val (rev);
struct last_match *m, *oldm;
size_t len = caml_string_length (strv);
int capcount, r;
int veclen;
/* Calculate maximum number of substrings, and hence the vector
* length required.
*/
r = pcre_fullinfo (re, NULL, PCRE_INFO_CAPTURECOUNT, (int *) &capcount);
/* I believe that errors should never occur because of OCaml
* type safety, so we should abort here. If this ever happens
* we will need to look at it again.
*/
assert (r == 0);
veclen = 3 * (1 + capcount);
m = calloc (1, sizeof *m);
if (m == NULL)
caml_raise_out_of_memory ();
/* We will need the original subject string when fetching
* substrings, so take a copy.
*/
m->subject = malloc (len+1);
if (m->subject == NULL) {
free_last_match (m);
caml_raise_out_of_memory ();
}
memcpy (m->subject, String_val (strv), len+1);
m->vec = malloc (veclen * sizeof (int));
if (m->vec == NULL) {
free_last_match (m);
caml_raise_out_of_memory ();
}
m->r = pcre_exec (re, NULL, m->subject, len, 0, 0, m->vec, veclen);
if (m->r < 0 && m->r != PCRE_ERROR_NOMATCH) {
free_last_match (m);
raise_pcre_error ("pcre_exec", m->r);
}
/* This error would indicate that pcre_exec ran out of space in the
* vector. However if we are calculating the size of the vector
* correctly above, then this should never happen.
*/
assert (m->r != 0);
r = m->r != PCRE_ERROR_NOMATCH;
/* Replace the old TLS match data, but only if we're going
* to return a match.
*/
if (r) {
oldm = gl_tls_get (last_match);
free_last_match (oldm);
gl_tls_set (last_match, m);
}
else
free_last_match (m);
CAMLreturn (r ? Val_true : Val_false);
}
value
guestfs_int_pcre_sub (value nv)
{
CAMLparam1 (nv);
CAMLlocal1 (strv);
int len;
CLEANUP_FREE char *str = NULL;
struct last_match *m = gl_tls_get (last_match);
if (m == NULL)
raise_pcre_error ("PCRE.sub called without calling PCRE.matches", 0);
len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv),
(const char **) &str);
if (len == PCRE_ERROR_NOSUBSTRING)
caml_raise_not_found ();
if (len < 0)
raise_pcre_error ("pcre_get_substring", len);
strv = caml_alloc_string (len);
memcpy (String_val (strv), str, len);
CAMLreturn (strv);
}

View File

@@ -0,0 +1,86 @@
(* Test bindings for Perl-compatible Regular Expressions.
* Copyright (C) 2017 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*)
open Printf
let compile patt =
eprintf "PCRE.compile %s\n%!" patt;
PCRE.compile patt
let matches re str =
eprintf "PCRE.matches %s ->%!" str;
let r = PCRE.matches re str in
eprintf " %b\n%!" r;
r
let sub i =
eprintf "PCRE.sub %d ->%!" i;
let r = PCRE.sub i in
eprintf " %s\n%!" r;
r
let () =
try
let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
assert (matches re0 "aaa" = false);
assert (matches re0 "xyz" = false);
assert (matches re0 "aaabc" = true);
assert (sub 0 = "aaab");
assert (matches re1 "ccaaabb" = true);
assert (sub 1 = "aaa");
assert (sub 0 = "aaab");
assert (matches re2 "ccabbc" = true);
assert (sub 1 = "a");
assert (sub 2 = "bb");
assert (sub 0 = "abb");
assert (matches re2 "ccac" = true);
assert (sub 1 = "a");
assert (sub 2 = "");
assert (sub 0 = "a")
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
| PCRE.Error (msg, code) ->
failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg code)
(* Compile some bad regexps and check that an exception is thrown.
* It would be nice to check the error message is right but
* that involves dealing with language and future changes of
* PCRE error codes.
*)
let () =
List.iter (
fun patt ->
let msg, code =
try ignore (PCRE.compile patt); assert false
with PCRE.Error (m, c) -> m, c in
eprintf "patt: %s -> exception: %s (%d)\n%!" patt msg code
) [ "("; ")"; "+"; "*"; "(abc" ]
let () = Gc.compact ()

View File

@@ -196,6 +196,7 @@ AC_CONFIG_FILES([Makefile
common/errnostring/Makefile
common/edit/Makefile
common/miniexpect/Makefile
common/mlpcre/Makefile
common/mlprogress/Makefile
common/mlstdutils/Makefile
common/mlstdutils/guestfs_config.ml

View File

@@ -100,6 +100,12 @@ A copy of the miniexpect library from
L<http://git.annexia.org/?p=miniexpect.git;a=summary>. This is used
in virt-p2v.
=item F<common/mlpcre>
Lightweight OCaml bindings for Perl Compatible Regular Expressions
(PCRE). Note this is not related in any way to Markus Mottl's
ocaml-pcre library.
=item F<common/mlprogress>
OCaml bindings for the progress bar functions (see F<common/progress>).

1
m4/.gitignore vendored
View File

@@ -248,6 +248,7 @@
/thread.m4
/time_h.m4
/timespec.m4
/tls.m4
/ttyname_r.m4
/ulonglong.m4
/ungetc.m4