diff --git a/.gitignore b/.gitignore index 4699933d3..0e7a649f8 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,8 @@ Makefile.in /common/errnostring/errnostring-gperf.gperf /common/errnostring/errnostring.h /common/miniexpect/miniexpect.3 +/common/mlpcre/.depend +/common/mlpcre/pcre_tests /common/mlprogress/.depend /common/mlstdutils/.depend /common/mlstdutils/bytes.ml diff --git a/Makefile.am b/Makefile.am index 84b00393d..fb4c99db5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -156,6 +156,7 @@ SUBDIRS += csharp # OCaml tools. Note 'common/ml*', 'mllib' and 'customize' contain # shared code used by other OCaml tools, so these must come first. if HAVE_OCAML +SUBDIRS += common/mlpcre SUBDIRS += common/mlprogress SUBDIRS += common/mlvisit SUBDIRS += common/mlxml diff --git a/bootstrap b/bootstrap index 77a95a25b..4e3d4bc51 100755 --- a/bootstrap +++ b/bootstrap @@ -95,6 +95,7 @@ symlinkat sys_select sys_types sys_wait +tls vasprintf vc-list-files warnings diff --git a/common/mlpcre/Makefile.am b/common/mlpcre/Makefile.am new file mode 100644 index 000000000..aa638cd94 --- /dev/null +++ b/common/mlpcre/Makefile.am @@ -0,0 +1,142 @@ +# Bindings for Perl-compatible Regular Expressions. +# Copyright (C) 2017 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +include $(top_srcdir)/subdir-rules.mk + +EXTRA_DIST = \ + $(SOURCES_MLI) \ + $(SOURCES_ML) \ + $(SOURCES_C) \ + pcre_tests.ml + +SOURCES_MLI = \ + PCRE.mli + +SOURCES_ML = \ + PCRE.ml + +SOURCES_C = \ + pcre-c.c + +if HAVE_OCAML + +# We pretend that we're building a C library. automake handles the +# compilation of the C sources for us. At the end we take the C +# objects and OCaml objects and link them into the OCaml library. +# This C library is never used. + +noinst_LIBRARIES = libmlpcre.a + +if !HAVE_OCAMLOPT +MLPCRE_CMA = mlpcre.cma +else +MLPCRE_CMA = mlpcre.cmxa +endif + +noinst_DATA = $(MLPCRE_CMA) + +libmlpcre_a_SOURCES = $(SOURCES_C) +libmlpcre_a_CPPFLAGS = \ + -I. \ + -I$(top_builddir) \ + -I$(top_srcdir)/gnulib/lib -I$(top_builddir)/gnulib/lib \ + -I$(top_srcdir)/common/utils -I$(top_builddir)/common/utils \ + -I$(shell $(OCAMLC) -where) +libmlpcre_a_CFLAGS = \ + $(WARN_CFLAGS) $(WERROR_CFLAGS) \ + -fPIC + +BOBJECTS = $(SOURCES_ML:.ml=.cmo) +XOBJECTS = $(BOBJECTS:.cmo=.cmx) + +# -I $(top_builddir)/lib/.libs is a hack which forces corresponding -L +# option to be passed to gcc, so we don't try linking against an +# installed copy of libguestfs. +OCAMLPACKAGES = \ + -I $(top_builddir)/gnulib/lib/.libs \ + -I $(top_builddir)/common/utils/.libs \ + -I $(builddir) +OCAMLPACKAGES_TESTS = $(MLPCRE_CMA) + +OCAMLFLAGS = $(OCAML_FLAGS) $(OCAML_WARN_ERROR) + +if !HAVE_OCAMLOPT +OBJECTS = $(BOBJECTS) +else +OBJECTS = $(XOBJECTS) +endif + +libmlpcre_a_DEPENDENCIES = $(OBJECTS) + +$(MLPCRE_CMA): $(OBJECTS) libmlpcre.a + $(OCAMLFIND) mklib $(OCAMLPACKAGES) \ + $(OBJECTS) $(libmlpcre_a_OBJECTS) -cclib -lpcre -o mlpcre + +# Tests. + +pcre_tests_SOURCES = dummy.c +pcre_tests_BOBJECTS = pcre_tests.cmo +pcre_tests_XOBJECTS = $(pcre_tests_BOBJECTS:.cmo=.cmx) + +# Can't call the following as _OBJECTS because automake gets confused. +if !HAVE_OCAMLOPT +pcre_tests_THEOBJECTS = $(pcre_tests_BOBJECTS) +pcre_tests.cmo: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS) +else +pcre_tests_THEOBJECTS = $(pcre_tests_XOBJECTS) +pcre_tests.cmx: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS) +endif + +OCAMLLINKFLAGS = $(LINK_CUSTOM_OCAMLC_ONLY) + +pcre_tests_DEPENDENCIES = \ + $(pcre_tests_THEOBJECTS) \ + $(MLPCRE_CMA) \ + $(top_srcdir)/ocaml-link.sh +pcre_tests_LINK = \ + $(top_srcdir)/ocaml-link.sh \ + -cclib '-lutils -lpcre -lgnu' -- \ + $(OCAMLFIND) $(BEST) $(OCAMLFLAGS) $(OCAMLLINKFLAGS) \ + $(OCAMLPACKAGES) $(OCAMLPACKAGES_TESTS) \ + $(pcre_tests_THEOBJECTS) -o $@ + +TESTS_ENVIRONMENT = $(top_builddir)/run --test +LOG_COMPILER = $(VG) + +check_PROGRAMS = pcre_tests +TESTS = pcre_tests + +check-valgrind: + $(MAKE) VG="@VG@" check + +# Dependencies. +depend: .depend + +.depend: $(wildcard $(abs_srcdir)/*.mli) $(wildcard $(abs_srcdir)/*.ml) + rm -f $@ $@-t + $(OCAMLFIND) ocamldep -I ../../ocaml -I $(abs_srcdir) $^ | \ + $(SED) 's/ *$$//' | \ + $(SED) -e :a -e '/ *\\$$/N; s/ *\\\n */ /; ta' | \ + $(SED) -e 's,$(abs_srcdir)/,$(builddir)/,g' | \ + sort > $@-t + mv $@-t $@ + +-include .depend + +endif + +.PHONY: depend docs diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml new file mode 100644 index 000000000..94eea4b34 --- /dev/null +++ b/common/mlpcre/PCRE.ml @@ -0,0 +1,32 @@ +(* Bindings for Perl-compatible Regular Expressions. + * Copyright (C) 2017 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +(* Lightweight bindings for the PCRE library. *) + +exception Error of string * int + +type regexp + +external compile : string -> regexp = "guestfs_int_pcre_compile" + +external matches : regexp -> string -> bool = "guestfs_int_pcre_matches" + +external sub : int -> string = "guestfs_int_pcre_sub" + +let () = + Callback.register_exception "PCRE.Error" (Error ("", 0)) diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli new file mode 100644 index 000000000..331a50a9a --- /dev/null +++ b/common/mlpcre/PCRE.mli @@ -0,0 +1,79 @@ +(* Bindings for Perl-compatible Regular Expressions. + * Copyright (C) 2017 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +(** Lightweight bindings for the PCRE library. + + Note this is {i not} Markus Mottl's ocaml-pcre, and doesn't + work like that library. + + To match a regular expression: + +{v +let re = PCRE.compile "(a+)b" +... + +if PCRE.matches re "ccaaaabb" then ( + let whole = PCRE.sub 0 in (* returns "aaaab" *) + let first = PCRE.sub 1 in (* returns "aaaa" *) + ... +) +v} + + Note that there is implicit global state stored between the + call to {!matches} and {!sub}. This is stored in thread + local storage so it is safe provided there are no other calls + to {!matches} in the same thread. +*) + +exception Error of string * int +(** PCRE error raised by various functions. + + The string is the printable error message. + + The integer is one of the negative [PCRE_*] error codes + (see pcreapi(3) for a full list), {i or} one of the positive + error codes from [pcre_compile2]. It may also be 0 if there + was no error code information. *) + +type regexp +(** The type of a compiled regular expression. *) + +val compile : string -> regexp +(** Compile a regular expression. This can raise {!Error}. *) + +val matches : regexp -> string -> bool +(** Test whether the regular expression matches the string. This + returns true if the regexp matches or false otherwise. + + This also saves any matched substrings in thread-local storage + until either the next call to {!matches} in the current thread + or the thread/program exits. You can call {!sub} to return + these substrings. + + This can raise {!Error} if PCRE returns an error. *) + +val sub : int -> string +(** Return the nth substring (capture) matched by the previous call + to {!matches} in the current thread. + + If [n == 0] it returns the whole matching part of the string. + + If [n >= 1] it returns the nth substring. + + If there was no nth substring then this raises [Not_found]. + This can also raise {!Error} for other PCRE-related errors. *) diff --git a/common/mlpcre/dummy.c b/common/mlpcre/dummy.c new file mode 100644 index 000000000..ebab6198c --- /dev/null +++ b/common/mlpcre/dummy.c @@ -0,0 +1,2 @@ +/* Dummy source, to be used for OCaml-based tools with no C sources. */ +enum { foo = 1 }; diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c new file mode 100644 index 000000000..6fae0e6f4 --- /dev/null +++ b/common/mlpcre/pcre-c.c @@ -0,0 +1,224 @@ +/* Bindings for Perl-compatible Regular Expressions. + * Copyright (C) 2017 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include "cleanups.h" + +#include "glthread/tls.h" + +#pragma GCC diagnostic ignored "-Wmissing-prototypes" + +/* Data on the most recent match is stored in this thread-local + * variable. It is freed either by the next call to PCRE.matches or + * by (clean) thread exit. + */ +static gl_tls_key_t last_match; + +struct last_match { + char *subject; /* subject string */ + int *vec; /* vector containing match offsets */ + int r; /* value returned by pcre_exec */ +}; + +static void +free_last_match (struct last_match *data) +{ + if (data) { + free (data->subject); + free (data->vec); + free (data); + } +} + +static void init (void) __attribute__((constructor)); + +static void +init (void) +{ + gl_tls_key_init (last_match, (void (*) (void *))free_last_match); +} + +/* Raises PCRE.error (msg, errcode). */ +static void +raise_pcre_error (const char *msg, int errcode) +{ + value *exn = caml_named_value ("PCRE.Error"); + value args[2]; + + args[0] = caml_copy_string (msg); + args[1] = Val_int (errcode); + caml_raise_with_args (*exn, 2, args); +} + +/* Wrap and unwrap pcre regular expression handles, with a finalizer. */ +#define Regexp_val(rv) (*(pcre **)Data_custom_val(rv)) + +static void +regexp_finalize (value rev) +{ + pcre *re = Regexp_val (rev); + if (re) pcre_free (re); +} + +static struct custom_operations custom_operations = { + (char *) "pcre_custom_operations", + regexp_finalize, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default +}; + +static value +Val_regexp (pcre *re) +{ + CAMLparam0 (); + CAMLlocal1 (rv); + + rv = caml_alloc_custom (&custom_operations, sizeof (pcre *), 0, 1); + Regexp_val (rv) = re; + + CAMLreturn (rv); +} + +value +guestfs_int_pcre_compile (value pattv) +{ + CAMLparam1 (pattv); + pcre *re; + int errcode = 0; + const char *err; + int offset; + + re = pcre_compile2 (String_val (pattv), 0, &errcode, &err, &offset, NULL); + if (re == NULL) + raise_pcre_error (err, errcode); + + CAMLreturn (Val_regexp (re)); +} + +value +guestfs_int_pcre_matches (value rev, value strv) +{ + CAMLparam2 (rev, strv); + pcre *re = Regexp_val (rev); + struct last_match *m, *oldm; + size_t len = caml_string_length (strv); + int capcount, r; + int veclen; + + /* Calculate maximum number of substrings, and hence the vector + * length required. + */ + r = pcre_fullinfo (re, NULL, PCRE_INFO_CAPTURECOUNT, (int *) &capcount); + /* I believe that errors should never occur because of OCaml + * type safety, so we should abort here. If this ever happens + * we will need to look at it again. + */ + assert (r == 0); + veclen = 3 * (1 + capcount); + + m = calloc (1, sizeof *m); + if (m == NULL) + caml_raise_out_of_memory (); + + /* We will need the original subject string when fetching + * substrings, so take a copy. + */ + m->subject = malloc (len+1); + if (m->subject == NULL) { + free_last_match (m); + caml_raise_out_of_memory (); + } + memcpy (m->subject, String_val (strv), len+1); + + m->vec = malloc (veclen * sizeof (int)); + if (m->vec == NULL) { + free_last_match (m); + caml_raise_out_of_memory (); + } + + m->r = pcre_exec (re, NULL, m->subject, len, 0, 0, m->vec, veclen); + if (m->r < 0 && m->r != PCRE_ERROR_NOMATCH) { + free_last_match (m); + raise_pcre_error ("pcre_exec", m->r); + } + + /* This error would indicate that pcre_exec ran out of space in the + * vector. However if we are calculating the size of the vector + * correctly above, then this should never happen. + */ + assert (m->r != 0); + + r = m->r != PCRE_ERROR_NOMATCH; + + /* Replace the old TLS match data, but only if we're going + * to return a match. + */ + if (r) { + oldm = gl_tls_get (last_match); + free_last_match (oldm); + gl_tls_set (last_match, m); + } + else + free_last_match (m); + + CAMLreturn (r ? Val_true : Val_false); +} + +value +guestfs_int_pcre_sub (value nv) +{ + CAMLparam1 (nv); + CAMLlocal1 (strv); + int len; + CLEANUP_FREE char *str = NULL; + struct last_match *m = gl_tls_get (last_match); + + if (m == NULL) + raise_pcre_error ("PCRE.sub called without calling PCRE.matches", 0); + + len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv), + (const char **) &str); + + if (len == PCRE_ERROR_NOSUBSTRING) + caml_raise_not_found (); + + if (len < 0) + raise_pcre_error ("pcre_get_substring", len); + + strv = caml_alloc_string (len); + memcpy (String_val (strv), str, len); + CAMLreturn (strv); +} diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml new file mode 100644 index 000000000..e5214eab8 --- /dev/null +++ b/common/mlpcre/pcre_tests.ml @@ -0,0 +1,86 @@ +(* Test bindings for Perl-compatible Regular Expressions. + * Copyright (C) 2017 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Printf + +let compile patt = + eprintf "PCRE.compile %s\n%!" patt; + PCRE.compile patt + +let matches re str = + eprintf "PCRE.matches %s ->%!" str; + let r = PCRE.matches re str in + eprintf " %b\n%!" r; + r + +let sub i = + eprintf "PCRE.sub %d ->%!" i; + let r = PCRE.sub i in + eprintf " %s\n%!" r; + r + +let () = + try + let re0 = compile "a+b" in + let re1 = compile "(a+)b" in + let re2 = compile "(a+)(b*)" in + + assert (matches re0 "ccaaabbbb" = true); + assert (sub 0 = "aaab"); + + assert (matches re0 "aaa" = false); + + assert (matches re0 "xyz" = false); + + assert (matches re0 "aaabc" = true); + assert (sub 0 = "aaab"); + + assert (matches re1 "ccaaabb" = true); + assert (sub 1 = "aaa"); + assert (sub 0 = "aaab"); + + assert (matches re2 "ccabbc" = true); + assert (sub 1 = "a"); + assert (sub 2 = "bb"); + assert (sub 0 = "abb"); + + assert (matches re2 "ccac" = true); + assert (sub 1 = "a"); + assert (sub 2 = ""); + assert (sub 0 = "a") + with + | Not_found -> + failwith "one of the PCRE.sub functions unexpectedly raised Not_found" + | PCRE.Error (msg, code) -> + failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg code) + +(* Compile some bad regexps and check that an exception is thrown. + * It would be nice to check the error message is right but + * that involves dealing with language and future changes of + * PCRE error codes. + *) +let () = + List.iter ( + fun patt -> + let msg, code = + try ignore (PCRE.compile patt); assert false + with PCRE.Error (m, c) -> m, c in + eprintf "patt: %s -> exception: %s (%d)\n%!" patt msg code + ) [ "("; ")"; "+"; "*"; "(abc" ] + +let () = Gc.compact () diff --git a/configure.ac b/configure.ac index 3cf9d0a28..651b48b3b 100644 --- a/configure.ac +++ b/configure.ac @@ -196,6 +196,7 @@ AC_CONFIG_FILES([Makefile common/errnostring/Makefile common/edit/Makefile common/miniexpect/Makefile + common/mlpcre/Makefile common/mlprogress/Makefile common/mlstdutils/Makefile common/mlstdutils/guestfs_config.ml diff --git a/docs/guestfs-hacking.pod b/docs/guestfs-hacking.pod index 48bff306c..c71e659c4 100644 --- a/docs/guestfs-hacking.pod +++ b/docs/guestfs-hacking.pod @@ -100,6 +100,12 @@ A copy of the miniexpect library from L. This is used in virt-p2v. +=item F + +Lightweight OCaml bindings for Perl Compatible Regular Expressions +(PCRE). Note this is not related in any way to Markus Mottl's +ocaml-pcre library. + =item F OCaml bindings for the progress bar functions (see F). diff --git a/m4/.gitignore b/m4/.gitignore index 07960ed7b..a84b22e5c 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -248,6 +248,7 @@ /thread.m4 /time_h.m4 /timespec.m4 +/tls.m4 /ttyname_r.m4 /ulonglong.m4 /ungetc.m4