mirror of
https://github.com/libguestfs/libguestfs.git
synced 2026-03-22 07:03:38 +00:00
common/mlpcre: Add split and nsplit functions.
These work like our String.split and String.nsplit functions.
This commit is contained in:
@@ -52,5 +52,38 @@ let rec replace ?(global = false) patt subst subj =
|
||||
xs ^ subst ^ zs
|
||||
)
|
||||
|
||||
(* [split patt subj] splits [subj] around the first match of [patt].
 *
 * Returns the pair (text before the match, text after the match); the
 * matched text itself is not returned.  If [patt] does not match at
 * all, the result is [(subj, "")].
 *)
let split patt subj =
  if not (matches patt subj) then
    subj, ""
  else (
    (* If patt matches "yyyy" in the original string then we have
     * the following situation, where "xxxx" is the part of the
     * original string before the match, and "zzzz..." is the
     * part after the match:
     *   "xxxxyyyyzzzzzzzzzzzzz"
     *        ^   ^
     *        i1  i2
     *)
    let i1, i2 = subi 0 in
    let xs = String.sub subj 0 i1 (* "xxxx", part before the match *) in
    let zs = String.sub subj i2 (String.length subj - i2) (* after *) in
    xs, zs
  )

(* [nsplit ?max patt subj] splits [subj] on every match of [patt] and
 * returns the pieces in order.
 *
 * [max] bounds the length of the returned list; the last element then
 * holds the unsplit remainder.  The default, [0], means no limit.
 *
 * Raises [Invalid_argument] if [max] is negative.
 *)
let rec nsplit ?(max = 0) patt subj =
  if max < 0 then
    invalid_arg "PCRE.nsplit: max parameter should not be negative";

  (* If we reached the limit, OR if the pattern does not match the string
   * at all, return the rest of the string as a single element list.
   *)
  if max = 1 || not (matches patt subj) then
    [subj]
  else (
    let s1, s2 = split patt subj in
    if String.length s2 = String.length subj then
      (* No progress: [s2] is the whole of [subj], which only happens
       * when the match is empty and sits at offset 0.  Recursing would
       * repeat this exact call, forever when [max = 0], so stop and
       * return the rest of the string unsplit.
       *)
      [subj]
    else (
      let max = if max = 0 then 0 else max - 1 in
      s1 :: nsplit ~max patt s2
    )
  )
|
||||
|
||||
(* Register the [Error] exception under the name "PCRE.Error" so that
 * it can be looked up by that name through the Callback mechanism.
 * The string and integer given here are only placeholder arguments.
 *)
let () =
  let placeholder = Error ("", 0) in
  Callback.register_exception "PCRE.Error" placeholder
|
||||
|
||||
@@ -110,3 +110,22 @@ val replace : ?global:bool -> regexp -> string -> string -> string
|
||||
|
||||
Note that this function does not allow backreferences.
|
||||
Any captures in [patt] are ignored. *)
|
||||
|
||||
val split : regexp -> string -> string * string
|
||||
val nsplit : ?max:int -> regexp -> string -> string list
|
||||
(** [split patt subj] splits the string at the first occurrence
    of the regular expression [patt], returning the parts of the
    string before and after the match (the matching part is not
    returned).  If the pattern does not match then the whole
    input is returned in the first string, and the second string
    is empty.

    [nsplit patt subj] is the same but the string is split
    on every occurrence of [patt].  Note that if the pattern
    matches at the beginning or end of the string, then an
    empty string element will be returned at the beginning or
    end of the list.

    [nsplit] has an optional [?max] parameter which controls
    the maximum length of the returned list.  The final element
    contains the remainder of the string.  The default, [0],
    means that the length of the list is not limited.

    @raise Invalid_argument if [max] is negative. *)
|
||||
|
||||
@@ -42,6 +42,20 @@ let replace ?(global = false) patt subst subj =
|
||||
eprintf " %s\n%!" r;
|
||||
r
|
||||
|
||||
(* Tracing wrapper around [PCRE.split]: prints the subject string to
 * stderr before the call and the resulting pair after it, then
 * returns the pair unchanged.
 *)
let split patt subj =
  eprintf "PCRE.split <patt> %s ->%!" subj;
  let (before, after) as parts = PCRE.split patt subj in
  eprintf " (%s, %s)\n%!" before after;
  parts
|
||||
|
||||
(* Tracing wrapper around [PCRE.nsplit]: prints the call (showing
 * [~max] only when it is non-zero) to stderr, then the resulting list
 * joined with "; ", and returns the list unchanged.
 *)
let nsplit ?(max = 0) patt subj =
  let max_label =
    match max with
    | 0 -> ""
    | n -> sprintf " ~max:%d" n
  in
  eprintf "PCRE.nsplit%s <patt> %s ->%!" max_label subj;
  let pieces = PCRE.nsplit ~max patt subj in
  let joined = String.concat "; " pieces in
  eprintf " [%s]\n%!" joined;
  pieces
|
||||
|
||||
let sub i =
|
||||
eprintf "PCRE.sub %d ->%!" i;
|
||||
let r = PCRE.sub i in
|
||||
@@ -60,6 +74,7 @@ let () =
|
||||
let re1 = compile "(a+)b" in
|
||||
let re2 = compile "(a+)(b*)" in
|
||||
let re3 = compile ~caseless:true "[^a-z0-9_]" in
|
||||
let ws = compile "\\s+" in
|
||||
|
||||
assert (matches re0 "ccaaabbbb" = true);
|
||||
assert (sub 0 = "aaab");
|
||||
@@ -101,6 +116,20 @@ let () =
|
||||
assert (replace ~global:true re3 "-" "this is a\xc2\xa3FUNNY.name?"
|
||||
(* = "this-is-a-FUNNY-name-" if UTF-8 worked *)
|
||||
= "this-is-a--FUNNY-name-");
|
||||
|
||||
(* This also tests PCRE.split since that is used by nsplit. *)
|
||||
assert (nsplit ~max:1 ws "a b c" = [ "a b c" ]);
|
||||
assert (nsplit ~max:2 ws "a b c" = [ "a"; "b c" ]);
|
||||
assert (nsplit ~max:3 ws "a b c" = [ "a"; "b"; "c" ]);
|
||||
assert (nsplit ~max:10 ws "a b c" = [ "a"; "b"; "c" ]);
|
||||
assert (nsplit ws "the cat sat on \t\t the mat." =
|
||||
[ "the"; "cat"; "sat"; "on"; "the"; "mat." ]);
|
||||
assert (nsplit ~max:5 ws "the cat sat on \t\t the mat." =
|
||||
[ "the"; "cat"; "sat"; "on"; "the mat." ]);
|
||||
assert (nsplit ws " the " = [ ""; "the"; "" ]);
|
||||
assert (nsplit ws "the " = [ "the"; "" ]);
|
||||
assert (nsplit ws " the" = [ ""; "the" ]);
|
||||
assert (nsplit ws " \t the" = [ ""; "the" ]);
|
||||
with
|
||||
| Not_found ->
|
||||
failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
|
||||
|
||||
Reference in New Issue
Block a user