common/mlstdutils: Add String.nsplit ?max parameter, and tests.

This idea was previously proposed by Tomáš Golembiovský in
https://www.redhat.com/archives/libguestfs/2017-January/msg00138.html
This commit is contained in:
Richard W.M. Jones
2017-09-21 20:47:38 +01:00
parent 430029db0a
commit 8bd5933cc7
3 changed files with 53 additions and 15 deletions

View File

@@ -147,18 +147,7 @@ module String = struct
done;
if not !r then s else Bytes.to_string b2
let rec nsplit sep str =
  (* Split [str] at every occurrence of [sep]; the separators themselves
   * are dropped.  When [sep] does not occur, [str] is the only field.
   *)
  match find str sep with
  | -1 -> [str]
  | pos ->
    (* Offset of the first character following the separator. *)
    let after = pos + length sep in
    let head = sub str 0 pos in
    let rest = sub str after (length str - after) in
    head :: nsplit sep rest
let split sep str =
let rec split sep str =
let len = length sep in
let seplen = length str in
let i = find str sep in
@@ -167,6 +156,21 @@ module String = struct
sub str 0 i, sub str (i + len) (seplen - i - len)
)
and nsplit ?(max = 0) sep str =
  (* [nsplit ?max sep str] splits [str] at each occurrence of [sep].
   * [max = 0] (the default) means no limit; otherwise at most [max]
   * strings are returned and the last one holds the unsplit remainder.
   * Raises [Invalid_argument] if [max] is negative, or if an unlimited
   * split is requested and the separator matches without consuming any
   * input (i.e. an empty separator), which could never terminate.
   *)
  if max < 0 then
    invalid_arg "String.nsplit: max parameter should not be negative";
  (* Accumulate the fields in reverse so that the loop is tail-recursive
   * and does not exhaust the stack on strings with very many separators.
   *)
  let rec loop acc limit rest =
    (* If we reached the limit, OR if the pattern does not match the
     * string at all, the rest of the string is the final element.
     *)
    if limit = 1 || find rest sep = -1 then
      List.rev (rest :: acc)
    else (
      let head, tail = split sep rest in
      (* With no limit, a match that leaves the remainder unchanged would
       * repeat forever, so fail instead of looping.
       *)
      if limit = 0 && String.length tail = String.length rest then
        invalid_arg "String.nsplit: separator should not be empty";
      (* 0 means "unlimited" and must stay 0; otherwise count down. *)
      let limit = if limit = 0 then 0 else limit - 1 in
      loop (head :: acc) limit tail
    )
  in
  loop [] max str
let rec lines_split str =
let buf = Buffer.create 16 in
let len = length str in

View File

@@ -88,14 +88,19 @@ module String : sig
[str] with [s2]. *)
val replace_char : string -> char -> char -> string
(** Replace character in string. *)
val nsplit : string -> string -> string list
(** [nsplit sep str] splits [str] into multiple strings at each
separator [sep].  The separators themselves are not included in
the result.  If [sep] does not occur in [str], the result is the
single-element list [[str]]. *)
val split : string -> string -> string * string
(** [split sep str] splits [str] at the first occurrence of the
separator [sep], returning the part before and the part after.
The separator itself is not included in either part, so
[split " " "a b"] is [("a", "b")].

If the separator is not found, return the whole string and an
empty string. *)
val nsplit : ?max:int -> string -> string -> string list
(** [nsplit ?max sep str] splits [str] into multiple strings at each
separator [sep].  The separators themselves are not included in
the result.  If [sep] does not occur in [str], the result is the
single-element list [[str]].

As with the Perl split function, you can give an optional
[?max] parameter to limit the number of strings returned.  The
final element of the list will contain the remainder of the
input string, including any separators left in it.  The default,
[max = 0], means there is no limit.

@raise Invalid_argument if [max] is negative. *)
val lines_split : string -> string list
(** [lines_split str] splits [str] into lines, keeping continuation
characters (i.e. [\] at the end of lines) into account. *)

View File

@@ -18,6 +18,8 @@
(* This file tests the Std_utils module. *)
open Printf
open OUnit2
open Std_utils
@@ -26,6 +28,7 @@ let assert_equal_string = assert_equal ~printer:(fun x -> x)
(* Typed wrappers around [assert_equal], each with a printer so that a
 * failing assertion shows both values.
 *)
let assert_equal_int = assert_equal ~printer:string_of_int
let assert_equal_int64 = assert_equal ~printer:Int64.to_string
let assert_equal_stringlist =
  assert_equal
    ~printer:(fun strs -> sprintf "(%s)" (String.escaped (String.concat "," strs)))
let assert_equal_stringpair =
  assert_equal ~printer:(fun (left, right) -> sprintf "%S, %S" left right)
let test_subdirectory ctx =
assert_equal_string "" (subdirectory "/foo" "/foo");
@@ -83,6 +86,30 @@ let test_string_find ctx =
assert_equal_int (-1) (String.find "" "baz");
assert_equal_int (-1) (String.find "foobar" "baz")
(* Test Std_utils.String.split: the separator in the middle, at the
 * start, empty, and absent, plus an empty input string.
 *)
let test_string_split ctx =
  let check expected sep input =
    assert_equal_stringpair expected (String.split sep input)
  in
  check ("a", "b") " " "a b";
  check ("", "ab") " " " ab";
  check ("", "abc") "" "abc";
  check ("abc", "") " " "abc";
  check ("", "") " " ""
(* Test Std_utils.String.nsplit, with and without the ?max limit.
 * Every case splits on a single space.
 *)
let test_string_nsplit ctx =
  let check ?max expected input =
    assert_equal_stringlist expected (String.nsplit ?max " " input)
  in
  (* XXX Not clear if the next test case indicates an error in
   * String.nsplit.  However this is how it has historically worked.
   *)
  check [""] "";
  check ["abc"] "abc";
  check ["a"; "b"; "c"] "a b c";
  check ["a"; "b"; "c"; ""] "a b c ";
  check [""; "a"; "b"; "c"] " a b c";
  check [""; "a"; "b"; "c"; ""] " a b c ";
  (* Limited splits: the last element keeps the unsplit remainder. *)
  check ~max:1 ["a b c d"] "a b c d";
  check ~max:2 ["a"; "b c d"] "a b c d";
  check ~max:3 ["a"; "b"; "c d"] "a b c d";
  check ~max:10 ["a"; "b"; "c"; "d"] "a b c d"
(* Test Std_utils.String.lines_split. *)
let test_string_lines_split ctx =
assert_equal_stringlist [""] (String.lines_split "");
@@ -129,6 +156,8 @@ let suite =
"strings.is_prefix" >:: test_string_is_prefix;
"strings.is_suffix" >:: test_string_is_suffix;
"strings.find" >:: test_string_find;
"strings.split" >:: test_string_split;
"strings.nsplit" >:: test_string_nsplit;
"strings.lines_split" >:: test_string_lines_split;
"strings.span" >:: test_string_span;
"strings.chomp" >:: test_string_chomp;