How do you determine whether a given pattern is "good", specifically whether it is exhaustive and non-overlapping, for ML-style programming languages?
Suppose you
Here is some code from a non-expert. It shows what the problem looks like if you restrict your patterns to list constructors. In other words, the patterns can only be used with lists that contain lists. Here are some lists like that: []
, [[]]
, [[];[]]
.
If you enable -rectypes
in your OCaml interpreter, this set of lists has a single type: ('a list) as 'a.
(* Lists whose elements are themselves lists of the same kind, e.g. [],
   [[]], [[]; []].  This equi-recursive type is only accepted when OCaml is
   run with -rectypes. *)
type reclist = ('a list) as 'a
Here's a type for representing patterns that match against the reclist
type:
(* A pattern over reclist values: Nil stands for the pattern [], Any for
   the wildcard _, and Cons (h, t) for the pattern h :: t. *)
type p = Nil | Any | Cons of p * p
To translate an OCaml pattern into this form, first rewrite using (::). Then replace []
with Nil, _ with Any, and (::) with Cons. So the pattern [] :: _
translates to
Cons (Nil, Any)
Here is a function that matches a pattern against a reclist:
(* [pmatch p l] is true when the value [l] is matched by the pattern [p].
   Any matches every value, Nil matches only the empty list, and
   Cons (hp, tp) matches a non-empty list whose head is matched by hp and
   whose tail is matched by tp. *)
let rec pmatch (p : p) (l : reclist) =
  match p with
  | Any -> true
  | Nil ->
    (match l with
     | [] -> true
     | _ :: _ -> false)
  | Cons (head_pat, tail_pat) ->
    (match l with
     | [] -> false
     | first :: rest -> pmatch head_pat first && pmatch tail_pat rest)
Here's how it looks in use. Note the use of -rectypes
:
$ ocaml312 -rectypes
Objective Caml version 3.12.0
# #use "pat.ml";;
type p = Nil | Any | Cons of p * p
type reclist = 'a list as 'a
val pmatch : p -> reclist -> bool = <fun>
# pmatch (Cons(Any, Nil)) [];;
- : bool = false
# pmatch (Cons(Any, Nil)) [[]];;
- : bool = true
# pmatch (Cons(Any, Nil)) [[]; []];;
- : bool = false
# pmatch (Cons (Any, Nil)) [ [[]; []] ];;
- : bool = true
#
The pattern Cons (Any, Nil)
should match any list of length 1, and it definitely seems to be working.
So then it seems fairly straightforward to write a function intersect
that takes two patterns and returns a pattern that matches the intersection of what is matched by the two patterns. Since the patterns might not intersect at all, it returns None
when there's no intersection and Some p
otherwise.
(* [inter_exc pa pb] builds a pattern for the values matched by both [pa]
   and [pb].  Any is neutral: intersecting with it returns the other
   pattern unchanged.  Two Cons patterns are intersected component-wise.
   Raises Not_found when one side is Nil and the other is a Cons, i.e. the
   two patterns have no value in common. *)
let rec inter_exc pa pb =
  match pa with
  | Any -> pb
  | Nil ->
    (match pb with
     | Nil | Any -> Nil
     | Cons _ -> raise Not_found)
  | Cons (a, b) ->
    (match pb with
     | Any -> pa
     | Cons (c, d) -> Cons (inter_exc a c, inter_exc b d)
     | Nil -> raise Not_found)
(* [intersect pa pb] is [Some p] where [p] is the pattern computed by
   [inter_exc pa pb], or [None] when [inter_exc] raises Not_found because
   the two patterns do not intersect. *)
let intersect pa pb =
  try
    let common = inter_exc pa pb in
    Some common
  with Not_found -> None
(* Intersect a list of patterns, left to right.  Returns [None] for the
   empty list and as soon as a pairwise intersection is empty; otherwise
   returns [Some p] where [p] is the accumulated intersection. *)
let intersectn ps =
  let rec narrow acc remaining =
    match remaining with
    | [] -> Some acc
    | next :: later ->
      (match intersect acc next with
       | None -> None
       | Some merged -> narrow merged later)
  in
  match ps with
  | [] -> None
  | first :: others -> narrow first others
As a simple test, intersect the pattern [_; []]
against the pattern [[]; _]
.
The former is the same as _ :: [] :: []
, and so is Cons (Any, Cons (Nil, Nil))
.
The latter is the same as [] :: _ :: []
, and so is Cons (Nil, Cons (Any, Nil))
.
# intersect (Cons (Any, Cons (Nil, Nil))) (Cons (Nil, Cons (Any, Nil)));;
- : p option = Some (Cons (Nil, Cons (Nil, Nil)))
The result looks pretty right: [[]; []]
.
It seems like this is enough to answer the question about overlapping patterns. Two patterns overlap if their intersection is not None
.
For exhaustiveness you need to work with a list of patterns. Here is a function
exhaust
that tests whether a given list of patterns is exhaustive:
(* All ways of splitting [l] into an ordered pair of sub-lists (in, out):
   every element of [l] lands in exactly one of the two components, so a
   list of n elements yields 2^n pairs.  Elements are pushed onto the front
   of the component they join, so each component lists its elements in the
   reverse of their order in [l]. *)
let twoparts l =
  let place parts x =
    (* x goes to the right-hand component of every split built so far... *)
    let x_on_right =
      List.map (fun (left, right) -> (left, x :: right)) parts
    in
    (* ...and to the left-hand component of every split built so far. *)
    let x_on_left =
      List.map (fun (left, right) -> (x :: left, right)) parts
    in
    (* The left-hand variants go in front, in reverse order of [parts]. *)
    List.rev_append x_on_left x_on_right
  in
  List.fold_left place [([], [])] l
(* Return the elements of [l] sorted with the polymorphic [compare], with
   runs of structurally equal neighbours collapsed to a single element.
   Removing duplicates keeps later work smaller. *)
let unique l =
  let absorb kept x =
    match kept with
    (* x equals the most recently kept element: it takes that element's
       place, so the last member of a run of equals is the one retained. *)
    | prev :: older when prev = x -> x :: older
    | _ -> x :: kept
  in
  (* [kept] is accumulated newest-first, hence the final reversal. *)
  List.rev (List.fold_left absorb [] (List.sort compare l))
(* Keep only the Cons patterns of [ps], in their original order, and
   return the (head, tail) sub-pattern pair of each one.  Nil and Any
   patterns are dropped. *)
let rec mkpairs ps =
  match ps with
  | [] -> []
  | Cons (x, y) :: rest -> (x, y) :: mkpairs rest
  | (Nil | Any) :: rest -> mkpairs rest
(* [submatches pairs] looks at every two-way split (chosen, others) of
   [pairs] and keeps those where the first components of [chosen] have a
   non-empty intersection that is NOT covered by the first components of
   [others], i.e. something can match exactly that subset on this side.
   For each split kept it returns the list of second components of
   [chosen]; duplicates among the resulting lists are removed. *)
let rec submatches pairs =
  let matchable (chosen, others) =
    match intersectn (List.map fst chosen) with
    | None -> false
    | Some common ->
      let other_firsts = unique (List.map fst others) in
      not (cover common other_firsts)
  in
  let kept = List.filter matchable (twoparts pairs) in
  unique (List.map (fun (chosen, _) -> List.map snd chosen) kept)

(* [cover_pairs basepr pairs] holds when the first component of [basepr]
   is covered by the first components of [pairs], and the second component
   of [basepr] is covered by every group of second components returned by
   [submatches pairs]. *)
and cover_pairs basepr pairs =
  let (base_first, base_second) = basepr in
  let firsts = unique (List.map fst pairs) in
  cover base_first firsts
  && List.for_all (fun seconds -> cover base_second seconds)
       (submatches pairs)

(* [cover_cons basepr ps] checks the Cons patterns of [ps] against the
   (head, tail) pair [basepr].  It is false when [ps] contains no Cons
   pattern; otherwise [cover_pairs] must hold both as given and with the
   two components of every pair swapped. *)
and cover_cons basepr ps =
  let swap (a, b) = (b, a) in
  match mkpairs ps with
  | [] -> false
  | pairs ->
    cover_pairs basepr pairs
    && cover_pairs (swap basepr) (List.map swap pairs)

(* [cover basep ps] tests whether the patterns [ps] together cover the
   pattern [basep].  A wildcard in [ps] covers anything.  Otherwise Nil
   needs a Nil in [ps], a Cons needs the Cons patterns of [ps] to cover its
   two components, and Any needs both of those conditions. *)
and cover basep ps =
  if List.mem Any ps then true
  else
    match basep with
    | Nil -> List.mem Nil ps
    | Any -> List.mem Nil ps && cover_cons (Any, Any) ps
    | Cons (a, b) -> cover_cons (a, b) ps
(* [exhaust ps] is true when the list of patterns [ps] is exhaustive,
   which is checked by asking whether [ps] covers the wildcard pattern. *)
let exhaust ps =
  let everything = Any in
  cover everything ps
A pattern is like a tree with Cons
in the internal nodes and Nil
or Any
at the leaves. The basic idea is that a set of patterns is exhaustive if you always reach Any
in at least one of the patterns (no matter what the input looks like). And along the way, you need to see both Nil and Cons at each point. If you reach Nil
at the same spot in all the patterns, it means there's a longer input that won't be matched by any of them. On the other hand, if you see just Cons
at the same spot in all the patterns, there's an input that ends at that point that won't be matched.
The difficult part is checking for exhaustiveness of the two subpatterns of a Cons. This code works the way I do when I check by hand: it finds all the different subsets that could match at the left, then makes sure that the corresponding right subpatterns are exhaustive in each case. Then it does the same with left and right reversed. Since I'm a non-expert (more obvious to me all the time), there are probably better ways to do this.
Here is a session with this function:
# exhaust [Nil];;
- : bool = false
# exhaust [Any];;
- : bool = true
# exhaust [Nil; Cons (Nil, Any); Cons (Any, Nil)];;
- : bool = false
# exhaust [Nil; Cons (Any, Any)];;
- : bool = true
# exhaust [Nil; Cons (Any, Nil); Cons (Any, (Cons (Any, Any)))];;
- : bool = true
I checked this code against 30,000 randomly generated patterns, and so I have some confidence that it's right. I hope these humble observations may prove to be of some use.