It is likely an implementation detail, but, for the Oracle and IBM JDKs at least, is the compiled pattern cached internally, or do we as application developers need to perform the caching ourselves?
I've created a class, CachedPattern, that caches Pattern objects. If you run the main method, you'll see that the objects returned by Java's Pattern.compile are in fact different instances, each of which also consumes memory.
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.eclipse.core.runtime.Assert;
/**
 * Cache of compiled {@link Pattern} objects, so that repeated requests for the
 * same regular expression (and flags) share a single {@code Pattern} instance
 * instead of compiling a new one each time.
 *
 * <p>The cache is a plain {@link HashMap}: it is never evicted and it is not
 * synchronized, so callers that use it from several threads must provide their
 * own synchronization.
 */
public class CachedPattern {

    /**
     * Compiled patterns, keyed by {@code flags + ":" + regex}.
     *
     * <p>The flags come first because their decimal rendering can never contain
     * {@code ':'}; the first colon therefore always separates flags from regex,
     * and two different (regex, flags) pairs can never produce the same key.
     * For example, {@code compile("abc1")} maps to {@code "0:abc1"} while
     * {@code compile("abc", 1)} maps to {@code "1:abc"}.
     */
    private static final Map<String, Pattern> cached = new HashMap<>();

    /**
     * Small demonstration: prints whether repeated calls return the same
     * instance, once for {@link #compile(String)} and once for
     * {@link Pattern#compile(String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Pattern p1 = Pattern.compile("abc");
        Pattern p2 = Pattern.compile("abc");

        Pattern x1 = CachedPattern.compile("abc");
        Pattern x2 = CachedPattern.compile("abc");
        Pattern x3 = CachedPattern.compile("abc");
        Pattern x4 = CachedPattern.compile("abc");
        Pattern x5 = CachedPattern.compile("abc");

        // Pattern does not override equals(), so identity is what is being compared.
        boolean cachedAreSame = x1 == x2 && x1 == x3 && x1 == x4 && x1 == x5;
        boolean plainAreSame = p1 == p2;

        System.out.println("CachedPattern.compile returns the same instance: " + cachedAreSame);
        System.out.println("Pattern.compile returns the same instance: " + plainAreSame);
    }

    /**
     * Returns the cached pattern for {@code regex} compiled without flags,
     * compiling and caching it on first use.
     *
     * <p>Shares its cache entry with {@code compile(regex, 0)}.
     *
     * @param regex the expression to compile; must not be {@code null}
     * @return the shared compiled pattern
     * @throws NullPointerException if {@code regex} is {@code null}
     * @throws java.util.regex.PatternSyntaxException if the expression's syntax is invalid
     */
    public static Pattern compile(String regex) {
        return compile(regex, 0);
    }

    /**
     * Returns the cached pattern for {@code regex} compiled with {@code flags},
     * compiling and caching it on first use.
     *
     * @param regex the expression to compile; must not be {@code null}
     * @param flags match flags, as accepted by {@link Pattern#compile(String, int)}
     * @return the shared compiled pattern
     * @throws NullPointerException if {@code regex} is {@code null}
     * @throws IllegalArgumentException if {@code flags} contains undefined bits
     * @throws java.util.regex.PatternSyntaxException if the expression's syntax is invalid
     */
    public static Pattern compile(String regex, int flags) {
        // Reject null up front: string concatenation would turn it into "null"
        // and alias the cache entry of the literal regex "null".
        if (regex == null) {
            throw new NullPointerException("regex");
        }

        String key = flags + ":" + regex;
        Pattern pattern = cached.get(key);
        if (pattern == null) {
            // Failed compilations throw here and are therefore never cached.
            pattern = Pattern.compile(regex, flags);
            cached.put(key, pattern);
        }
        return pattern;
    }
}