Here is my scala version in both parallel and no-parallel, just for fun:
(In my dual core compute, the parallel version takes 335ms while the no-parallel version takes 655ms)
object SexyPrimes {
  def isPrime(n: Int): Boolean = 
    (2 to math.sqrt(n).toInt).forall{ n%_ != 0 }
  def isSexyPrime(n: Int): Boolean = isPrime(n) && isPrime(n-6)
  def findPrimesPar(n: Int) {
    for(k <- (11 to n).par)
      if(isSexyPrime(k)) printf("%d %d\n",k-6,k)
  }
  def findPrimes(n: Int) {
    for(k <- 11 to n)
      if(isSexyPrime(k)) printf("%d %d\n",k-6,k)
  }
  def timeOf(call : =>Unit) {
    val start = System.currentTimeMillis
    call
    val end = System.currentTimeMillis
    println((end-start)+" mils")
  }
  def main(args: Array[String]) {
    timeOf(findPrimes(100*1000))
    println("------------------------")
    timeOf(findPrimesPar(100*1000))
  }
}
EDIT: According to Emil H's suggestion, I have changed my code to avoid the effects of IO and jvm warmup:
The result shows in my compute:
  List(3432, 1934, 3261, 1716, 3229, 1654, 3214, 1700)
object SexyPrimes {
  def isPrime(n: Int): Boolean = 
    (2 to math.sqrt(n).toInt).forall{ n%_ != 0 }
  def isSexyPrime(n: Int): Boolean = isPrime(n) && isPrime(n-6)
  def findPrimesPar(n: Int) {
    for(k <- (11 to n).par)
      if(isSexyPrime(k)) ()//printf("%d %d\n",k-6,k)
  }
  def findPrimes(n: Int) {
    for(k <- 11 to n)
      if(isSexyPrime(k)) ()//printf("%d %d\n",k-6,k)
  }
  def timeOf(call : =>Unit): Long = {
    val start = System.currentTimeMillis
    call
    val end = System.currentTimeMillis
    end - start 
  }
  def main(args: Array[String]) {
    val xs = timeOf(findPrimes(1000*1000))::timeOf(findPrimesPar(1000*1000))::
             timeOf(findPrimes(1000*1000))::timeOf(findPrimesPar(1000*1000))::
             timeOf(findPrimes(1000*1000))::timeOf(findPrimesPar(1000*1000))::
             timeOf(findPrimes(1000*1000))::timeOf(findPrimesPar(1000*1000))::Nil
    println(xs)
  }
}