I am using Spark 2.1 with Scala.
How to convert a string column with milliseconds to a timestamp with milliseconds?
I tried the following code from the quest
UDF with SimpleDateFormat works. The idea is taken from the Ram Ghadiyaram's link to an UDF logic.
import java.text.SimpleDateFormat
import java.sql.Timestamp
import org.apache.spark.sql.functions.udf
import scala.util.{Try, Success, Failure}
val getTimestamp: (String => Option[Timestamp]) = s => s match {
case "" => None
case _ => {
val format = new SimpleDateFormat("MM/dd/yyyy' 'HH:mm:ss.SSS")
Try(new Timestamp(format.parse(s).getTime)) match {
case Success(t) => Some(t)
case Failure(_) => None
}
}
}
val getTimestampUDF = udf(getTimestamp)
val tdf = Seq((1L, "05/26/2016 01:01:01.601"), (2L, "#$@#@#")).toDF("id", "dts")
val tts = getTimestampUDF($"dts")
tdf.withColumn("ts", tts).show(2, false)
with output:
+---+-----------------------+-----------------------+
|id |dts |ts |
+---+-----------------------+-----------------------+
|1 |05/26/2016 01:01:01.601|2016-05-26 01:01:01.601|
|2 |#$@#@# |null |
+---+-----------------------+-----------------------+