I want to perform several ordered, successive replaceAll(..., ...) calls on a string in a functional way in Scala.
What's the most elegant solution? Scalaz welcome!
// To replace or remove multiple substrings in a DataFrame string column in Scala
import play.api.libs.json._
// To find
/** Reports whether `str` contains at least one match of the regular expression `regexStr`.
  *
  * @param str      text to search; `null` is treated as containing no match
  * @param regexStr regular-expression source; it is compiled on every call
  * @return `true` if the pattern matches somewhere in `str`, `false` otherwise
  *         (including when `str` is `null`)
  */
def isContainingContent(str: String, regexStr: String): Boolean =
  // Option(null) is None, so a null input yields false instead of a NullPointerException.
  Option(str).exists(text => regexStr.r.findFirstIn(text).isDefined)
// UDF wrapper around isContainingContent: takes the text to search and the
// regex source, and yields the Boolean result of isContainingContent.
val colContentPresent = udf { (text: String, pattern: String) =>
  isContainingContent(text, pattern)
}
// To remove
/** UDF that deletes every match of the regex `regexStr` from `str`.
  *
  * First argument: the text to clean. Second argument: the regex source, which is
  * compiled on every invocation. A null text yields null rather than failing with a
  * NullPointerException inside the regex engine.
  */
val cleanPayloadOfRemovableContent = udf { (str: String, regexStr: String) =>
  // Option(...) turns a null input into None; orNull hands null back to the caller.
  Option(str).map(text => regexStr.r.replaceAllIn(text, "")).orNull
}
// To define
// Regex source describing the content to strip: three alternatives joined with `|`.
val removableContentRegex = Seq(
  // Lazily matches any characters (line breaks included) up to and including the next space.
  // NOTE(review): this looks unusually broad; part of the pattern may have been lost — confirm.
  "[\\s\\S]*? ",
  // A literal backslash followed by the letter n (an escaped newline in the text, not a line break).
  "\\\\n",
  // From "<?xml" through the first following "?>".
  "<\\?xml[\\s\\S]*?\\?>"
).mkString("|")
// To call
// Add a "logsPresentInit" column: the result of colContentPresent on "payload".
val dfPayloadLogPresent = dfXMLCheck.withColumn(
  "logsPresentInit",
  colContentPresent($"payload", lit(removableContentRegex))
)
// Replace "payload" with the output of cleanPayloadOfRemovableContent.
val dfCleanedXML = dfPayloadLogPresent.withColumn(
  "payload",
  cleanPayloadOfRemovableContent($"payload", lit(removableContentRegex))
)