有一个解决方案:先将 YAML 转换为 JSON,然后再将其读取为 DataFrame。
您需要添加以下 2 个依赖项:jackson-databind 和 jackson-dataformat-yaml,并导入相应的类:
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import org.apache.spark.sql.SparkSession
/**
 * Demonstrates converting a YAML document to JSON with Jackson and then
 * loading that JSON into a Spark DataFrame.
 *
 * Note: constructing an instance prints the JSON form of [[yamlExample]],
 * because of the `println` statement in the class body.
 */
class ScalaYamltoDataFrame {

  /**
   * Sample YAML: a sequence of two mappings, each with `firstName`,
   * `lastName` and `age`.
   *
   * The continuation keys are indented two spaces so that they line up with
   * the first key following "- "; with a single space the entries do not
   * form one mapping per list item and the document fails to parse.
   */
  val yamlExample: String = "- firstName: \"James\"\n  lastName: \"Bond\"\n  age: 30\n\n- firstName: \"Super\"\n  lastName: \"Man\"\n  age: 25"

  /**
   * Converts a YAML document into its JSON text representation.
   *
   * @param yaml the YAML document to convert
   * @return the same data serialized as a JSON string
   */
  def convertYamlToJson(yaml: String): String = {
    // A YAML-backed mapper parses the input into an untyped object tree ...
    val yamlReader = new ObjectMapper(new YAMLFactory)
    val parsed = yamlReader.readValue(yaml, classOf[Any])
    // ... and a default (JSON) mapper serializes that tree back out.
    val jsonWriter = new ObjectMapper
    jsonWriter.writeValueAsString(parsed)
  }

  // Runs on construction: prints the JSON produced from the sample YAML.
  println(convertYamlToJson(yamlExample))

  /**
   * Converts [[yamlExample]] to JSON, reads it with Spark, and prints the
   * resulting rows and schema to standard output.
   */
  def yamlToDF(): Unit = {
    // A plain local val suffices: the session is used immediately below, and
    // it must be a stable identifier so that its implicits can be imported.
    val sparkSession = SparkSession.builder
      .master("local")
      .appName("Convert Yaml to Dataframe")
      .getOrCreate()
    import sparkSession.implicits._

    // The whole JSON document is passed as a single-element Dataset[String].
    val df = sparkSession.read
      .option("multiline", true)
      .json(Seq(convertYamlToJson(yamlExample)).toDS)

    df.show(false)
    df.printSchema()
  }
}
//println(convertYamlToJson(yamlExample))
[{"firstName":"James","lastName":"Bond","age":30},{"firstName":"Super","lastName":"Man","age":25}]
//ds.show(false)
+---+---------+--------+
|age|firstName|lastName|
+---+---------+--------+
|30 |James    |Bond    |
|25 |Super    |Man     |
+---+---------+--------+
//ds.printSchema()
root
|-- age: long (nullable = true)
|-- firstName: string (nullable = true)
|-- lastName: string (nullable = true)
希望这会有所帮助!
本文链接:https://www.f2er.com/3122376.html