Apache spark and scala, error while executing queries

Question

I am working with a dataset whose sample is as follows: I have executed the following commands successfully: I am getting following error: java.lang.RuntimeException: Error while encoding: java.lang.RuntimeException: java.lang.Character is not a valid external type for schema of string I am getting the same e…

Accepted Answer

If you looking to skip the RDD extra code you can use the below codeInput file csv (; delimited and every record separated by next line)"age";"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";"month";"duration";"campaign";"pdays";"previous";"poutcome";"y"58;"management";"married";"tertiary";"no";2143;"yes";"no";"unknown";5;"may";261;1;-1;0;"unknown";"no"44;"technician";"single";"secondary";"no";29;"yes";"no";"unknown";5;"may";151;1;-1;0;"unknown";"no"Define the struct schemaRead ; delimited fileRead the csv with header=true and pre defined schema as Dataframe directlyimport org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}object ProcessSemiColonCsv {  def main(args: Array[String]): Unit = {    val spark = Constant.getSparkSess    val schema = StructType( List(StructField("age", IntegerType, true),      StructField("job", StringType, true) ,      StructField("marital", StringType, true),      StructField("education", StringType, true) ,      StructField("default", StringType, true),      StructField("balance", IntegerType, true) ,      StructField("housing", StringType, true) ,      StructField("loan", StringType, true) ,      StructField("contact", StringType, true) ,      StructField("day", IntegerType, true) ,      StructField("month", StringType, true) ,      StructField("duration", IntegerType, true) ,      StructField("campaign", IntegerType, true) ,      StructField("pdays", IntegerType, true) ,      StructField("previous", IntegerType, true) ,      StructField("poutcome", StringType, true) ,      StructField("y", StringType, true)) )    val df = spark.read      .option("delimiter", ";")      .option("header", "true")      .schema(schema)      .csv("src/main/resources/SemiColon.csv")    df.show()    df.printSchema()  }}

Advertisement

Answer