public class OrcFileFormat extends Object implements org.apache.spark.sql.execution.datasources.FileFormat, DataSourceRegister, scala.Serializable
FileFormat for reading ORC files. If this is moved or renamed, please update
DataSource's backwardCompatibilityMap.

| Constructor and Description |
|---|
OrcFileFormat() |
| Modifier and Type | Method and Description |
|---|---|
scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> |
buildReader(SparkSession sparkSession,
StructType dataSchema,
StructType partitionSchema,
StructType requiredSchema,
scala.collection.Seq<Filter> filters,
scala.collection.immutable.Map<String,String> options,
org.apache.hadoop.conf.Configuration hadoopConf) |
scala.Option<StructType> |
inferSchema(SparkSession sparkSession,
scala.collection.immutable.Map<String,String> options,
scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files) |
boolean |
isSplitable(SparkSession sparkSession,
scala.collection.immutable.Map<String,String> options,
org.apache.hadoop.fs.Path path) |
org.apache.spark.sql.execution.datasources.OutputWriterFactory |
prepareWrite(SparkSession sparkSession,
org.apache.hadoop.mapreduce.Job job,
scala.collection.immutable.Map<String,String> options,
StructType dataSchema) |
String |
shortName()
The string that represents the format that this data source provider uses.
|
String |
toString() |
public String shortName()
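Description copied from interface: DataSourceRegister
The string that represents the format that this data source provider uses. Generic example from the interface documentation (not this class's own alias):
override def shortName(): String = "parquet"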
shortName in interface DataSourceRegister
public String toString()
toString in class Object
public scala.Option<StructType> inferSchema(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, StructType dataSchema)
prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
public boolean isSplitable(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.fs.Path path)
isSplitable in interface org.apache.spark.sql.execution.datasources.FileFormat
public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(SparkSession sparkSession, StructType dataSchema, StructType partitionSchema, StructType requiredSchema, scala.collection.Seq<Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat