Package org.apache.spark.types.variant
Class VariantUtil
Object
org.apache.spark.types.variant.VariantUtil
This class defines constants related to the variant format and provides functions for
manipulating variant binaries.
A variant is made up of 2 binaries: value and metadata. A variant value consists of a one-byte
header and a number of content bytes (can be zero). The header byte is divided into the upper 6 bits
(called "type info") and the lower 2 bits (called "basic type"). The content format for every
possible basic type and type info value is explained in the constants below.
The variant metadata includes a version id and a dictionary of distinct strings (case-sensitive).
Its binary format is:
- Version: 1-byte unsigned integer. Currently, the only acceptable value is 1.
- Dictionary size: 4-byte little-endian unsigned integer. The number of keys in the
dictionary.
- Offsets: (size + 1) * 4-byte little-endian unsigned integers. `offsets[i]` represents the
starting position of string i, counted from the address of `offsets[0]`. Strings
must be stored contiguously, so we don't need to store the string size; instead, we compute it
with `offsets[i + 1] - offsets[i]`.
- UTF-8 string data.
-
Nested Class Summary
Nested ClassesModifier and TypeClassDescriptionstatic interfacestatic interfacestatic enum -
Field Summary
FieldsModifier and TypeFieldDescriptionstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final bytestatic final byte -
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
static byte | arrayHeader(boolean largeSize, int offsetSize)
static byte[] | getBinary(byte[] value, int pos)
static boolean | getBoolean(byte[] value, int pos)
static BigDecimal | getDecimal(byte[] value, int pos)
static double | getDouble(byte[] value, int pos)
static float | getFloat(byte[] value, int pos)
static long | getLong(byte[] value, int pos)
static String | getMetadataKey(byte[] metadata, int id)
static String | getString(byte[] value, int pos)
static VariantUtil.Type | getType(byte[] value, int pos)
static <T> T | handleArray(byte[] value, int pos, VariantUtil.ArrayHandler<T> handler)
static <T> T | handleObject(byte[] value, int pos, VariantUtil.ObjectHandler<T> handler)
static byte | objectHeader(boolean largeSize, int idSize, int offsetSize)
static byte | primitiveHeader(int type)
static byte | shortStrHeader(int size)
static int | valueSize(byte[] value, int pos)
static void | writeLong(byte[] bytes, int pos, long value, int numBytes)
-
Field Details
-
BASIC_TYPE_BITS
public static final int BASIC_TYPE_BITS- See Also:
-
BASIC_TYPE_MASK
public static final int BASIC_TYPE_MASK- See Also:
-
TYPE_INFO_MASK
public static final int TYPE_INFO_MASK- See Also:
-
MAX_SHORT_STR_SIZE
public static final int MAX_SHORT_STR_SIZE- See Also:
-
PRIMITIVE
public static final int PRIMITIVE- See Also:
-
SHORT_STR
public static final int SHORT_STR- See Also:
-
OBJECT
public static final int OBJECT- See Also:
-
ARRAY
public static final int ARRAY- See Also:
-
NULL
public static final int NULL- See Also:
-
TRUE
public static final int TRUE- See Also:
-
FALSE
public static final int FALSE- See Also:
-
INT1
public static final int INT1- See Also:
-
INT2
public static final int INT2- See Also:
-
INT4
public static final int INT4- See Also:
-
INT8
public static final int INT8- See Also:
-
DOUBLE
public static final int DOUBLE- See Also:
-
DECIMAL4
public static final int DECIMAL4- See Also:
-
DECIMAL8
public static final int DECIMAL8- See Also:
-
DECIMAL16
public static final int DECIMAL16- See Also:
-
DATE
public static final int DATE- See Also:
-
TIMESTAMP
public static final int TIMESTAMP- See Also:
-
TIMESTAMP_NTZ
public static final int TIMESTAMP_NTZ- See Also:
-
FLOAT
public static final int FLOAT- See Also:
-
BINARY
public static final int BINARY- See Also:
-
LONG_STR
public static final int LONG_STR- See Also:
-
VERSION
public static final byte VERSION- See Also:
-
VERSION_MASK
public static final byte VERSION_MASK- See Also:
-
U8_MAX
public static final int U8_MAX- See Also:
-
U16_MAX
public static final int U16_MAX- See Also:
-
U24_MAX
public static final int U24_MAX- See Also:
-
U24_SIZE
public static final int U24_SIZE- See Also:
-
U32_SIZE
public static final int U32_SIZE- See Also:
-
SIZE_LIMIT
public static final int SIZE_LIMIT- See Also:
-
MAX_DECIMAL4_PRECISION
public static final int MAX_DECIMAL4_PRECISION- See Also:
-
MAX_DECIMAL8_PRECISION
public static final int MAX_DECIMAL8_PRECISION- See Also:
-
MAX_DECIMAL16_PRECISION
public static final int MAX_DECIMAL16_PRECISION- See Also:
-
-
Constructor Details
-
VariantUtil
public VariantUtil()
-
-
Method Details
-
writeLong
public static void writeLong(byte[] bytes, int pos, long value, int numBytes) -
primitiveHeader
public static byte primitiveHeader(int type) -
shortStrHeader
public static byte shortStrHeader(int size) -
objectHeader
public static byte objectHeader(boolean largeSize, int idSize, int offsetSize) -
arrayHeader
public static byte arrayHeader(boolean largeSize, int offsetSize) -
getType
-
valueSize
public static int valueSize(byte[] value, int pos) -
getBoolean
public static boolean getBoolean(byte[] value, int pos) -
getLong
public static long getLong(byte[] value, int pos) -
getDouble
public static double getDouble(byte[] value, int pos) -
getDecimal
-
getFloat
public static float getFloat(byte[] value, int pos) -
getBinary
public static byte[] getBinary(byte[] value, int pos) -
getString
-
handleObject
-
handleArray
-
getMetadataKey
-