public class NGram extends UnaryTransformer<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>,NGram>
When the input is empty, an empty array is returned. When the input array length is less than n (number of elements per n-gram), no n-grams are returned.
| Modifier and Type | Method and Description |
|---|---|
protected static <T> T |
$(Param<T> param) |
static Params |
clear(Param<?> param) |
static T |
copy(ParamMap extra) |
protected static <T extends Params> |
copyValues(T to,
ParamMap extra) |
protected static <T extends Params> |
copyValues$default$2() |
protected scala.Function1<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>> |
createTransformFunc()
Creates the transform function using the given param map.
|
protected static <T extends Params> |
defaultCopy(ParamMap extra) |
static java.lang.String |
explainParam(Param<?> param) |
static java.lang.String |
explainParams() |
static ParamMap |
extractParamMap() |
static ParamMap |
extractParamMap(ParamMap extra) |
static <T> scala.Option<T> |
get(Param<T> param) |
static <T> scala.Option<T> |
getDefault(Param<T> param) |
static java.lang.String |
getInputCol() |
int |
getN() |
static <T> T |
getOrDefault(Param<T> param) |
static java.lang.String |
getOutputCol() |
static Param<java.lang.Object> |
getParam(java.lang.String paramName) |
static <T> boolean |
hasDefault(Param<T> param) |
static boolean |
hasParam(java.lang.String paramName) |
protected static void |
initializeLogIfNecessary(boolean isInterpreter) |
static Param<java.lang.String> |
inputCol() |
static boolean |
isDefined(Param<?> param) |
static boolean |
isSet(Param<?> param) |
protected static boolean |
isTraceEnabled() |
static NGram |
load(java.lang.String path) |
protected static org.slf4j.Logger |
log() |
protected static void |
logDebug(scala.Function0<java.lang.String> msg) |
protected static void |
logDebug(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable) |
protected static void |
logError(scala.Function0<java.lang.String> msg) |
protected static void |
logError(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable) |
protected static void |
logInfo(scala.Function0<java.lang.String> msg) |
protected static void |
logInfo(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable) |
protected static java.lang.String |
logName() |
protected static void |
logTrace(scala.Function0<java.lang.String> msg) |
protected static void |
logTrace(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable) |
protected static void |
logWarning(scala.Function0<java.lang.String> msg) |
protected static void |
logWarning(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable) |
IntParam |
n()
N-gram length (number of elements per n-gram), >= 1.
|
static Param<java.lang.String> |
outputCol() |
protected DataType |
outputDataType()
Returns the data type of the output column.
|
static Param<?>[] |
params() |
static void |
save(java.lang.String path) |
static <T> Params |
set(Param<T> param,
T value) |
protected static Params |
set(ParamPair<?> paramPair) |
protected static Params |
set(java.lang.String param,
java.lang.Object value) |
protected static <T> Params |
setDefault(Param<T> param,
T value) |
protected static Params |
setDefault(scala.collection.Seq<ParamPair<?>> paramPairs) |
static T |
setInputCol(java.lang.String value) |
NGram |
setN(int value) |
static T |
setOutputCol(java.lang.String value) |
static java.lang.String |
toString() |
static Dataset<Row> |
transform(Dataset<?> dataset) |
static Dataset<Row> |
transform(Dataset<?> dataset,
ParamMap paramMap) |
static Dataset<Row> |
transform(Dataset<?> dataset,
ParamPair<?> firstParamPair,
ParamPair<?>... otherParamPairs) |
static Dataset<Row> |
transform(Dataset<?> dataset,
ParamPair<?> firstParamPair,
scala.collection.Seq<ParamPair<?>> otherParamPairs) |
static StructType |
transformSchema(StructType schema) |
protected static StructType |
transformSchema(StructType schema,
boolean logging) |
java.lang.String |
uid()
An immutable unique ID for the object and its derivatives.
|
protected void |
validateInputType(DataType inputType)
Validates the input type.
|
static void |
validateParams() |
static MLWriter |
write() |
copy, setInputCol, setOutputCol, transform, transformSchema
transform, transform, transform
transformSchema
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
clear, copyValues, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, paramMap, params, set, set, set, setDefault, setDefault, shouldOwn, validateParams
toString
public static NGram load(java.lang.String path)
public static java.lang.String toString()
public static Param<?>[] params()
public static void validateParams()
public static java.lang.String explainParam(Param<?> param)
public static java.lang.String explainParams()
public static final boolean isSet(Param<?> param)
public static final boolean isDefined(Param<?> param)
public static boolean hasParam(java.lang.String paramName)
public static Param<java.lang.Object> getParam(java.lang.String paramName)
protected static final Params set(java.lang.String param, java.lang.Object value)
public static final <T> scala.Option<T> get(Param<T> param)
public static final <T> T getOrDefault(Param<T> param)
protected static final <T> T $(Param<T> param)
public static final <T> scala.Option<T> getDefault(Param<T> param)
public static final <T> boolean hasDefault(Param<T> param)
public static final ParamMap extractParamMap()
protected static java.lang.String logName()
protected static org.slf4j.Logger log()
protected static void logInfo(scala.Function0<java.lang.String> msg)
protected static void logDebug(scala.Function0<java.lang.String> msg)
protected static void logTrace(scala.Function0<java.lang.String> msg)
protected static void logWarning(scala.Function0<java.lang.String> msg)
protected static void logError(scala.Function0<java.lang.String> msg)
protected static void logInfo(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable)
protected static void logDebug(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable)
protected static void logTrace(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable)
protected static void logWarning(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable)
protected static void logError(scala.Function0<java.lang.String> msg,
java.lang.Throwable throwable)
protected static boolean isTraceEnabled()
protected static void initializeLogIfNecessary(boolean isInterpreter)
protected static StructType transformSchema(StructType schema, boolean logging)
public static Dataset<Row> transform(Dataset<?> dataset, ParamPair<?> firstParamPair, scala.collection.Seq<ParamPair<?>> otherParamPairs)
public static Dataset<Row> transform(Dataset<?> dataset, ParamPair<?> firstParamPair, ParamPair<?>... otherParamPairs)
public static final Param<java.lang.String> inputCol()
public static final java.lang.String getInputCol()
public static final Param<java.lang.String> outputCol()
public static final java.lang.String getOutputCol()
public static T setInputCol(java.lang.String value)
public static T setOutputCol(java.lang.String value)
public static StructType transformSchema(StructType schema)
public static T copy(ParamMap extra)
public static void save(java.lang.String path)
throws java.io.IOException
Throws: java.io.IOException
public static MLWriter write()
public java.lang.String uid()
Identifiable
public IntParam n()
public NGram setN(int value)
public int getN()
protected scala.Function1<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>> createTransformFunc()
UnaryTransformer
createTransformFunc in class UnaryTransformer<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>,NGram>
protected void validateInputType(DataType inputType)
UnaryTransformer
validateInputType in class UnaryTransformer<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>,NGram>
inputType - (undocumented)
protected DataType outputDataType()
UnaryTransformer
outputDataType in class UnaryTransformer<scala.collection.Seq<java.lang.String>,scala.collection.Seq<java.lang.String>,NGram>