FastTreeBinaryClassificationTrainer.Arguments

The documentation is generated based on the sources available at dotnet/machinelearning and released under MIT License.

Type: argument Aliases: Microsoft.ML.Trainers.FastTree.FastTreeBinaryClassificationTrainer+Arguments Namespace: Microsoft.ML.Trainers.FastTree Assembly: Microsoft.ML.FastTree.dll Microsoft Documentation: FastTreeBinaryClassificationTrainer.Arguments

Description

Parameters

Name Short name Default Description
AllowEmptyTrees ShortName = “allowempty,dummies” True HelpText = “When a root split is impossible, allow training to proceed”
BaggingSize ShortName = “bag” 0 HelpText = “Number of trees in each bag (0 for disabling bagging)”
BaggingTrainFraction ShortName = “bagfrac” 0.7 HelpText = “Percentage of training examples used in each bag”
BaselineAlphaRisk ShortName = “basealpha”   HelpText = “Baseline alpha for tradeoffs of risk (0 is normal training)”
BaselineScoresFormula ShortName = “basescores”   HelpText = “Freeform defining the scores that should be used as the baseline ranker”
BestStepRankingRegressionTrees ShortName = “bsr” False HelpText = “Use best regression step trees?”
Bias ShortName = “bias” 0 HelpText = “Bias for calculating gradient for each feature bin for a categorical feature.”
Bundling ShortName = “bundle” None HelpText = “Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle.”
Caching ShortName = “cache” Auto HelpText = “Whether learner should cache input training data”
CategoricalSplit ShortName = “cat” False HelpText = “Whether to do split based on multiple categorical feature values.”
CompressEnsemble ShortName = “cmp” False HelpText = “Compress the tree Ensemble”
DiskTranspose ShortName = “dt”   HelpText = “Whether to utilize the disk or the data’s native transposition facilities (where applicable) when performing the transpose”
DropoutRate ShortName = “tdrop” 0 HelpText = “Dropout rate for tree regularization”
EarlyStoppingMetrics ShortName = “esmt” 0 HelpText = “Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3)”
EarlyStoppingRule ShortName = “esr”   HelpText = “Early stopping rule. (Validation set (/valid) is required.)”
EnablePruning ShortName = “pruning” False HelpText = “Enable post-training pruning to avoid overfitting. (a validation set is required)”
EntropyCoefficient ShortName = “e” 0 HelpText = “The entropy (regularization) coefficient between 0 and 1”
ExecutionTimes ShortName = “et” False HelpText = “Print execution time breakdown to stdout”
FeatureColumn ShortName = “feat” Features HelpText = “Column to use for features”
FeatureCompressionLevel ShortName = “fcomp” 1 HelpText = “The level of feature compression to use”
FeatureFirstUsePenalty ShortName = “ffup” 0 HelpText = “The feature first use penalty coefficient”
FeatureFlocks ShortName = “flocks” True HelpText = “Whether to collectivize features during dataset preparation to speed up training”
FeatureFraction ShortName = “ff” 1 HelpText = “The fraction of features (chosen randomly) to use on each iteration”
FeatureReusePenalty ShortName = “frup” 0 HelpText = “The feature re-use penalty (regularization) coefficient”
FeatureSelectSeed ShortName = “r3” 123 HelpText = “The seed of the active feature selection”
FilterZeroLambdas ShortName = “fzl” False HelpText = “Filter zero lambdas during training”
GainConfidenceLevel ShortName = “gainconf” 0 HelpText = “Tree fitting gain confidence requirement (should be in the range [0,1) ).”
GetDerivativesSampleRate ShortName = “sr” 1 HelpText = “Sample each query 1 in k times in the GetDerivatives function”
GroupIdColumn ShortName = “groupId” GroupId HelpText = “Column to use for example groupId”
HistogramPoolSize ShortName = “ps” -1 HelpText = “The number of histograms in the pool (between 2 and numLeaves)”
LabelColumn ShortName = “lab” Label HelpText = “Column to use for labels”
LearningRates ShortName = “lr” 0.2 HelpText = “The learning rate”
MaxBins ShortName = “mb” 255 HelpText = “Maximum number of distinct values (bins) per feature”
MaxCategoricalGroupsPerNode ShortName = “mcg” 64 HelpText = “Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there are many categorical features.”
MaxCategoricalSplitPoints ShortName = “maxcat” 64 HelpText = “Maximum categorical split points to consider when splitting on a categorical feature.”
MaxTreeOutput ShortName = “mo” 100 HelpText = “Upper bound on absolute value of single tree output”
MaxTreesAfterCompression ShortName = “cmpmax” -1 HelpText = “Maximum Number of trees after compression”
MinDocsForCategoricalSplit ShortName = “mdo” 100 HelpText = “Minimum categorical doc count in a bin to consider for a split.”
MinDocsPercentageForCategoricalSplit ShortName = “mdop” 0.001 HelpText = “Minimum categorical docs percentage in a bin to consider for a split.”
MinDocumentsInLeafs ShortName = “mil” 10 HelpText = “The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data”
MinStepSize ShortName = “minstep” 0 HelpText = “Minimum line search step size”
NormalizeFeatures ShortName = “norm” Auto HelpText = “Normalize option for the feature column”
NumLeaves ShortName = “nl” 20 HelpText = “The max number of leaves in each regression tree”
NumPostBracketSteps ShortName = “lssteps” 0 HelpText = “Number of post-bracket line search steps”
NumThreads ShortName = “t”   HelpText = “The number of threads to use”
NumTrees ShortName = “iter” 100 HelpText = “Total number of decision trees to create in the ensemble”
OptimizationAlgorithm ShortName = “oa” GradientDescent HelpText = “Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent)”
ParallelTrainer ShortName = “parag” Microsoft.ML.Trainers.FastTree.SingleTrainerFactory HelpText = “Allows to choose Parallel FastTree Learning Algorithm”
PositionDiscountFreeform ShortName = “pdff”   HelpText = “The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position)”
PrintTestGraph ShortName = “graph” False HelpText = “Print metrics graph for the first test set”
PrintTrainValidGraph ShortName = “graphtv” False HelpText = “Print Train and Validation metrics in graph”
PruningThreshold ShortName = “prth” 0.004 HelpText = “The tolerance threshold for pruning”
PruningWindowSize ShortName = “prws” 5 HelpText = “The moving window size for pruning”
RandomStart ShortName = “rs” False HelpText = “Training starts from random ordering (determined by /r1)”
RngSeed ShortName = “r1” 123 HelpText = “The seed of the random number generator”
Shrinkage ShortName = “shrk” 1 HelpText = “Shrinkage”
Smoothing ShortName = “s” 0 HelpText = “Smoothing parameter for tree regularization”
SoftmaxTemperature ShortName = “smtemp” 0 HelpText = “The temperature of the randomized softmax distribution for choosing the feature”
SparsifyThreshold ShortName = “sp” 0.7 HelpText = “Sparsity level needed to use sparse feature representation”
SplitFraction ShortName = “sf” 1 HelpText = “The fraction of features (chosen randomly) to use on each split”
TestFrequency ShortName = “tf” 2147483647 HelpText = “Calculate metric values for train/valid/test every k rounds”
TrainingData ShortName = “data”   HelpText = “The data to be used for training”
UnbalancedSets ShortName = “us” False HelpText = “Should we use derivatives optimized for unbalanced sets”
UseLineSearch ShortName = “ls” False HelpText = “Should we use line search for a step size”
UseTolerantPruning ShortName = “prtol” False HelpText = “Use window and tolerance for pruning”
WeightColumn ShortName = “weight” Weight HelpText = “Column to use for example weight”
WriteLastEnsemble ShortName = “hl” False HelpText = “Write the last ensemble instead of the one determined by early stopping”