Random Forest template
This is a basic template for Random Forest concept. It sorts 15 vectors into 5 classes based on
magnitudes of their elements. The code must be obvious without any further explanation.
There is no shortage of RF examples. As always, I found other examples not very clear and optimal and
offer my own.
//Elementary DEMO of Random Forest.
//Andrew Polar, 2012 edited in 2024.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace RFCStest
{
class Node
{
public Node left;
public Node right;
public int[] classifierVector;
public double classifierThreshold;
public int? label = null;
}
class DataHolder
{
public int[,] training = new int[,] {
{ 1, 2, 0, 0, 1, 1, 0, 1, 2, 3, 4, 1,10, 8,11,13, 8, 9, 0, 1, 1, 3, 2, 1, 2, 3, 0, 3, 0, 1},
{ 1, 1, 4, 0, 0, 0, 1, 1, 3, 0, 1, 0, 2, 1, 3, 1, 0, 0, 1, 0, 3, 3, 1, 0,10, 9, 9, 8,12,11},
{11,12,10, 7, 9,12, 2, 1, 1, 3, 1, 0, 1, 2, 2, 1, 4, 1, 0, 3, 2, 0, 1, 1, 1, 2, 3, 1, 0, 0},
{ 0, 2, 2, 4, 0, 1, 0, 1, 0, 0, 1, 2, 1, 2, 0, 1, 0, 1, 0, 1, 1, 5, 0, 1, 9, 9,12,12,10, 7},
{ 1, 1, 0, 0, 1, 1, 2, 2, 1, 4, 1, 0, 0, 2, 1, 3, 1, 0,12,11,11, 8,12, 9, 0, 0, 0, 1, 1, 5},
{ 1, 1, 1, 4, 0, 0, 0, 0, 3, 5, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 0, 1, 1,10,10,12, 8, 8, 8},
{10,11, 9, 8,10,11, 0, 1, 0, 2, 1, 2, 1, 2, 2, 0, 0, 2, 0, 1, 1, 0, 1, 5, 5, 0, 2, 2, 0, 1},
{ 6, 0, 0, 1, 1, 1, 2, 1, 0, 4, 4, 0, 0, 3, 0, 0, 1, 5,12,11, 9, 9,10, 8, 0, 1, 1, 3, 3, 0},
{ 0, 0, 1, 1, 2, 2, 9, 8,10, 9,15, 8, 2, 1, 1, 0, 0, 0, 1, 2, 0, 0, 3, 3, 1, 2, 1, 0, 0, 3},
{ 1, 0, 0, 1, 0, 2, 8,10,12, 8,14, 9, 1, 2, 0, 1, 2, 0, 0, 1, 1, 1, 2, 4, 0, 3, 0, 1, 0, 2},
{ 0, 0, 1, 1, 2, 0, 1, 3, 2, 0, 0, 1, 0, 1, 1, 3, 2, 1,13,10, 9, 9, 8,10, 1, 3, 2, 2, 1, 4},
{ 0, 7, 0, 4, 1, 0,10, 9, 8, 6,10,11, 0, 1, 2, 4, 2, 1, 5, 2, 1, 3, 0, 0, 1, 3, 0, 0, 1, 2},
{ 0, 1, 3, 3, 0, 1, 2, 3, 0, 7, 1, 0,10, 8, 9,15, 6, 8, 0, 0, 4, 4, 1, 4, 1, 1, 0, 1, 1, 4},
{ 0, 0, 3, 3, 0, 1, 1, 3, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 3, 1,12, 9, 9,18, 4, 7},
{ 0, 2, 4, 0, 0, 1, 0, 3, 1, 8, 0, 1, 9, 9,10,12, 8,10, 0, 5, 5, 2, 2, 0, 0, 3, 0, 3, 0, 1},
};
public int[] trainingLabels = { 2, 4, 0, 4, 3, 4, 0, 3, 1, 1, 3, 1, 2, 4, 2 };
public int[,] testing = new int[,] {
{ 0, 0, 1, 1, 2, 2, 1, 0, 4, 4, 2, 0, 9, 9,10,12, 9,10, 1, 0, 0, 1, 3, 0, 3, 1, 1, 0, 1, 2},
{ 8, 8,11,11, 9,12, 1, 2, 0, 0, 2, 2, 2, 2, 1, 0, 0, 0, 1, 2, 2, 0, 0, 6, 6, 0, 0, 1, 0, 1},
{ 0, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 5, 1, 0, 3, 3, 1, 1,10,12, 9, 9, 8,10, 0, 1, 1, 1, 0, 0},
{ 1, 1, 2, 2, 1, 0, 2, 1, 2, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 9,12,17,11, 8, 6},
{ 5, 1, 0, 0, 0, 1, 1, 2, 2, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 4, 4,10,10, 9, 9,10,12},
{ 0, 2, 1, 1, 1, 0, 0, 0, 4, 1, 0, 1, 1, 0, 0, 2, 1, 1,10,12,12, 9, 9, 9, 1, 1, 0, 0, 2, 3},
{ 2, 1, 1, 3, 2, 5, 9,12, 9,13, 9, 8, 1, 2, 1, 2, 0, 0, 6, 3, 2, 1, 1, 1, 0, 0, 3, 3, 3, 4},
{ 1, 5, 5, 5, 0, 0,11,11, 9, 4, 5,10, 0, 0, 0, 3, 3, 4, 0, 5, 4, 4, 0, 1, 0, 1, 2, 1, 0, 4},
{ 1, 1, 3, 3, 4, 0, 0, 2, 1, 0, 1, 5,10, 8, 9,12,14, 5, 3, 1, 1, 0, 8, 1, 1, 0, 0, 2, 1, 0},
{ 5, 7,12,10, 8, 5, 0, 3, 4, 2, 1, 1, 1, 4, 0, 0, 1, 5, 3, 1, 1, 3, 4, 1, 1, 2, 2, 0, 1, 2},
{15,13,11, 9,10,11, 1, 0, 2, 1, 0, 1, 3, 0, 0, 0, 1, 0, 1, 2, 3, 0, 2, 1, 0, 0, 1, 1, 2, 1},
{ 1, 3, 3, 5, 0, 0, 2, 1, 1, 3, 0, 0, 8,10,12, 9,10,10, 0, 1, 5, 2, 7, 1, 0, 1, 2, 2, 0, 0},
{ 1, 1, 2, 2, 0, 0, 2, 1, 0, 1, 0, 0, 1, 2, 2, 2, 0, 1,14, 9, 8, 8, 9, 9, 2, 2, 3, 3, 0, 5},
{ 1, 2, 5, 0, 6, 9,12,15, 7, 9, 8,12, 1, 3, 4, 0, 0, 1, 2, 0, 2, 1, 2, 2, 0, 0, 1, 2, 2, 0},
{ 9, 8, 7,12,20,10, 1, 0, 5, 5, 0, 2, 0, 1, 4, 4, 3, 0, 5, 3, 2, 4, 0, 0, 7, 1, 4, 1, 2, 1},
};
public int[] testingLabels = { 2, 0, 3, 4, 4, 3, 1, 1, 2, 0, 0, 2, 3, 1, 0 };
}
class RF
{
public void showData(int[,] matrix)
{
int rows = matrix.GetLength(0);
int cols = matrix.GetLength(1);
for (int i = 0; i < rows; ++i)
{
Console.Write("{");
for (int j = 0; j < cols - 1; ++j)
{
Console.Write("{0,2},", matrix[i, j]);
}
Console.Write("{0,2}", matrix[i, cols - 1]);
Console.WriteLine("},");
}
Console.WriteLine();
}
public int[] GetMatrixRow(int n, int[,] matrix)
{
int nCols = matrix.GetLength(1);
int[] row = new int[nCols];
for (int i = 0; i < nCols; i++)
{
row[i] = matrix[n, i];
}
return row;
}
public int[] getRandomSampleForSingleTree(int N)
{
int sizeOfRandomSample = (int)(0.64 * (double)(N));
Random rnd = new Random();
int[] sample = new int[sizeOfRandomSample];
int cnt = 0;
do
{
int randomInt = rnd.Next(N);
bool isUnique = true;
for (int k = 0; k < cnt; ++k)
{
if (sample[k] == randomInt)
{
isUnique = false;
break;
}
}
if (isUnique)
{
sample[cnt++] = randomInt;
}
} while (cnt < sizeOfRandomSample);
return sample;
}
private int[] makeClassifier(int[] sample, int[,] matrix)
{
int nCols = matrix.GetLength(1);
int[] classifierVector = new int[nCols];
for (int i = 0; i < nCols; ++i)
{
classifierVector[i] = matrix[sample[0], i];
}
return classifierVector;
}
private double getCosine(int[] s1, int[] s2)
{
double product = 0.0;
double sqr1 = 0.0;
double sqr2 = 0.0;
for (int i = 0; i < s1.Length; ++i)
{
product += (double)(s1[i]) * (double)(s2[i]);
sqr1 += (double)(s1[i]) * (double)(s1[i]);
sqr2 += (double)(s2[i]) * (double)(s2[i]);
}
return product / Math.Sqrt(sqr1) / Math.Sqrt(sqr2);
}
private double getThreshold(int[] sample, int[] classifier, int[,] matrix)
{
int nCols = matrix.GetLength(1);
double fmin = 1000.0;
double fmax = -1.0;
int[] s = new int[nCols];
foreach (int x in sample)
{
double f = getCosine(GetMatrixRow(x, matrix), classifier);
if (f < fmin) fmin = f;
if (f > fmax) fmax = f;
}
return (fmax + fmin) / 2.0;
}
public Node nodeFactory(int[] sample, int[] labels, int[,] matrix)
{
Node node = new Node();
bool oneClass = true;
int label0 = labels[sample[0]];
for (int i = 1; i < sample.Length; i++)
{
if (label0 != labels[sample[i]])
{
oneClass = false;
break;
}
}
if (oneClass)
{
node.label = labels[sample[0]];
Console.WriteLine("Training: final node is built with {0} records", sample.Length);
return node;
}
node.classifierVector = makeClassifier(sample, matrix);
node.classifierThreshold = getThreshold(sample, node.classifierVector, matrix);
return node;
}
private byte[] splitSample(Node node, int[] sample, int[,] matrix)
{
byte[] indicator = new byte[sample.Length];
int cnt = 0;
foreach (int x in sample)
{
double f = getCosine(GetMatrixRow(x, matrix), node.classifierVector);
if (f >= node.classifierThreshold) indicator[cnt] = 1;
else indicator[cnt] = 0;
++cnt;
}
return indicator;
}
public void makeLeftAndRightNodes(Node node, int[] sample, int[] labels, int[,] matrix)
{
byte[] indicator = splitSample(node, sample, matrix);
int nLeftVectors = 0;
foreach (byte b in indicator)
{
if (b > 0) ++nLeftVectors;
}
int nRightVectors = indicator.Length - nLeftVectors;
int[] left = new int[nLeftVectors];
int[] right = new int[nRightVectors];
int cntL = 0;
int cntR = 0;
int cnt = 0;
foreach (byte b in indicator)
{
if (b > 0)
{
left[cntL++] = sample[cnt];
}
else
{
right[cntR++] = sample[cnt];
}
++cnt;
}
node.left = nodeFactory(left, labels, matrix);
node.right = nodeFactory(right, labels, matrix);
if (node.left.label == null)
{
makeLeftAndRightNodes(node.left, left, labels, matrix);
}
if (node.right.label == null)
{
makeLeftAndRightNodes(node.right, right, labels, matrix);
}
}
public int? getLabelFromTree(Node node, int[] data)
{
if (node.label != null)
{
return node.label;
}
double f = getCosine(node.classifierVector, data);
if (f > node.classifierThreshold)
{
return getLabelFromTree(node.left, data);
}
else
{
return getLabelFromTree(node.right, data);
}
}
public Node MakeSingleTree(int[,] matrix, int[] labels)
{
int[] randomSample = getRandomSampleForSingleTree(labels.Length);
Node topnode = nodeFactory(randomSample, labels, matrix);
if (topnode.label == null)
{
makeLeftAndRightNodes(topnode, randomSample, labels, matrix);
}
return topnode;
}
public int[] GetLabels(Node topnode, int[,] matrix)
{
int n = matrix.GetLength(0);
int[] labels = new int[n];
for (int i = 0; i < n; ++i)
{
int? label = getLabelFromTree(topnode, GetMatrixRow(i, matrix));
if (null == label) labels[i] = -1;
else labels[i] = (int)label;
}
return labels;
}
}
class Program
{
static void Main(string[] args)
{
RF rf = new RF();
DataHolder dh = new DataHolder();
//We make two different trees as example from training dataset
Node topnode1 = rf.MakeSingleTree(dh.training, dh.trainingLabels);
Console.WriteLine();
Node topnode2 = rf.MakeSingleTree(dh.training, dh.trainingLabels);
Console.WriteLine();
//we obtain two label sets from these two trees
int[] labels1 = rf.GetLabels(topnode1, dh.testing);
int[] labels2 = rf.GetLabels(topnode2, dh.testing);
//we compare obtained lables to actual ones,
//the final result is obtained by voting using more trees
int errors = 0;
string text = "row = {0,3}, label set 1 = {1,3}, label set 2 = {2,3}, actual label = {3,3}";
for (int i = 0; i < labels1.Length; ++i)
{
Console.WriteLine(text, i, labels1[i], labels2[i], dh.testingLabels[i]);
if (labels1[i] != dh.testingLabels[i]) ++errors;
if (labels2[i] != dh.testingLabels[i]) ++errors;
}
Console.WriteLine("Errors in all sets {0}", errors);
}
}
}
|
|