Content



Random Forest template

This is a basic template for Random Forest concept. It sorts 15 vectors into 5 classes based on magnitudes of their elements. The code must be obvious without any further explanation.

There is no shortage of RF examples. As always, I found other examples not very clear and optimal and offer my own.
//Elementary DEMO of Random Forest.
//Andrew Polar, 2012 edited in 2024.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace RFCStest
{
    class Node
    {
        public Node left;
        public Node right;
        public int[] classifierVector;
        public double classifierThreshold;
        public int? label = null;
    }

    class DataHolder
    {
        public int[,] training = new int[,] {
            { 1, 2, 0, 0, 1, 1, 0, 1, 2, 3, 4, 1,10, 8,11,13, 8, 9, 0, 1, 1, 3, 2, 1, 2, 3, 0, 3, 0, 1},
            { 1, 1, 4, 0, 0, 0, 1, 1, 3, 0, 1, 0, 2, 1, 3, 1, 0, 0, 1, 0, 3, 3, 1, 0,10, 9, 9, 8,12,11},
            {11,12,10, 7, 9,12, 2, 1, 1, 3, 1, 0, 1, 2, 2, 1, 4, 1, 0, 3, 2, 0, 1, 1, 1, 2, 3, 1, 0, 0},
            { 0, 2, 2, 4, 0, 1, 0, 1, 0, 0, 1, 2, 1, 2, 0, 1, 0, 1, 0, 1, 1, 5, 0, 1, 9, 9,12,12,10, 7},
            { 1, 1, 0, 0, 1, 1, 2, 2, 1, 4, 1, 0, 0, 2, 1, 3, 1, 0,12,11,11, 8,12, 9, 0, 0, 0, 1, 1, 5},
            { 1, 1, 1, 4, 0, 0, 0, 0, 3, 5, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 0, 1, 1,10,10,12, 8, 8, 8},
            {10,11, 9, 8,10,11, 0, 1, 0, 2, 1, 2, 1, 2, 2, 0, 0, 2, 0, 1, 1, 0, 1, 5, 5, 0, 2, 2, 0, 1},
            { 6, 0, 0, 1, 1, 1, 2, 1, 0, 4, 4, 0, 0, 3, 0, 0, 1, 5,12,11, 9, 9,10, 8, 0, 1, 1, 3, 3, 0},
            { 0, 0, 1, 1, 2, 2, 9, 8,10, 9,15, 8, 2, 1, 1, 0, 0, 0, 1, 2, 0, 0, 3, 3, 1, 2, 1, 0, 0, 3},
            { 1, 0, 0, 1, 0, 2, 8,10,12, 8,14, 9, 1, 2, 0, 1, 2, 0, 0, 1, 1, 1, 2, 4, 0, 3, 0, 1, 0, 2},
            { 0, 0, 1, 1, 2, 0, 1, 3, 2, 0, 0, 1, 0, 1, 1, 3, 2, 1,13,10, 9, 9, 8,10, 1, 3, 2, 2, 1, 4},
            { 0, 7, 0, 4, 1, 0,10, 9, 8, 6,10,11, 0, 1, 2, 4, 2, 1, 5, 2, 1, 3, 0, 0, 1, 3, 0, 0, 1, 2},
            { 0, 1, 3, 3, 0, 1, 2, 3, 0, 7, 1, 0,10, 8, 9,15, 6, 8, 0, 0, 4, 4, 1, 4, 1, 1, 0, 1, 1, 4},
            { 0, 0, 3, 3, 0, 1, 1, 3, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 3, 1,12, 9, 9,18, 4, 7},
            { 0, 2, 4, 0, 0, 1, 0, 3, 1, 8, 0, 1, 9, 9,10,12, 8,10, 0, 5, 5, 2, 2, 0, 0, 3, 0, 3, 0, 1},
        };
        public int[] trainingLabels = { 2, 4, 0, 4, 3, 4, 0, 3, 1, 1, 3, 1, 2, 4, 2 };

        public int[,] testing = new int[,] {
            { 0, 0, 1, 1, 2, 2, 1, 0, 4, 4, 2, 0, 9, 9,10,12, 9,10, 1, 0, 0, 1, 3, 0, 3, 1, 1, 0, 1, 2},
            { 8, 8,11,11, 9,12, 1, 2, 0, 0, 2, 2, 2, 2, 1, 0, 0, 0, 1, 2, 2, 0, 0, 6, 6, 0, 0, 1, 0, 1},
            { 0, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 5, 1, 0, 3, 3, 1, 1,10,12, 9, 9, 8,10, 0, 1, 1, 1, 0, 0},
            { 1, 1, 2, 2, 1, 0, 2, 1, 2, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 9,12,17,11, 8, 6},
            { 5, 1, 0, 0, 0, 1, 1, 2, 2, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 4, 4,10,10, 9, 9,10,12},
            { 0, 2, 1, 1, 1, 0, 0, 0, 4, 1, 0, 1, 1, 0, 0, 2, 1, 1,10,12,12, 9, 9, 9, 1, 1, 0, 0, 2, 3},
            { 2, 1, 1, 3, 2, 5, 9,12, 9,13, 9, 8, 1, 2, 1, 2, 0, 0, 6, 3, 2, 1, 1, 1, 0, 0, 3, 3, 3, 4},
            { 1, 5, 5, 5, 0, 0,11,11, 9, 4, 5,10, 0, 0, 0, 3, 3, 4, 0, 5, 4, 4, 0, 1, 0, 1, 2, 1, 0, 4},
            { 1, 1, 3, 3, 4, 0, 0, 2, 1, 0, 1, 5,10, 8, 9,12,14, 5, 3, 1, 1, 0, 8, 1, 1, 0, 0, 2, 1, 0},
            { 5, 7,12,10, 8, 5, 0, 3, 4, 2, 1, 1, 1, 4, 0, 0, 1, 5, 3, 1, 1, 3, 4, 1, 1, 2, 2, 0, 1, 2},
            {15,13,11, 9,10,11, 1, 0, 2, 1, 0, 1, 3, 0, 0, 0, 1, 0, 1, 2, 3, 0, 2, 1, 0, 0, 1, 1, 2, 1},
            { 1, 3, 3, 5, 0, 0, 2, 1, 1, 3, 0, 0, 8,10,12, 9,10,10, 0, 1, 5, 2, 7, 1, 0, 1, 2, 2, 0, 0},
            { 1, 1, 2, 2, 0, 0, 2, 1, 0, 1, 0, 0, 1, 2, 2, 2, 0, 1,14, 9, 8, 8, 9, 9, 2, 2, 3, 3, 0, 5},
            { 1, 2, 5, 0, 6, 9,12,15, 7, 9, 8,12, 1, 3, 4, 0, 0, 1, 2, 0, 2, 1, 2, 2, 0, 0, 1, 2, 2, 0},
            { 9, 8, 7,12,20,10, 1, 0, 5, 5, 0, 2, 0, 1, 4, 4, 3, 0, 5, 3, 2, 4, 0, 0, 7, 1, 4, 1, 2, 1},
        };
        public int[] testingLabels = { 2, 0, 3, 4, 4, 3, 1, 1, 2, 0, 0, 2, 3, 1, 0 };
    }

    class RF
    {
        public void showData(int[,] matrix)
        {
            int rows = matrix.GetLength(0);
            int cols = matrix.GetLength(1);
            for (int i = 0; i < rows; ++i)
            {
                Console.Write("{");
                for (int j = 0; j < cols - 1; ++j)
                {
                    Console.Write("{0,2},", matrix[i, j]);
                }
                Console.Write("{0,2}", matrix[i, cols - 1]);
                Console.WriteLine("},");
            }
            Console.WriteLine();
        }

        public int[] GetMatrixRow(int n, int[,] matrix)
        {
            int nCols = matrix.GetLength(1);
            int[] row = new int[nCols];
            for (int i = 0; i < nCols; i++)
            {
                row[i] = matrix[n, i];
            }
            return row;
        }

        public int[] getRandomSampleForSingleTree(int N)
        {
            int sizeOfRandomSample = (int)(0.64 * (double)(N));
            Random rnd = new Random();
            int[] sample = new int[sizeOfRandomSample];
            int cnt = 0;
            do
            {
                int randomInt = rnd.Next(N);
                bool isUnique = true;
                for (int k = 0; k < cnt; ++k)
                {
                    if (sample[k] == randomInt)
                    {
                        isUnique = false;
                        break;
                    }
                }
                if (isUnique)
                {
                    sample[cnt++] = randomInt;
                }
            } while (cnt < sizeOfRandomSample);
            return sample;
        }

        private int[] makeClassifier(int[] sample, int[,] matrix)
        {
            int nCols = matrix.GetLength(1);
            int[] classifierVector = new int[nCols];
            for (int i = 0; i < nCols; ++i)
            {
                classifierVector[i] = matrix[sample[0], i];
            }
            return classifierVector;
        }

        private double getCosine(int[] s1, int[] s2)
        {
            double product = 0.0;
            double sqr1 = 0.0;
            double sqr2 = 0.0;
            for (int i = 0; i < s1.Length; ++i)
            {
                product += (double)(s1[i]) * (double)(s2[i]);
                sqr1 += (double)(s1[i]) * (double)(s1[i]);
                sqr2 += (double)(s2[i]) * (double)(s2[i]);
            }
            return product / Math.Sqrt(sqr1) / Math.Sqrt(sqr2);
        }

        private double getThreshold(int[] sample, int[] classifier, int[,] matrix)
        {
            int nCols = matrix.GetLength(1);
            double fmin = 1000.0;
            double fmax = -1.0;
            int[] s = new int[nCols];
            foreach (int x in sample)
            {
                double f = getCosine(GetMatrixRow(x, matrix), classifier);
                if (f < fmin) fmin = f;
                if (f > fmax) fmax = f;
            }
            return (fmax + fmin) / 2.0;
        }

        public Node nodeFactory(int[] sample, int[] labels, int[,] matrix)
        {
            Node node = new Node();
            bool oneClass = true;
            int label0 = labels[sample[0]];
            for (int i = 1; i < sample.Length; i++)
            {
                if (label0 != labels[sample[i]])
                {
                    oneClass = false;
                    break;
                }
            }
            if (oneClass)
            {
                node.label = labels[sample[0]];
                Console.WriteLine("Training: final node is built with {0} records", sample.Length);
                return node;
            }
            node.classifierVector = makeClassifier(sample, matrix);
            node.classifierThreshold = getThreshold(sample, node.classifierVector, matrix);
            return node;
        }

        private byte[] splitSample(Node node, int[] sample, int[,] matrix)
        {
            byte[] indicator = new byte[sample.Length];
            int cnt = 0;
            foreach (int x in sample)
            {
                double f = getCosine(GetMatrixRow(x, matrix), node.classifierVector);
                if (f >= node.classifierThreshold) indicator[cnt] = 1;
                else indicator[cnt] = 0;
                ++cnt;
            }
            return indicator;
        }

        public void makeLeftAndRightNodes(Node node, int[] sample, int[] labels, int[,] matrix)
        {
            byte[] indicator = splitSample(node, sample, matrix);
            int nLeftVectors = 0;
            foreach (byte b in indicator)
            {
                if (b > 0) ++nLeftVectors;
            }
            int nRightVectors = indicator.Length - nLeftVectors;
            int[] left = new int[nLeftVectors];
            int[] right = new int[nRightVectors];
            int cntL = 0;
            int cntR = 0;
            int cnt = 0;
            foreach (byte b in indicator)
            {
                if (b > 0)
                {
                    left[cntL++] = sample[cnt];
                }
                else
                {
                    right[cntR++] = sample[cnt];
                }
                ++cnt;
            }
            node.left = nodeFactory(left, labels, matrix);
            node.right = nodeFactory(right, labels, matrix);
            if (node.left.label == null)
            {
                makeLeftAndRightNodes(node.left, left, labels, matrix);
            }
            if (node.right.label == null)
            {
                makeLeftAndRightNodes(node.right, right, labels, matrix);
            }
        }

        public int? getLabelFromTree(Node node, int[] data)
        {
            if (node.label != null)
            {
                return node.label;
            }
            double f = getCosine(node.classifierVector, data);
            if (f > node.classifierThreshold)
            {
                return getLabelFromTree(node.left, data);
            }
            else
            {
                return getLabelFromTree(node.right, data);
            }
        }

        public Node MakeSingleTree(int[,] matrix, int[] labels)
        {
            int[] randomSample = getRandomSampleForSingleTree(labels.Length);
            Node topnode = nodeFactory(randomSample, labels, matrix);
            if (topnode.label == null)
            {
                makeLeftAndRightNodes(topnode, randomSample, labels, matrix);
            }
            return topnode;
        }

        public int[] GetLabels(Node topnode, int[,] matrix)
        {
            int n = matrix.GetLength(0);
            int[] labels = new int[n];
            for (int i = 0; i < n; ++i)
            {
                int? label = getLabelFromTree(topnode, GetMatrixRow(i, matrix));
                if (null == label) labels[i] = -1;
                else labels[i] = (int)label;
            }
            return labels;
        }
    }

    class Program
    {
        static void Main(string[] args)
        {
            RF rf = new RF();
            DataHolder dh = new DataHolder();

            //We make two different trees as example from training dataset
            Node topnode1 = rf.MakeSingleTree(dh.training, dh.trainingLabels);
            Console.WriteLine();
            Node topnode2 = rf.MakeSingleTree(dh.training, dh.trainingLabels);
            Console.WriteLine();

            //we obtain two label sets from these two trees 
            int[] labels1 = rf.GetLabels(topnode1, dh.testing);
            int[] labels2 = rf.GetLabels(topnode2, dh.testing);

            //we compare obtained lables to actual ones,
            //the final result is obtained by voting using more trees
            int errors = 0;
            string text = "row = {0,3}, label set 1 = {1,3}, label set 2 = {2,3}, actual label = {3,3}";
            for (int i = 0; i < labels1.Length; ++i)
            {
                Console.WriteLine(text, i, labels1[i], labels2[i], dh.testingLabels[i]);
                if (labels1[i] != dh.testingLabels[i]) ++errors;
                if (labels2[i] != dh.testingLabels[i]) ++errors;
            }
            Console.WriteLine("Errors in all sets {0}", errors);
        }
    }
}