using System;
using System.IO;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to create and manipulate factors.
  /// </summary>
  /// <remarks>
  /// The Factor class represents a categorical vector in which all
  /// elements are drawn from a finite number of factor levels. Thus, a Factor contains
  /// two parts: a string array of factor levels, and an integer array of
  /// categorical data, of which each element is an index into the array of levels.
  /// </remarks>
  public class FactorExample
  {
    /// <summary>
    /// Loads the sample data set, builds factors for the "Gender" and "Grade"
    /// columns, and prints the grand, marginal, and cell means of the "Score"
    /// column, followed by the equivalent cross-tabulations and a two-way ANOVA.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
      // Read in data from the file. The data show test scores for 17 children on a
      // simple reading test. The child's gender ( "male" or "female" ) and grade
      // (4, 5, or 6) are also recorded.
      // NOTE(review): the file is tab-delimited; the three boolean flags are
      // presumably headers / row keys / parse -- confirm against DataFrame.Load.
      DataFrame df = DataFrame.Load( "FactorExample.dat", true, false, "\t", true );

      Console.WriteLine();
      Console.WriteLine( df + "\n" );

      // Factors are usually constructed from a data frame column using the
      // GetFactor() method, which creates a Factor with levels for the sorted, unique
      // values in the column.
      Factor gender = df.GetFactor( "Gender" );

      // Display the levels and categorical data for the gender factor.
      Console.WriteLine( "Gender factor: " + gender );
      Console.WriteLine( "Gender levels: " + gender.LevelsToString() );
      Console.WriteLine( "Gender data: " + gender.DataToString() );
      Console.WriteLine();

      // Construct a factor for grade level.
      Factor grade = df.GetFactor( "Grade" );

      // Display the levels and categorical data for the grade factor.
      Console.WriteLine( "Grade factor: " + grade );
      Console.WriteLine( "Grade levels: " + grade.LevelsToString() );
      Console.WriteLine( "Grade data: " + grade.DataToString() );
      Console.WriteLine();

      // The principal use of factors is in conjunction with the
      // GetGroupings() methods on Subset. One overload of this method accepts
      // a single Factor and returns an array of subsets containing the indices
      // for each level of the given factor.
      Subset[] genders = Subset.GetGroupings( gender );
      Subset[] grades = Subset.GetGroupings( grade );

      // Display overall mean
      Console.WriteLine( "Grand mean = {0}", StatsFunctions.Mean( df["Score"] ) );
      Console.WriteLine();

      // The index of the data column does not change, so look it up once and
      // reuse it for every subset below instead of re-resolving it per iteration.
      int scoreColumn = df.IndexOfColumn( "Score" );

      // Display mean for each level of the Gender and Grade factors.
      Console.WriteLine( "Marginal Means" );
      for ( int i = 0; i < gender.NumberOfLevels; i++ )
      {
        double mean = StatsFunctions.Mean( df[scoreColumn, genders[i]] );

        // Formatted to exactly two decimal places.
        Console.WriteLine( "Mean for gender {0} = {1}", gender.Levels[i], mean.ToString( "F2" ) );
      }

      for ( int i = 0; i < grade.NumberOfLevels; i++ )
      {
        double mean = StatsFunctions.Mean( df[scoreColumn, grades[i]] );

        // Rounded to at most two decimal places.
        mean = System.Math.Round( mean, 2 );
        Console.WriteLine( "Mean for grade {0} = {1}", grade.Levels[i], mean );
      }
      Console.WriteLine();

      // Another overload of GetGroupings() accepts two Factor objects and returns
      // a two-dimensional array of subsets containing the indices for
      // each combination of levels in the two factors.
      Console.WriteLine( "Cell Means" );
      Subset[,] cells = Subset.GetGroupings( gender, grade );
      for ( int i = 0; i < gender.NumberOfLevels; i++ )
      {
        for ( int j = 0; j < grade.NumberOfLevels; j++ )
        {
          double mean = StatsFunctions.Mean( df[scoreColumn, cells[i, j]] );
          mean = System.Math.Round( mean, 2 );
          Console.WriteLine( "Mean for gender {0} in grade {1} = {2}", gender.Levels[i], grade.Levels[j], mean );
        }
      }
      Console.WriteLine();

      // Combining DataFrame.GetFactor() with Subset.GetGroupings() to access cells
      // is such a common operation that class DataFrame also provides the Tabulate()
      // method as a convenience. This method accepts one or two grouping columns, a
      // data column, and a delegate to apply to each data column subset. This code
      // displays the same marginal and cell means shown above, but with far fewer
      // lines of code:
      var meanFunction = new Func<IDFColumn, double>( StatsFunctions.Mean );
      Console.WriteLine( "Same results using cross-tabulation:\n" );
      Console.WriteLine( df.Tabulate( "Grade", "Score", meanFunction ) + "\n" );
      Console.WriteLine( df.Tabulate( "Gender", "Score", meanFunction ) + "\n" );
      Console.WriteLine( df.Tabulate( "Grade", "Gender", "Score", meanFunction ) + "\n" );

      // Factors are used internally by ANOVA classes for grouping data.
      var anova = new TwoWayAnova( df, df.IndexOfColumn( "Gender" ), df.IndexOfColumn( "Grade" ), scoreColumn );
      Console.WriteLine( anova );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    } // Main

  } // class

} // namespace