using System;
using System.Collections;
using CenterSpace.NMath.Core;
using System.IO;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to use the cross-tabulation functionality of DataFrame.
  /// </summary>
  /// <remarks>
  /// As illustrated in the FactorExample, the DataFrame.GetFactor() method can be used
  /// in conjunction with Subset.GetGroupings() to access "cells" of data based on one
  /// or two grouping factors. This is such a common operation that class DataFrame also
  /// provides the Tabulate() methods as a convenience. These methods accept one or two
  /// grouping columns, a data column, and a delegate to apply to each data column subset.
  /// The results are returned in a new data frame.
  /// </remarks>
  public class CrossTabulationExample
  {
    /// <summary>
    /// Loads the example data set, prints several cross-tabulations of it to the
    /// console, and then waits for console input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
      // Read in data from the file. The data show test scores for 18 children on a
      // simple reading test. Each child's gender ( "male" or "female" ) and grade
      // (4, 5, or 6) are also recorded.
      // NOTE(review): the meaning of the boolean flags passed to DataFrame.Load() is
      // not visible here -- confirm against the DataFrame.Load() documentation.
      DataFrame df = DataFrame.Load( "CrossTabulationExample.dat", true, false, "\t", true );

      Console.WriteLine();
      Console.WriteLine( df );
      Console.WriteLine();

      // This code encapsulates the static StatsFunctions.Mean() function in a
      // Func<IDFColumn, double> delegate, then displays the average
      // test score for each grade:
      var mean = new Func<IDFColumn, double>( StatsFunctions.Mean );
      Console.WriteLine( df.Tabulate( "Grade", "Score", mean ) );
      Console.WriteLine();

      // This code shows the average test score for every combination of gender and grade:
      DataFrame means = df.Tabulate( "Grade", "Gender", "Score", mean );
      Console.WriteLine( means );
      Console.WriteLine();

      // The returned data frame can be easily accessed for individual results:
      Console.WriteLine( "Average score for boys in grade 5 = {0}", means[5, "male"] );
      Console.WriteLine( "Average score for grade 5 = {0}", means[5, "Overall"] );
      Console.WriteLine( "Average score for boys = {0}", means["Overall", "male"] );
      Console.WriteLine( "Grand average = {0}", means["Overall", "Overall"] );
      Console.WriteLine();

      // Most of the static descriptive statistics functions on class StatsFunctions
      // accept an IDFColumn and return a double. A few return integers. For example,
      // this code encapsulates StatsFunctions.Count(), which returns the number of items
      // in a column, in a Func<IDFColumn, int> delegate, then displays the number
      // of subjects in each cell:
      var count = new Func<IDFColumn, int>( StatsFunctions.Count );
      Console.WriteLine( df.Tabulate( "Grade", "Gender", "Score", count ) );
      Console.WriteLine();

      // The delegate that returns a generic object can be especially useful if you want to
      // tabulate a variety of summary statistics all at once:
      var getSummaryDelegate = new Func<IDFColumn, object>( GetSummary );
      DataFrame summaryStats = df.Tabulate( "Grade", "Gender", "Score", getSummaryDelegate );
      Console.WriteLine( "Summary Statistics for Boys in Grade 6" );
      Console.WriteLine( summaryStats[6, "male"] );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    } // Main

    /// <summary>
    /// Collects the count, mean, standard deviation, minimum, and maximum of a column
    /// into a single <see cref="MySummary"/> instance.
    /// </summary>
    /// <param name="data">The column (or column subset) to summarize.</param>
    /// <returns>A <see cref="MySummary"/> holding the computed statistics, typed as object
    /// so the method matches the Func&lt;IDFColumn, object&gt; delegate used in Main.</returns>
    private static object GetSummary( IDFColumn data )
    {
      return new MySummary
      {
        N = StatsFunctions.Count( data ),
        Mean = StatsFunctions.Mean( data ),
        StDev = StatsFunctions.StandardDeviation( data ),
        Min = StatsFunctions.MinValue( data ),
        Max = StatsFunctions.MaxValue( data )
      };
    }

    /// <summary>
    /// Simple container for the summary statistics computed by GetSummary().
    /// </summary>
    private class MySummary
    {
      /// <summary>Number of items.</summary>
      public int N { get; set; }

      /// <summary>Mean of the items.</summary>
      public double Mean { get; set; }

      /// <summary>Standard deviation of the items.</summary>
      public double StDev { get; set; }

      /// <summary>Minimum value.</summary>
      public double Min { get; set; }

      /// <summary>Maximum value.</summary>
      public double Max { get; set; }

      /// <summary>
      /// Formats the statistics one per line, each line terminated by
      /// System.Environment.NewLine.
      /// </summary>
      /// <returns>A multi-line, human-readable listing of the statistics.</returns>
      public override string ToString()
      {
        var buff = new System.Text.StringBuilder();

        // AppendLine() terminates each entry with System.Environment.NewLine.
        buff.AppendLine( "Size = " + N );
        buff.AppendLine( "Mean = " + Mean );
        buff.AppendLine( "Standard Deviation = " + StDev );
        buff.AppendLine( "Minimum = " + Min );
        buff.AppendLine( "Maximum = " + Max );

        return buff.ToString();
      }
    }

  } // class

} // namespace