using System;
using System.Collections;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# that walks through building, reshaping, querying and
  /// exporting tabular data with the DataFrame class.
  /// </summary>
  /// <remarks>
  /// The statistical functions in NMath Stats accept the NMath Core types
  /// DoubleVector and DoubleMatrix, as well as plain arrays of doubles. Those
  /// types are often enough, but they hold numeric data only and offer limited
  /// support for adding, inserting, removing, and reordering data. The DataFrame
  /// class addresses this: it is a two-dimensional data object made up of a list
  /// of equal-length columns, where each column holds one kind of data (numeric,
  /// string, boolean, generic, and so on).
  /// </remarks>
  public class DataFrameExample
  {
    static void Main( string[] args )
    {
      // Start from an empty data frame.
      var oaks = new DataFrame();

      // Define the columns. The data relate acorn size to various oak tree
      // species. A column can be addressed either by its numeric index
      // (0...n-1) or by the name given when the column is constructed.
      oaks.AddColumn( new DFStringColumn( "Region" ) );
      oaks.AddColumn( new DFNumericColumn( "AcornSize" ) );
      oaks.AddColumn( new DFNumericColumn( "TreeHeight" ) );
      oaks.AddColumn( new DFBoolColumn( "Threatened" ) );

      // Populate the rows. A row can be addressed either by its numeric index
      // (0...n-1) or by a key object. The first argument to AddRow() is the
      // row key -- here, the species name.
      oaks.AddRow( "Quercus alba L.", "Atlantic", 1.4, 27, false );
      oaks.AddRow( "Quercus bicolor Willd.", "Atlantic", 3.4, 21, false );
      oaks.AddRow( "Quercus macrocarpa Michx.", "Atlantic", 9.1, 25, false );
      oaks.AddRow( "Quercus Chapmanii Sarg.", "Atlantic", 0.9, 15, false );
      oaks.AddRow( "Quercus Durandii Buckl.", "Atlantic", 0.8, 23, true );
      oaks.AddRow( "Quercus laurifolia Michx.", "Atlantic", 1.1, 27, false );
      oaks.AddRow( "Quercus marilandica Muenchh.", "Atlantic", 3.7, 9, false );
      oaks.AddRow( "Quercus nigra L.", "Atlantic", 1.1, 24, true );
      oaks.AddRow( "Quercus palustris Muenchh.", "Atlantic", 1.1, 23, false );
      oaks.AddRow( "Quercus texana Buckl.", "Atlantic", 1.1, 9, false );
      oaks.AddRow( "Quercus coccinea Muenchh.", "Atlantic", 1.2, 4, false );
      oaks.AddRow( "Quercus Douglasii Hook. & Arn", "California", 4.1, 18, false );
      oaks.AddRow( "Quercus dumosa Nutt.", "California", 1.6, 6, false );
      oaks.AddRow( "Quercus Engelmannii Greene", "California", 2.0, 17, false );
      oaks.AddRow( "Quercus Garryana Hook.", "California", 5.5, 20, true );
      oaks.AddRow( "Quercus chrysolepis Liebm.", "California", 17.1, 15, false );
      oaks.AddRow( "Quercus vaccinifolia Engelm.", "California", 0.4, 1, false );
      oaks.AddRow( "Quercus tomentella Engelm", "California", 7.1, 18, true );

      // Print the complete, unmodified data frame (preceded by a blank line).
      Console.WriteLine();
      Show( oaks );

      // Reorder the columns: let's move the AcornSize column to the end.
      oaks.PermuteColumns( 0, 3, 1, 2 );
      Show( oaks );

      // When a column's index is not known, look it up by name.
      int acornIdx = oaks.IndexOfColumn( "AcornSize" );
      int heightIdx = oaks.IndexOfColumn( "TreeHeight" );

      // Sort the rows: primarily by AcornSize ascending, then by TreeHeight
      // descending.
      oaks.SortRows(
        new int[] { acornIdx, heightIdx },
        new SortingType[] { SortingType.Ascending, SortingType.Descending } );
      Show( oaks );

      // Drop a column by name, one row by key and one row by index.
      oaks.RemoveColumn( "Threatened" );
      oaks.RemoveRow( "Quercus nigra L." );
      oaks.RemoveRow( 2 );
      Show( oaks );

      // Overwrite a single cell, addressed by row and column index.
      int targetRow = oaks.IndexOfKey( "Quercus chrysolepis Liebm." );
      int targetCol = oaks.IndexOfColumn( "AcornSize" );
      oaks[targetRow, targetCol] = 17.2;

      // Row dictionary for one species: keys are the column names, values are
      // that row's data.
      PrintEntries(
        "Quercus palustris Muenchh.",
        oaks.GetRowDictionary( "Quercus palustris Muenchh." ) );

      // Column dictionary for TreeHeight: keys are the row keys, values are
      // that column's data.
      PrintEntries( "TreeHeight", oaks.GetColumnDictionary( "TreeHeight" ) );

      // A couple of descriptive statistics on the AcornSize column.
      Console.WriteLine( "Acorn Size:" );
      Console.WriteLine( $"Mean = {StatsFunctions.Mean( oaks["AcornSize"] )}" );
      Console.WriteLine( $"Var = {StatsFunctions.Variance( oaks["AcornSize"] )}" );
      Console.WriteLine();

      // Export to a DoubleMatrix. Non-numeric columns are ignored.
      DoubleMatrix asMatrix = oaks.ToDoubleMatrix();
      Show( asMatrix );

      // Pull the AcornSize column out as a DoubleVector.
      DoubleVector acornSizes = oaks["AcornSize"].ToDoubleVector();
      Show( acornSizes );

      // Keep the console window open until the user responds.
      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();
    } // Main

    /// <summary>
    /// Writes <paramref name="value"/> to the console, followed by a blank line.
    /// </summary>
    /// <param name="value">The object to print.</param>
    private static void Show( object value )
    {
      Console.WriteLine( value );
      Console.WriteLine();
    }

    /// <summary>
    /// Writes a heading, then one "key: value" line per dictionary entry,
    /// then a blank line.
    /// </summary>
    /// <param name="heading">Text printed on the first line.</param>
    /// <param name="entries">Dictionary to list; its keys are enumerated as strings.</param>
    private static void PrintEntries( string heading, IDictionary entries )
    {
      Console.WriteLine( heading );
      foreach ( string name in entries.Keys )
      {
        Console.WriteLine( $"{name}: {entries[name]}" );
      }
      Console.WriteLine();
    }
  } // class
} // namespace