using System;
using System.IO;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to access arbitrary subsets of a data frame.
  /// </summary>
  /// <remarks>
  /// In addition to accessors for individual elements, columns, or rows in
  /// a data frame, class DataFrame provides a large number of indexers and
  /// member functions for accessing sub-frames containing any arbitrary subset
  /// of rows, columns, or both. Such indexers and methods accept the NMath Core
  /// types Slice and Range to indicate sets of row or column indices with constant
  /// spacing. In addition, NMath Stats introduces a new class called Subset.
  /// Like a Slice or Range, a Subset represents a collection of indices that can be
  /// used to view a subset of data from another data structure. Unlike a Slice or
  /// Range, however, a Subset need not be continuous, or even ordered. It is
  /// simply an arbitrary collection of indices.
  /// </remarks>
  public class SubsetExample
  {
    /// <summary>
    /// Loads the example data set from "SubsetExample.dat", prints the complete
    /// frame followed by several sub-frames selected with Subset instances, and
    /// then waits for console input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments; not used by this example.</param>
    static void Main( string[] args )
    {
      // Read in data from the file. The data comes from The Data and Story
      // Library (http://lib.stat.cmu.edu/DASL) and is described below:
      //
      // These data measure protein consumption in twenty-five European
      // countries for nine food groups.
      DataFrame df = DataFrame.Load( "SubsetExample.dat" );

      Console.WriteLine();
      Console.WriteLine( "COMPLETE DATA SET\n" + df + "\n" );

      // Subset instances can be constructed in a variety of ways. One constructor
      // simply accepts an array of integers. The integers do not need to be ordered.
      var sub = new Subset( new int[] { 5, 4, 0, 3 } );

      // Let's use this subset to get a sub-frame of the data. This code gets rows
      // 5, 4, 0, and 3 from the original data frame, in that order, and all columns.
      DataFrame df2 = df.GetRows( sub );
      Console.WriteLine( "ARBITRARY SUBSET OF ROWS\n" + df2 + "\n" );

      // This code uses the same subset to get columns 5, 4, 0, and 3 from the
      // original data frame, in that order, and all rows.
      df2 = df.GetColumns( sub );
      Console.WriteLine( "ARBITRARY SUBSET OF COLUMNS\n" + df2 + "\n" );

      // Indexers enable you to subset both rows and columns simultaneously.
      df2 = df[sub, sub];
      Console.WriteLine( "ARBITRARY SUBSET OF ROWS AND COLUMNS\n" + df2 + "\n" );

      // A very useful constructor takes an array of boolean values and constructs a
      // Subset containing the indices of all true elements in the array. Let's create
      // a subset of row indices containing those rows where protein from Milk exceeds
      // protein from Fish.
      var bArray = new bool[df.Rows];

      // The column lookups by name do not depend on the row index, so they are
      // performed once here instead of on every loop iteration.
      var milk = df["Milk"];
      var fish = df["Fish"];
      for ( int i = 0; i < bArray.Length; i++ )
      {
        bArray[i] = ( (double) milk[i] > (double) fish[i] );
      }
      var milkGTfish = new Subset( bArray );
      df2 = df.GetRows( milkGTfish );
      Console.WriteLine( "ROWS WHERE MILK > FISH\n" + df2 + "\n" );

      // The StatsFunctions.If() method applies a given logical function delegate to
      // a data set and returns an array of boolean values. Let's create a subset for
      // countries where protein consumption from Nuts exceeds 3.0. See below for
      // the definition of logical function GT3().
      bArray = StatsFunctions.If( df["Nuts"], new Func<double, bool>( GT3 ) );
      var nutsGT3 = new Subset( bArray );
      df2 = df.GetRows( nutsGT3 );
      Console.WriteLine( "ROWS WHERE NUTS > 3.0\n" + df2 + "\n" );

      // The Subset class provides a variety of operators for combining subsets, including
      // operator& for intersections and operator| for unions.
      df2 = df.GetRows( milkGTfish & nutsGT3 );
      Console.WriteLine( "ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)\n" + df2 + "\n" );

      df2 = df.GetRows( milkGTfish | nutsGT3 );
      Console.WriteLine( "ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)\n" + df2 + "\n" );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    } // Main

    /// <summary>
    /// Logical function passed to StatsFunctions.If() in Main.
    /// </summary>
    /// <param name="x">The value to test.</param>
    /// <returns>true if <paramref name="x"/> is strictly greater than 3.0; otherwise false.</returns>
    private static bool GT3( double x )
    {
      return ( x > 3.0 );
    }

  } // class

} // namespace