Tuesday, March 13, 2007 5:09 AM bart

Answers to C# Quiz - Need for speed

You can find the original quiz over here. There have been lots of great answers, thanks to all readers! The bottom line however is that one should be careful when doing performance optimizations; in much cases the code doesn't become cleaner (even the opposite). One shouldn't trade design guidelines for speed unless there are very very good reasons to do so. An example includes trading a class for a struct (see Framework Design Guidelines for a full elaboration on these topics).

Below you can find a piece of code that illustrates a few performance optimizations. General recommendations include:

  1. Avoid the use of accessor functions like properties and structs if you can get to the data directly (without breaking encapsulation however).
  2. Calculations sometimes benefit from an accumulator variable, as illustrated below.
  3. Jagged arrays outperform multidimensional arrays because of a variety of reasons. See Performance Tips and Tricks in .NET Applications for more info; this article dates from the v1 ages of .NET but most guidelines still hold.

The code:

1 using System; 2 using System.Threading; 3 using System.Text; 4 using System.Diagnostics; 5 6 class Matrix 7 { 8 private double[,] m; 9 internal double[][] mj; 10 11 public Matrix(int dim0, int dim1) 12 { 13 m = new double[dim0,dim1]; 14 mj = new double[dim0][]; 15 for (int i = 0; i < dim0; i++) 16 mj[i] = new double[dim1]; 17 } 18 19 public int Height { get { return m.GetLength(0); } } 20 public int Width { get { return m.GetLength(1); } } 21 22 public double this[int x, int y] 23 { 24 get { return m[x,y]; } 25 set { m[x,y] = value; } //hasn't changed to set mj value - avoid perf influence 26 } 27 28 public static int algo = 0; 29 public static string desc; 30 31 public static Matrix operator*(Matrix m1, Matrix m2) 32 { 33 if (m1.Width != m2.Height) 34 throw new InvalidOperationException("Matrices should have compatible dimensions for multiplication."); 35 36 switch (algo) 37 { 38 case 1: 39 { 40 desc = "Avoid properties"; 41 int h = m1.Height; 42 int w = m2.Width; 43 int l = m1.Width; 44 Matrix m = new Matrix(h, w); 45 for(int i = 0; i < h; i++) 46 { 47 for(int j = 0; j < w; j++) 48 { 49 m[i,j] = 0; 50 for (int k = 0; k < l; k++) 51 m[i,j] += m1[i,k] * m2[k,j]; 52 } 53 } 54 return m; 55 } 56 57 case 2: 58 { 59 desc = "Avoid indexers"; 60 int h = m1.Height; 61 int w = m2.Width; 62 int l = m1.Width; 63 Matrix m = new Matrix(h, w); 64 for(int i = 0; i < h; i++) 65 { 66 for(int j = 0; j < w; j++) 67 { 68 m.m[i,j] = 0; 69 for (int k = 0; k < l; k++) 70 m.m[i,j] += m1.m[i,k] * m2.m[k,j]; 71 } 72 } 73 return m; 74 } 75 76 case 3: 77 { 78 desc = "Use accumulator variable"; 79 int h = m1.Height; 80 int w = m2.Width; 81 int l = m1.Width; 82 Matrix m = new Matrix(h, w); 83 for(int i = 0; i < h; i++) 84 { 85 for(int j = 0; j < w; j++) 86 { 87 double res = 0; 88 for (int k = 0; k < l; k++) 89 res += m1.m[i,k] * m2.m[k,j]; 90 m.m[i,j] = res; 91 } 92 } 93 return m; 94 } 95 96 case 4: 97 { 98 desc = "Use jagged arrays"; 99 int h = m1.Height; 100 int w = m2.Width; 101 int l = m1.Width; 102 Matrix m = new Matrix(h, w); 103 for(int i = 0; i < h; i++) 104 { 105 for(int j = 0; j < w; j++) 106 { 107 double res = 0; 108 for (int k = 0; k < l; k++) 109 res += m1.mj[i][k] * m2.mj[k][j]; 110 m.mj[i][j] = res; 111 } 112 } 113 return m; 114 } 115 116 case 5: 117 { 118 desc = "Go unsafe with multidimensional"; 119 int h = m1.Height; 120 int w = m2.Width; 121 int l = m1.Width; 122 Matrix m = new Matrix(h, w); 123 unsafe 124 { 125 fixed (double* pm = m.m, pm1 = m1.m, pm2 = m2.m) 126 { 127 int i1, i2; 128 for(int i = 0; i < h; i++) 129 { 130 i1 = i * l; 131 for(int j = 0; j < w; j++) 132 { 133 i2 = j; 134 double res = 0; 135 for (int k = 0; k < l; k++, i2 += w) 136 res += pm1[i1 + k] * pm2[i2]; 137 pm[i * w + j] = res; 138 } 139 } 140 } 141 } 142 return m; 143 } 144 145 default: 146 { 147 desc = "Naive"; 148 Matrix m = new Matrix(m1.Height, m2.Width); 149 for(int i = 0; i < m.Height; i++) 150 { 151 for(int j = 0; j < m.Width; j++) 152 { 153 m[i,j] = 0; 154 for (int k = 0; k < m1.Width; k++) 155 m[i,j] += m1[i,k] * m2[k,j]; 156 } 157 } 158 return m; 159 } 160 } 161 } 162 } 163 164 class Program 165 { 166 static void Main() 167 { 168 Random rand = new Random(); 169 Stopwatch sw = new Stopwatch(); 170 TimeSpan original = TimeSpan.FromSeconds(0); 171 172 Matrix m1 = new Matrix(20,30); 173 for (int i = 0; i < m1.Height; i++) 174 for (int j = 0; j < m1.Width; j++) 175 m1[i,j] = rand.Next(-100,100); 176 177 Matrix m2 = new Matrix(30,40); 178 for (int i = 0; i < m2.Height; i++) 179 for (int j = 0; j < m2.Width; j++) 180 m2[i,j] = rand.Next(-100,100); 181 182 Matrix ro = null; 183 184 { 185 sw.Start(); 186 for (int k = 0; k < 10000; k++) 187 ro = m1 * m2; 188 sw.Stop(); 189 original = sw.Elapsed; 190 Console.WriteLine("Original - {0}", original); 191 } 192 193 { 194 for (Matrix.algo = 1; Matrix.algo <= 5; Matrix.algo++) 195 { 196 Matrix r = null; 197 sw.Reset(); 198 sw.Start(); 199 for (int k = 0; k < 10000; k++) 200 r = m1 * m2; 201 sw.Stop(); 202 Console.WriteLine("Algo {0} - {1} {3} ({2:#.00}x)", Matrix.algo, sw.Elapsed, ((double)original.TotalMilliseconds) / sw.Elapsed.TotalMilliseconds, Matrix.desc); 203 } 204 } 205 } 206 }

Needless to say, performance figures will vary from machine to machine and on every execution. Over here the figures look roughly like this:

Original - 00:00:07.9269539
Algo 1   - 00:00:03.0053351 Avoid properties (2,64x)
Algo 2   - 00:00:02.5814237 Avoid indexers (3,07x)
Algo 3   - 00:00:02.4030079 Use accumulator variable (3,30x)
Algo 4   - 00:00:01.5665534 Use jagged arrays (5,06x)
Algo 5   - 00:00:01.0873534 Go unsafe with multidimensional (7,29x)

The last optimization is likely the most risky one since it uses unsafe code to get rid of bounds checking overhead. Unless you really know what you're doing, it's better to avoid unsafe code anyhow (a speed-up of a factor 5 just by applying managed code optimizations is great already!).

Have fun!

Del.icio.us | Digg It | Technorati | Blinklist | Furl | reddit | DotNetKicks

Filed under: ,

Comments

# re: Answers to C# Quiz - Need for speed

Tuesday, March 13, 2007 5:59 AM by Mitch Wheat

Hi Bart

Did you try running the inner of the 3 loops backwards? Many algorithms can have their term order evaluation changed to gain subtle timing improvements, much like the loop unrolling done by a parallel vectorising compiler. With very large matrices, the access order can have a large impact on memory access timings, due to caching etc

Keep up the great posts!

# re: Answers to C# Quiz - Need for speed

Tuesday, March 13, 2007 8:00 AM by Jonathan de Halleux

This may also vary for which architecture (32bit, 64bit) you are using.

Jagged Arrays outperform multidimensional array because many reasons. Of of them is that the JIT can optimize away 1 range check by keeping the array-of-array in a register while executing the inner loop (to verify this, pop up windbg, load sos and dump the assembly code of the Jitted method).

A couple more things:

- since you are not using the .Length property directly, the JIT will probably not recognize the 'for(int i = 0;i<array.Length;++i)' pattern which it knows how to optimize.

- the body of your inner loops is short and your measurments might suffer loop alignment issues.

# re: Answers to C# Quiz - Need for speed

Tuesday, March 13, 2007 12:46 PM by bart

Hi Mitch,

Thanks for the feedback. I've evaluated the loop reversal indeed and a slight performance gain can be realized by doing so. As you say, access patterns are important, e.g. when iterating over a multidimensional array on a column-per-column basis which can cause data cache misses in the L* cache hierarchy of the processor. To see the result of this optimization, copy code fragment "case 5" to a new switch-case with reversed loops like this:

for(int i = h - 1; i >= 0; i--)

for(int j = w - 1; j >= 0; j--)

for (int k = l - 1; k >= 0; k--)

-Bart

# re: Answers to C# Quiz - Need for speed

Tuesday, March 13, 2007 12:55 PM by bart

Hi Jonathan,

Thanks for the insightful comment. I didn't know about the JIT recognizing the use of the Length property in a for loop; guess another WinDbg session is coming up soon :-). I'm aware of the risk for misalignment of the loop bodies but didn't see a direct way to countermeasure it.

-Bart