12

I have recently been running benchmarks on Java vs C# for 1000 tasks to be scheduled over a threadpool. The server has 4 physical processors, each with 8 cores. The OS is Server 2008, has 32 GB of memory and each CPU is a Xeon x7550 Westmere/Nehalem-C.

In short, the Java implementation is much faster than C# at 4 threads but much slower as the number of threads increases. It also seems that C# becomes quicker per iteration as the thread count increases. Graphs are included in this post:

Java vs C# with a threadpool size of 4 threads; Java vs C# with a threadpool size of 32 threads; Peter's Java answer (see below) vs C#, for 32 threads.

The Java implementation was written on a 64bit Hotspot JVM, with Java 7 and using an Executor Service threadpool I found online (see below). I also set the JVM to concurrent GC.

C# was written on .net 3.5 and the threadpool came from codeproject: http://www.codeproject.com/Articles/7933/Smart-Thread-Pool

(I have included the code below).

My questions:

1) Why is Java getting slower but C# is getting quicker?

2) Why do the execution times of C# fluctuate greatly? (This is our main question)

We did wonder whether the C# fluctuation was caused by the memory bus being maxed out....

Code (please do not highlight errors with locking; this is irrelevant to my aims):

Java

import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Micro-benchmark that schedules a fixed number of memory-intensive tasks on a
 * fixed-size thread pool and records how long each task takes.
 *
 * <p>Usage: {@code java PoolDemo <iterations> <threads>}. Each task prints its
 * duration (nanoseconds) to stdout; after the pool drains, all durations are
 * written to {@code server_testing.csv}, one per line.
 *
 * <p>The shared statistics ({@code TotalTime}, {@code FastestMemory},
 * {@code SlowestMemory}) are updated from the pool threads without any
 * synchronisation. That is deliberate in this benchmark, so the aggregate
 * figures are only approximate when more than one thread is used.
 */
public class PoolDemo {

    static long FastestMemory = 2000000000;
    static long SlowestMemory = 0;
    static long TotalTime;
    static long[] FileArray;
    static DataOutputStream outs; // never assigned or read in this class
    static FileOutputStream fout;

    /**
     * Runs the benchmark.
     *
     * @param args {@code args[0]} = number of tasks, {@code args[1]} = pool size
     * @throws InterruptedException  if interrupted while waiting for the pool to drain
     * @throws FileNotFoundException if {@code server_testing.csv} cannot be opened
     */
    public static void main(String[] args) throws InterruptedException, FileNotFoundException {

        int Iterations = Integer.parseInt(args[0]);
        int ThreadSize = Integer.parseInt(args[1]);

        FileArray = new long[Iterations];
        fout = new FileOutputStream("server_testing.csv");

        // One PrintStream for the whole run; closing it also closes fout.
        PrintStream ps = new PrintStream(fout);
        try {
            // fixed pool, unlimited queue
            ExecutorService service = Executors.newFixedThreadPool(ThreadSize);

            for (int i = 0; i < Iterations; i++) {
                service.execute(new Task(i));
            }

            service.shutdown();
            if (!service.awaitTermination(90, TimeUnit.SECONDS)) {
                // Unfinished tasks leave zeros in FileArray and are missing from TotalTime.
                System.err.println("Warning: not all tasks finished within 90 seconds; results are incomplete");
            }

            // Guard against "/ by zero" when no iterations were requested.
            long average = Iterations > 0 ? TotalTime / Iterations : 0;

            System.out.println("Fastest: " + FastestMemory);
            System.out.println("Average: " + average);

            for (int j = 0; j < FileArray.length; j++) {
                ps.println(FileArray[j] + ",");
            }
        } finally {
            ps.close();
        }
    }

    /**
     * One unit of benchmark work: allocates three byte arrays, fills and
     * shuffles them, then records the elapsed time under this task's index.
     */
    private static class Task implements Runnable {

        private int ID;

        // Boxed Byte: every store into a byte[] below unboxes it.
        static Byte myByte = 0;

        public Task(int index) {
            this.ID = index;
        }

        @Override
        public void run() {
            long Start = System.nanoTime();

            int Size1 = 10000000;
            int Size2 = 2 * Size1;
            int Size3 = Size1;

            byte[] list1 = new byte[Size1];
            byte[] list2 = new byte[Size2];
            byte[] list3 = new byte[Size3];

            for (int i = 0; i < Size1; i++) {
                list1[i] = myByte;
            }

            // Only every second element of list2 is written.
            for (int i = 0; i < Size2; i = i + 2) {
                list2[i] = myByte;
            }

            for (int i = 0; i < Size3; i++) {
                byte temp = list1[i];
                byte temp2 = list2[i];
                list3[i] = temp;
                list2[i] = temp;
                list1[i] = temp2;
            }

            long Finish = System.nanoTime();
            long Duration = Finish - Start;
            FileArray[this.ID] = Duration;
            TotalTime += Duration; // unsynchronised on purpose (see class comment)
            System.out.println("Individual Time " + this.ID + " \t: " + (Duration) + " nanoseconds");

            if (Duration < FastestMemory) {
                FastestMemory = Duration;
            }
            if (Duration > SlowestMemory) {
                SlowestMemory = Duration;
            }
        }
    }
}

C#:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using Amib.Threading;
using System.Diagnostics;
using System.IO;
using System.Runtime;


namespace ServerTesting
{
    /// <summary>
    /// Micro-benchmark that queues a number of memory-intensive work items on a
    /// SmartThreadPool and records how long each one takes, in milliseconds.
    /// Usage: ServerTesting &lt;iterations&gt; &lt;threads&gt;.
    /// </summary>
    /// <remarks>
    /// The shared statistics (TotalTime, FastestMemory, SlowestMemory) are updated
    /// from the pool callbacks without synchronisation. That is deliberate in this
    /// benchmark, so the aggregates are only approximate with more than one thread.
    /// </remarks>
    class Program
    {
        static long FastestMemory = 2000000000;
        static long SlowestMemory = 0;
        static long TotalTime = 0;
        static int[] FileOutput;
        static byte myByte = 56;

        static System.IO.StreamWriter timeFile;
        static System.IO.StreamWriter memoryFile; // not used anywhere in this class

        /// <summary>
        /// Parses the command line, runs the benchmark and writes one duration per
        /// line to "&lt;threads&gt;_&lt;iterations&gt;_time.csv".
        /// </summary>
        /// <param name="args">args[0] = number of work items, args[1] = maximum pool threads.</param>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Usage: ServerTesting <iterations> <threads>");
                return;
            }

            // IsServerGC reports the server GC mode, not concurrent GC, so the label says so.
            Console.WriteLine("Server GC enabled: " + GCSettings.IsServerGC);
            int Threads = Int32.Parse(args[1]);
            int Iterations = Int32.Parse(args[0]);

            timeFile = new System.IO.StreamWriter(Threads + "_" + Iterations + "_" + "time.csv");
            try
            {
                FileOutput = new int[Iterations];
                TestMemory(Threads, Iterations);

                for (int j = 0; j < Iterations; j++)
                {
                    timeFile.WriteLine(FileOutput[j] + ",");
                }
            }
            finally
            {
                // Close the file even if the benchmark throws.
                timeFile.Close();
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Queues <paramref name="iterations"/> work items on a pool limited to
        /// <paramref name="threads"/> threads, waits for the pool to become idle and
        /// prints the aggregate timings.
        /// </summary>
        /// <param name="threads">Value assigned to the pool's MaxThreads.</param>
        /// <param name="iterations">Number of work items to queue.</param>
        private static void TestMemory(int threads, int iterations)
        {
            SmartThreadPool pool = new SmartThreadPool();
            pool.MaxThreads = threads;
            Console.WriteLine("Launching " + iterations + " calculators with " + pool.MaxThreads + " threads");
            for (int i = 0; i < iterations; i++)
            {
                pool.QueueWorkItem(new WorkItemCallback(MemoryIntensiveTask), i);
            }
            pool.WaitForIdle();

            // Divide as double so the average keeps its fraction; guard against zero iterations.
            double avg = iterations > 0 ? (double)TotalTime / iterations : 0.0;
            Console.WriteLine("Avg Memory Time : " + avg);
            Console.WriteLine("Fastest: " + FastestMemory + " ms");
            Console.WriteLine("Slowest: " + SlowestMemory + " ms");
        }

        /// <summary>
        /// One unit of benchmark work: allocates three byte arrays, fills and shuffles
        /// them, then records the elapsed milliseconds under the work item's index.
        /// </summary>
        /// <param name="args">The boxed int index of this work item.</param>
        /// <returns>Always null.</returns>
        private static object MemoryIntensiveTask(object args)
        {
            // Stopwatch measures elapsed time directly. The previous
            // DateTime.Now / TimeSpan.Milliseconds pair read the wall clock and kept
            // only the 0-999 millisecond component, so durations over 1 s wrapped.
            Stopwatch timer = Stopwatch.StartNew();

            int Size1 = 10000000;
            int Size2 = 2 * Size1;
            int Size3 = Size1;

            byte[] list1 = new byte[Size1];
            byte[] list2 = new byte[Size2];
            byte[] list3 = new byte[Size3];

            for (int i = 0; i < Size1; i++)
            {
                list1[i] = myByte;
            }

            // Only every second element of list2 is written.
            for (int i = 0; i < Size2; i = i + 2)
            {
                list2[i] = myByte;
            }

            for (int i = 0; i < Size3; i++)
            {
                byte temp = list1[i];
                byte temp2 = list2[i];
                list3[i] = temp;
                list2[i] = temp;
                list1[i] = temp2;
            }

            timer.Stop();
            long duration = timer.ElapsedMilliseconds;

            Console.WriteLine("Individual Time " + args + " \t: " + duration);

            FileOutput[(int)args] = (int)duration;
            TotalTime += duration; // unsynchronised on purpose (see class remarks)

            if (duration < FastestMemory)
            {
                FastestMemory = duration;
            }
            if (duration > SlowestMemory)
            {
                SlowestMemory = duration;
            }
            return null;
        }
    }
}
4

1 回答 1

12

您测试的与其说是线程框架的工作,不如说是语言如何优化未经优化的代码。

Java 特别擅长优化无意义的代码,我相信这可以解释语言的差异。随着线程数量的增加,我怀疑瓶颈会转移到 GC 的执行方式或其他与您的测试有关的事情上。

Java 变慢也可能是因为它默认不支持 NUMA。可以尝试使用 -XX:+UseNUMA 运行;不过我建议,为了获得最佳性能,应尽量让每个进程保持在单个 NUMA 区域内,以避免跨 NUMA 的开销。

你也可以试试下面这段稍微优化过的代码,它在我的机器上快了 40%:

import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Variant of the pool benchmark that reuses per-thread byte buffers instead of
 * allocating three new arrays in every task.
 *
 * <p>Usage: {@code java PoolDemo <iterations> <threads>}. Each task prints its
 * duration (nanoseconds) to stdout; after the pool drains, all durations are
 * written to {@code server_testing.csv}, one per line.
 *
 * <p>The shared statistics ({@code TotalTime}, {@code FastestMemory},
 * {@code SlowestMemory}) are updated from the pool threads without any
 * synchronisation, so the aggregate figures are only approximate when more
 * than one thread is used.
 */
public class PoolDemo {

    static long FastestMemory = 2000000000;
    static long SlowestMemory = 0;
    static long TotalTime;
    static long[] FileArray;
    static FileOutputStream fout;

    /**
     * Runs the benchmark.
     *
     * @param args {@code args[0]} = number of tasks, {@code args[1]} = pool size
     * @throws InterruptedException  if interrupted while waiting for the pool to drain
     * @throws FileNotFoundException if {@code server_testing.csv} cannot be opened
     */
    public static void main(String[] args) throws InterruptedException, FileNotFoundException {

        int Iterations = Integer.parseInt(args[0]);
        int ThreadSize = Integer.parseInt(args[1]);

        FileArray = new long[Iterations];
        fout = new FileOutputStream("server_testing.csv");

        // Closing the PrintStream also closes fout.
        PrintStream ps = new PrintStream(fout);
        try {
            // fixed pool, unlimited queue
            ExecutorService service = Executors.newFixedThreadPool(ThreadSize);

            for (int i = 0; i < Iterations; i++) {
                service.execute(new Task(i));
            }

            service.shutdown();
            if (!service.awaitTermination(90, TimeUnit.SECONDS)) {
                // Unfinished tasks leave zeros in FileArray and are missing from TotalTime.
                System.err.println("Warning: not all tasks finished within 90 seconds; results are incomplete");
            }

            // Guard against "/ by zero" when no iterations were requested.
            long average = Iterations > 0 ? TotalTime / Iterations : 0;

            System.out.println("Fastest: " + FastestMemory);
            System.out.println("Average: " + average);

            for (long aFileArray : FileArray) {
                ps.println(aFileArray + ",");
            }
        } finally {
            ps.close();
        }
    }

    /** Thread-local byte buffer of a fixed size, created on a thread's first access. */
    static class ThreadLocalBytes extends ThreadLocal<byte[]> {
        private final int bytes;

        ThreadLocalBytes(int bytes) {
            this.bytes = bytes;
        }

        @Override
        protected byte[] initialValue() {
            return new byte[bytes];
        }
    }

    /**
     * One unit of benchmark work: fills and shuffles the calling thread's three
     * buffers, then records the elapsed time under this task's index.
     */
    private static class Task implements Runnable {

        static final int Size1 = 10000000;
        static final int Size2 = 2 * Size1;
        static final int Size3 = Size1;

        private int ID;
        private static final ThreadLocalBytes list1b = new ThreadLocalBytes(Size1);
        private static final ThreadLocalBytes list2b = new ThreadLocalBytes(Size2);
        private static final ThreadLocalBytes list3b = new ThreadLocalBytes(Size3);

        static byte myByte = 0;

        public Task(int index) {
            this.ID = index;
        }

        @Override
        public void run() {
            long Start = System.nanoTime();

            // Buffers are per thread, so they keep their contents between tasks
            // that run on the same pool thread.
            byte[] list1 = list1b.get();
            byte[] list2 = list2b.get();
            byte[] list3 = list3b.get();

            for (int i = 0; i < Size1; i++) {
                list1[i] = myByte;
            }

            // Only every second element of list2 is written.
            for (int i = 0; i < Size2; i = i + 2) {
                list2[i] = myByte;
            }

            for (int i = 0; i < Size3; i++) {
                byte temp = list1[i];
                byte temp2 = list2[i];
                list3[i] = temp;
                list2[i] = temp;
                list1[i] = temp2;
            }

            long Finish = System.nanoTime();
            long Duration = Finish - Start;
            FileArray[this.ID] = Duration;
            TotalTime += Duration; // unsynchronised on purpose (see class comment)
            System.out.println("Individual Time " + this.ID + " \t: " + (Duration) + " nanoseconds");

            if (Duration < FastestMemory) {
                FastestMemory = Duration;
            }
            if (Duration > SlowestMemory) {
                SlowestMemory = Duration;
            }
        }
    }
}
于 2012-04-05T11:55:06.427 回答