I've programmed a (very simple) benchmark in Java. It simply increments a double value up to a specified value and takes the time.
When I use this singlethreaded or with a low amount of threads (up to 100) on my 6-core desktop, the benchmark returns reasonable and repeatable results.
But when I use for example 1200 threads, the average multicore duration is significantly lower than the singlecore duration (about 10 times or more). I've made sure that the total amount of incrementations is the same, no matter how much threads I use.
Why does the performance drop so much with more threads? Is there a trick to solve this problem?
I'm posting my source, but I don't think, that there is a problem.
Benchmark.java:
package sibbo.benchmark;
import java.text.DecimalFormat;
import java.util.LinkedList;
import java.util.List;
public class Benchmark implements TestFinishedListener {
private static final double TARGET = 1e10;
private static final int THREAD_MULTIPLICATOR = 2;
public static void main(String[] args) throws InterruptedException {
Benchmark b = new Benchmark(TARGET);
b.start();
}
private int coreCount;
private List<Worker> workers = new LinkedList<>();
private List<Worker> finishedWorkers = new LinkedList<>();
private double target;
public Benchmark(double target) {
this.target = target;
getSystemInfos();
printInfos();
}
private void getSystemInfos() {
coreCount = Runtime.getRuntime().availableProcessors();
}
private void printInfos() {
System.out.println("Usable cores: " + coreCount);
System.out.println("Multicore threads: " + coreCount * THREAD_MULTIPLICATOR);
System.out.println("Loops per core: " + new DecimalFormat("###,###,###,###,##0").format(TARGET));
System.out.println();
}
public synchronized void start() throws InterruptedException {
Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
System.out.print("Initializing singlecore benchmark... ");
Worker w = new Worker(this, 0);
workers.add(w);
Thread.sleep(1000);
System.out.println("finished");
System.out.print("Running singlecore benchmark... ");
w.runBenchmark(target);
wait();
System.out.println("finished");
printResult();
System.out.println();
// Multicore
System.out.print("Initializing multicore benchmark... ");
finishedWorkers.clear();
for (int i = 0; i < coreCount * THREAD_MULTIPLICATOR; i++) {
workers.add(new Worker(this, i));
}
Thread.sleep(1000);
System.out.println("finished");
System.out.print("Running multicore benchmark... ");
for (Worker worker : workers) {
worker.runBenchmark(target / THREAD_MULTIPLICATOR);
}
wait();
System.out.println("finished");
printResult();
Thread.currentThread().setPriority(Thread.NORM_PRIORITY);
}
private void printResult() {
DecimalFormat df = new DecimalFormat("###,###,###,##0.000");
long min = -1, av = 0, max = -1;
int threadCount = 0;
boolean once = true;
System.out.println("Result:");
for (Worker w : finishedWorkers) {
if (once) {
once = false;
min = w.getTime();
max = w.getTime();
}
if (w.getTime() > max) {
max = w.getTime();
}
if (w.getTime() < min) {
min = w.getTime();
}
threadCount++;
av += w.getTime();
if (finishedWorkers.size() <= 6) {
System.out.println("Worker " + w.getId() + ": " + df.format(w.getTime() / 1e9) + "s");
}
}
System.out.println("Min: " + df.format(min / 1e9) + "s, Max: " + df.format(max / 1e9) + "s, Av per Thread: "
+ df.format((double) av / threadCount / 1e9) + "s");
}
@Override
public synchronized void testFinished(Worker w) {
workers.remove(w);
finishedWorkers.add(w);
if (workers.isEmpty()) {
notify();
}
}
}
Worker.java:
package sibbo.benchmark;
public class Worker implements Runnable {
private double value = 0;
private long time;
private double target;
private TestFinishedListener l;
private final int id;
public Worker(TestFinishedListener l, int id) {
this.l = l;
this.id = id;
new Thread(this).start();
}
public int getId() {
return id;
}
public synchronized void runBenchmark(double target) {
this.target = target;
notify();
}
public long getTime() {
return time;
}
@Override
public void run() {
synWait();
value = 0;
long startTime = System.nanoTime();
while (value < target) {
value++;
}
long endTime = System.nanoTime();
time = endTime - startTime;
l.testFinished(this);
}
private synchronized void synWait() {
try {
wait();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}