0

我在代码中添加了多线程部分。

 public class ThreadClassSeqGroups
    {
        public Dictionary<string, string> seqGroup;
        public Dictionary<string, List<SearchAlgorithm.CandidateStr>> completeModels;
        public Dictionary<string, List<SearchAlgorithm.CandidateStr>> partialModels;
        private Thread nativeThread;

        public ThreadClassSeqGroups(Dictionary<string, string> seqs)
        {
            seqGroup = seqs;
            completeModels  = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
            partialModels   = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        }

        public void Run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
            List<ushort> secondarySeedOrder, double partialCutoff)
        {
            nativeThread = new Thread(() => this._run(dsd, primarySeedSu, secondarySeedOrder, partialCutoff));
            nativeThread.Priority = ThreadPriority.Highest;
            nativeThread.Start();
        }

        public void _run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
            List<ushort> secondarySeedOrder, double partialCutoff)
        {
            int groupSize = this.seqGroup.Count;
            int seqCount = 0;
            foreach (KeyValuePair<string, string> p in seqGroup)
            {
                Console.WriteLine("ThreadID {0} (priority:{1}):\t#{2}/{3} SeqName: {4}",
                    nativeThread.ManagedThreadId, nativeThread.Priority.ToString(), ++seqCount, groupSize, p.Key);
                List<SearchAlgorithm.CandidateStr> tmpCompleteModels, tmpPartialModels;
                SearchAlgorithm.SearchInBothDirections(
                        p.Value.ToUpper().Replace('T', 'U'), dsd, primarySeedSu, secondarySeedOrder, partialCutoff,
                        out tmpCompleteModels, out tmpPartialModels);
                completeModels.Add(p.Key, tmpCompleteModels);
                partialModels.Add(p.Key, tmpPartialModels);
            }
        }

        public void Join()
        {
            nativeThread.Join();
        }

    }

class Program
{
    public static int _paramSeqGroupSize = 2000;
    static void Main(Dictionary<string, string> rawSeqs)
    {
        // Split the whole rawSeqs (Dict<name, seq>) into several groups
        Dictionary<string, string>[] rawSeqGroups = SplitSeqFasta(rawSeqs, _paramSeqGroupSize);


        // Create a thread for each seqGroup and run
        var threadSeqGroups = new MultiThreading.ThreadClassSeqGroups[rawSeqGroups.Length];
        for (int i = 0; i < rawSeqGroups.Length; i++)
        {
            threadSeqGroups[i] = new MultiThreading.ThreadClassSeqGroups(rawSeqGroups[i]);
            //threadSeqGroups[i].SetPriority();
            threadSeqGroups[i].Run(dsd, primarySeedSu, secondarySeedOrder, _paramPartialCutoff);
        }

        // Merge results from threads after the thread finish
        var allCompleteModels   = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        var allPartialModels    = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        foreach (MultiThreading.ThreadClassSeqGroups t in threadSeqGroups)
        {
            t.Join();
            foreach (string name in t.completeModels.Keys)
            {
                allCompleteModels.Add(name, t.completeModels[name]);
            }
            foreach (string name in t.partialModels.Keys)
            {
                allPartialModels.Add(name, t.partialModels[name]);
            }
        }
    }
}

但是多线程的速度比单线程慢很多,CPU负载一般<10%。

例如:

输入文件包含 2500 个字符串

_paramGroupSize = 3000, 主线程 + 1 个计算线程花费 200 秒

_paramGroupSize = 400,主线程 + 7 个计算线程花费更多时间(我在运行 10 多分钟后将其杀死)。

我的实施有什么问题吗?如何加快速度?

谢谢。

4

3 回答 3

3

在我看来,您正在尝试与多个线程并行处理文件。这是一个坏主意,假设您有一个机械磁盘。

基本上,磁盘的头部需要为每个读取请求寻找下一个读取位置。这是一项代价高昂的操作,并且由于多个线程发出读取命令,这意味着当每个线程轮到运行时,头部会反弹。与单线程进行读取的情况相比,这将大大降低性能。

于 2012-07-27T18:30:05.857 回答
0

多线程之前的代码是什么?很难说这段代码在做什么,而且很多“工作”代码似乎隐藏在你的搜索算法中。然而,一些想法:

  1. 您提到了一个“输入文件”,但这在代码中没有清楚地显示 - 如果您的文件访问正在被线程化,这不会提高性能,因为文件访问将成为瓶颈。
  2. 创建比 CPU 内核更多的线程最终会降低性能(除非每个线程都被阻塞等待不同的资源)。在您的情况下,我建议总共 8 个线程太多了。
  3. 似乎可以通过您的类完成大量数据(内存)访问,该类从您的方法中DescrStrDetail的变量传递到每个子线程。但是,缺少此变量的声明,因此它的用法/实现是未知的。如果此变量具有阻止多个线程同时访问的锁,那么您的多个线程可能会相互锁定此数据,从而进一步降低性能。dsdMain
于 2012-07-27T15:38:13.750 回答
0

When threads are run they are given time on a specific processor. if there are more threads than processors, the system context switches between threads to get all active threads some time to process. Context switching is really expensive. If you have more threads than processors most of the CPU time can be take up by context switching and make a single-threaded solution look faster than a multi thread solution.

Your example shows starting an indeterminate number of threads. if SplitSeqFasta returns more entries than cores, you will create more threads and cores and introduce a lot of context switching.

I suggest you throttle the number of threads manually, or use something like the thread parallel library and the Parallel class to have it automatically throttle for you.

于 2012-07-27T16:00:44.487 回答