为什么将std :: multiset用作优先级队列比使用std :: priority_queue更快?

本文介绍了为什么将std :: multiset用作优先级队列比使用std :: priority_queue更快?的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我尝试用std :: priority_queue替换std :: multiset.但是我对速度结果感到失望.该算法的运行时间增加了50％...

I try to replace std::multiset with std::priority_queue. But I was dissapointed with the speed results. Running time of the algorithm increase by 50%...

以下是相应的命令:

top() = begin();
pop() = erase(knn.begin());
push() = insert();

我对priority_queue实现的速度感到惊讶，我预期会有不同的结果(对于PQ更好)...从概念上讲，多集被用作优先级队列.为什么即使使用-O2，优先级队列和多重集的性能也是如此?

I am surprised with the speed of priority_queue implementation, I expected different results (better for PQ)... Conceptually, the multiset is being used as a priority queue. Why are the priority queue and the multiset have such different performance, even with -O2?

十个结果的平均值，MSVS 2010，Win XP，32位，方法findAllKNN2()(请参见下面的内容)

Average of ten results, MSVS 2010, Win XP, 32 bit, method findAllKNN2 () (see bellow, please)

MS
N           time [s]
100 000     0.5
1 000 000   8

PQ
N           time [s]
100 000     0.8
1 000 000   12

什么可能导致这些结果?没有对源代码进行任何其他更改...感谢您的帮助...

What could cause these results? No other changes of the source code have been made... Thanks for your help...

MS实施:

template <typename Point>
struct TKDNodePriority
{
    KDNode <Point> *node;
    typename Point::Type priority;

    TKDNodePriority() : node ( NULL ), priority ( 0 ) {}
    TKDNodePriority ( KDNode <Point> *node_, typename Point::Type priority_ ) : node ( node_ ), priority ( priority_ ) {}

    bool operator < ( const TKDNodePriority <Point> &n1 ) const
    {
            return priority > n1.priority;
    }
};

template <typename Point>
struct TNNeighboursList
{
    typedef std::multiset < TKDNodePriority <Point> > Type;
};

方法:

template <typename Point>
template <typename Point2>
void KDTree2D <Point>::findAllKNN2 ( const Point2 * point, typename TNNeighboursList <Point>::Type & knn, unsigned int k, KDNode <Point> *node, const unsigned int depth ) const
{
    if ( node == NULL )
    {
            return;
    }

    if ( point->getCoordinate ( depth % 2 ) <= node->getData()->getCoordinate ( depth % 2 ) )
    {
    findAllKNN2 ( point, knn, k, node->getLeft(), depth + 1 );
    }

    else 
    {
            findAllKNN2 ( point, knn, k, node->getRight(), depth + 1 );
    }

typename Point::Type dist_q_node = ( node->getData()->getX() - point->getX() ) * ( node->getData()->getX() - point->getX() ) +
                             ( node->getData()->getY() - point->getY() ) * ( node->getData()->getY() - point->getY() );

if (knn.size() == k)
{
    if (dist_q_node < knn.begin()->priority )
    {
        knn.erase(knn.begin());
        knn.insert ( TKDNodePriority <Point> ( node,  dist_q_node ) );
    }
}

else
{
    knn.insert ( TKDNodePriority <Point> ( node,  dist_q_node ) );
}

typename Point::Type dist_q_node_straight = ( point->getCoordinate ( node->getDepth() % 2 ) - node->getData()->getCoordinate ( node->getDepth() % 2 ) ) *
                                                ( point->getCoordinate ( node->getDepth() % 2 ) - node->getData()->getCoordinate ( node->getDepth() % 2 ) ) ;

typename Point::Type top_priority =  knn.begin()->priority;
if ( knn.size() < k ||  dist_q_node_straight <  top_priority )
{
            if ( point->getCoordinate ( node->getDepth() % 2 ) < node->getData()->getCoordinate ( node->getDepth() % 2 ) )
            {
        findAllKNN2 ( point, knn, k, node->getRight(), depth + 1 );
    }

    else
    {
        findAllKNN2 ( point, knn, k, node->getLeft(), depth + 1 );
    }
}
}

PQ实施(速度慢，为什么?)

template <typename Point>
struct TKDNodePriority
{
    KDNode <Point> *node;
    typename Point::Type priority;

    TKDNodePriority() : node ( NULL ), priority ( 0 ) {}
    TKDNodePriority ( KDNode <Point> *node_, typename Point::Type priority_ ) : node ( node_ ), priority ( priority_ ) {}

    bool operator < ( const TKDNodePriority <Point> &n1 ) const
    {
            return priority > n1.priority;
    }
};


template <typename Point>
struct TNNeighboursList
{ 
    typedef std::priority_queue< TKDNodePriority <Point> > Type;
};

方法:

template <typename Point>
template <typename Point2>
void KDTree2D <Point>::findAllKNN2 ( const Point2 * point, typename TNNeighboursList <Point>::Type & knn, unsigned int k, KDNode <Point> *node, const unsigned int depth ) const
{

    if ( node == NULL )
    {
            return;
    }

    if ( point->getCoordinate ( depth % 2 ) <= node->getData()->getCoordinate ( depth % 2 ) )
    {
    findAllKNN2 ( point, knn, k, node->getLeft(), depth + 1 );
    }

    else 
    {
            findAllKNN2 ( point, knn, k, node->getRight(), depth + 1 );
    }

typename Point::Type dist_q_node = ( node->getData()->getX() - point->getX() ) * ( node->getData()->getX() - point->getX() ) +
                             ( node->getData()->getY() - point->getY() ) * ( node->getData()->getY() - point->getY() );

if (knn.size() == k)
{
    if (dist_q_node < knn.top().priority )
    {
        knn.pop();

        knn.push ( TKDNodePriority <Point> ( node,  dist_q_node ) );
    }
}

else
{
    knn.push ( TKDNodePriority <Point> ( node,  dist_q_node ) );
}

typename Point::Type dist_q_node_straight = ( point->getCoordinate ( node->getDepth() % 2 ) - node->getData()->getCoordinate ( node->getDepth() % 2 ) ) *
                                                ( point->getCoordinate ( node->getDepth() % 2 ) - node->getData()->getCoordinate ( node->getDepth() % 2 ) ) ;

typename Point::Type top_priority =  knn.top().priority;
if ( knn.size() < k ||  dist_q_node_straight <  top_priority )
{
            if ( point->getCoordinate ( node->getDepth() % 2 ) < node->getData()->getCoordinate ( node->getDepth() % 2 ) )
            {
        findAllKNN2 ( point, knn, k, node->getRight(), depth + 1 );
    }

    else
    {
        findAllKNN2 ( point, knn, k, node->getLeft(), depth + 1 );
    }
}
}

推荐答案

看来，编译器的优化设置可能会对性能产生很大影响.在下面的代码中，多集无需优化即可轻松击败基于矢量和基于双端队列的priority_queue.但是，通过"-O3"优化，基于矢量的优先级队列胜过一切.现在，这些实验是在带有GCC的Linux上运行的，因此也许您会在Windows上获得不同的结果.我相信启用优化可能会消除STL向量中的许多错误检查行为.

It appears that your compiler's optimization settings may have a large impact on performance. In the code below, multiset handily beats vector-based and deque-based priority_queue without optimization. However, with "-O3" optimization, the vector-based priority queue beats all. Now, these experiments were run on Linux with GCC, so perhaps you would get different results on Windows. I believe enabling optimization may remove many error-checking behaviors in STL's vector.

没有优化:

使用-O2优化:

使用-O3优化:

测试工具(不要忘了与-lrt链接):

Test Harness (don't forget to link with -lrt):

#include <iostream>
#include <queue>
#include <deque>
#include <set>
#include <ctime>
#include <cstdlib>
#include <unistd.h>

using namespace std;

template <typename T>
double run_test(T& pq, int size, int iterations)
{
    struct timespec start, end;

    for(int i = 0; i < size; ++i)
        pq.push(rand());

    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
    for(int i = 0; i < iterations; ++i)
    {
        if(rand()%2)
            pq.pop();
        else
            pq.push(rand());

    }
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);

    end.tv_sec -= start.tv_sec;
    end.tv_nsec -= start.tv_nsec;
    if (end.tv_nsec < 0)
    {
        --end.tv_sec;
        end.tv_nsec += 1000000000ULL;
    }

    return (end.tv_sec*1e3 + end.tv_nsec/1e6);
}

template <class T>
class multiset_pq: public multiset<T>
{
public:
    multiset_pq(): multiset<T>() {};
    void push(T elm) { this->insert(elm); }
    void pop() { if(!this->empty()) this->erase(this->begin()); }
    const T& top() { return *this->begin(); }
};

int main(void)
{
    const int size = 5000;
    const int iterations = 100000;

    priority_queue<int, vector<int> > pqv;
    priority_queue<int, deque<int> > pqd;
    multiset_pq<int> pqms;

    srand(time(0));

    cout<<"pq-w-vector: "<<run_test(pqv, size, iterations)<<"ms"<<endl;
    cout<<"pq-w-deque: "<<run_test(pqd, size, iterations)<<"ms"<<endl;
    cout<<"pq-w-multiset: "<<run_test(pqms, size, iterations)<<"ms"<<endl;

    return 0;
}

这篇关于为什么将std :: multiset用作优先级队列比使用std :: priority_queue更快?的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！