加权分位数的实现
这实现了分位数函数的权重功能,在网格点之间具有线性插值。
#include <vector>
#include <numeric>
#include <algorithm>
#include <iostream>
#include <assert.h>
// https://stackoverflow.com/a/12399290/7128154
template <typename T>
std::vector<size_t> sorted_index(const std::vector<T> &v) {
std::vector<size_t> idx(v.size());
iota(idx.begin(), idx.end(), 0);
stable_sort(idx.begin(), idx.end(),
[&v](size_t i1, size_t i2) {return v[i1] < v[i2];});
return idx;
}
// https://stackoverflow.com/a/1267878/7128154
template< typename order_iterator, typename value_iterator >
void reorder( order_iterator order_begin, order_iterator order_end, value_iterator v ) {
typedef typename std::iterator_traits< value_iterator >::value_type value_t;
typedef typename std::iterator_traits< order_iterator >::value_type index_t;
typedef typename std::iterator_traits< order_iterator >::difference_type diff_t;
diff_t remaining = order_end - 1 - order_begin;
for ( index_t s = index_t(), d; remaining > 0; ++ s ) {
for ( d = order_begin[s]; d > s; d = order_begin[d] ) ;
if ( d == s ) {
-- remaining;
value_t temp = v[s];
while ( d = order_begin[d], d != s ) {
swap( temp, v[d] );
-- remaining;
}
v[s] = temp;
}
}
}
// https://stackoverflow.com/a/1267878/7128154
template< typename order_iterator, typename value_iterator >
void reorder_destructive( order_iterator order_begin, order_iterator order_end, value_iterator v ) {
typedef typename std::iterator_traits< value_iterator >::value_type value_t;
typedef typename std::iterator_traits< order_iterator >::value_type index_t;
typedef typename std::iterator_traits< order_iterator >::difference_type diff_t;
diff_t remaining = order_end - 1 - order_begin;
for ( index_t s = index_t(); remaining > 0; ++ s ) {
index_t d = order_begin[s];
if ( d == (diff_t) -1 ) continue;
-- remaining;
value_t temp = v[s];
for ( index_t d2; d != s; d = d2 ) {
std::swap( temp, v[d] );
std::swap( order_begin[d], d2 = (diff_t) -1 );
-- remaining;
}
v[s] = temp;
}
}
// https://stackoverflow.com/a/29677616/7128154
// https://stackoverflow.com/a/37708864/7128154
template <typename T>
double quantile(double q, std::vector<T> values, std::vector<double> weights = std::vector<double>())
{
assert( 0. <= q && q <= 1. && "expecting quantile in range [0; 1]");
if (weights.empty())
{
weights = std::vector<double>(values.size(), 1.);
}
else
{
assert (values.size() == weights.size() && "values and weights missfit in quantiles");
std::vector<size_t> inds = sorted_index(values);
reorder_destructive(inds.begin(), inds.end(), weights.begin());
}
stable_sort(values.begin(), values.end());
// values and weights are sorted now
std::vector<double> quantiles (weights.size());
quantiles[0] = weights[0];
for (int ii = 1; ii < quantiles.size(); ii++)
{
quantiles[ii] = quantiles[ii-1] + weights[ii];
}
double norm = std::accumulate(weights.begin(), weights.end(), 0.0);
int ind = 0;
double qCurrent = 0;
for (; ind < quantiles.size(); ind++)
{
qCurrent = (quantiles[ind] - weights[ind] / 2. ) / norm;
quantiles[ind] = qCurrent;
if (qCurrent > q)
{
if (ind == 0) {return values[0];}
double rat = (q - quantiles[ind-1]) / (quantiles[ind] - quantiles[ind-1]);
return values[ind-1] + (values[ind] - values[ind-1]) * rat;
}
}
return values[values.size()-1];
}
template <typename T>
double quantile(double q, std::vector<T> values, std::vector<int> weights)
{
std::vector<double> weights_double (weights.begin(), weights.end());
return quantile(q, values, weights_double);
}
int main()
{
std::vector<int> vals {5, 15, 25, 35, 45, 55, 65, 75, 85, 95};
std::cout << "quantile(0, vals)=" << quantile(0, vals) << std::endl;
std::cout << "quantile(.73, vals)=" << quantile(.73, vals) << std::endl;
std::vector<int> vals2 {1, 2, 3};
std::vector<double> ws2 {1, 2, 3};
std::cout << "quantile(.13, vals2, ws2)=" << quantile(.13, vals2, ws2) << std::endl;
}
输出
quantile(0, vals)=5
quantile(.73, vals)=73
quantile(.13, vals2, ws2)=1.18667
关于加权分位数
而在 中unweighted case
,输入值形成等距分布
values: [1, 2, 3] -> positions: [1/6, 3/6, 5/6]
在 中weighted case
,距离被修改。
values: [1, 2, 3], weights: [1, 2, 1] -> positions: [1/8, 4/8, 7/8]
这不等同于未加权情况的重复值
values: [1, 2, 2, 3] -> positions: [1/8, 3/8, 5/8, 7/8],
因为在重复值之间形成了一个平台。这表示:
quantile(q=3/8, values=[1, 2, 2, 3]) = 2
但
quantile(q=3/8, values=[1, 2, 3], weights=[1, 2, 1]) = 1.67