1

我从多个来源接收原始数据流(raw feed),这些来源并非以固定速率产生数值,因此需要先重新采样并规范化,才能做进一步处理。先用平均值把数值重新采样到 500 毫秒的间隔,以聚合同一间隔内的多个值;然后用前向填充(以最近的上一个值)补齐缺失值,再用后向填充补齐数据开头可能缺失的值。

#raw feed
time          value     source
09:30:00.230     2         B
09:30:00.417     3         B
09:30:00.417     1         A
09:30:00.653     3         A
09:30:01.450     2         B
09:30:01.887     5         A
09:30:02.653     5         B
09:30:02.763     3         B
09:30:02.967     5         B
09:30:03.107     6         A
09:30:03.670     6         B

#resampled to 500ms intervals using average
time             A        B
09:30:00.000     NULL     2
09:30:00.500     2        3
09:30:01.000     NULL     NULL
09:30:01.500     NULL     2
09:30:02.000     5        NULL
09:30:02.500     NULL     5
09:30:03.000     6        4
09:30:03.500     NULL     6

#ffill+bfill
time             A     B
09:30:00.000     2     2
09:30:00.500     2     3
09:30:01.000     2     3
09:30:01.500     2     2
09:30:02.000     5     2
09:30:02.500     5     5
09:30:03.000     6     4
09:30:03.500     6     6

我使用了以下代码,但我怀疑这并不是使用 Deedle 的高效做法;而且由于完全外连接,得到的数据框中包含重复值,所以我现在需要某种方式把它们聚合起来,或者把它们拆分成序列后再重新采样一次?如果有更好的方法来满足上述要求,请告知。

 /// <summary>
 /// Splits the raw ticks into one Deedle series per source ("A" and "B"),
 /// resamples each onto a 500 ms grid, combines them into a frame and fills
 /// gaps forward and then backward.
 /// </summary>
 /// <param name="rawSource">Raw ticks as (time, value, source) tuples.</param>
 /// <remarks>
 /// The resulting frame is only held in a local variable; nothing is returned.
 /// NOTE(review): <c>ResampleUniform</c> is understood to keep one value per
 /// bucket rather than averaging the bucket - confirm against the Deedle docs
 /// if a mean per bucket is required.
 /// </remarks>
 private void Resample(IList<(DateTime time, double value, string source)> rawSource)
        {

            // Compare the source tag case-insensitively. The previous check lower-cased the
            // tag and compared it with an upper-case literal, which could never match.
            var sourceASeries = rawSource.Where(x => string.Equals(x.source, "A", StringComparison.OrdinalIgnoreCase)).Select(x => KeyValue.Create(x.time, x.value)).ToSeries();
            var sourceBSeries = rawSource.Where(x => string.Equals(x.source, "B", StringComparison.OrdinalIgnoreCase)).Select(x => KeyValue.Create(x.time, x.value)).ToSeries();

            // Bucket key = timestamp rounded to the nearest 500 ms; the next key is one bucket later.
            var sourceAResampled = sourceASeries.ResampleUniform(dt => dt.RoundMs(500), dt => dt.RoundMs(500).AddMilliseconds(500),
                Lookup.ExactOrSmaller);
            var sourceBResampled = sourceBSeries.ResampleUniform(dt => dt.RoundMs(500), dt => dt.RoundMs(500).AddMilliseconds(500),
                Lookup.ExactOrSmaller);

            // Forward fill first, then backward fill to cover gaps at the start of the data.
            var df = Frame.FromColumns(new[] { sourceAResampled, sourceBResampled });
            df = df.FillMissing(Direction.Forward).FillMissing(Direction.Backward);
        }

在使用 Pandas 的 Python 中,使用以下代码对我来说效果很好:

# Split the ticks by source, resample each source onto a fixed grid using the
# mean, fill gaps forward then backward, and join the two columns side by side.
# Assumes `vals` is a DataFrame with a datetime index and 'Source' / 'Value'
# columns - TODO confirm against the code that builds it.
# NOTE(review): the surrounding text describes 500 ms buckets; the '100ms'
# rule is kept as written.
import pandas as pd
A_vals = vals.where(vals['Source']=='A', inplace=False).rename(columns={"Value":"A"}).drop(['Source'], axis=1)
B_vals = vals.where(vals['Source']=='B', inplace=False).rename(columns={"Value":"B"}).drop(['Source'], axis=1)
A_vals= A_vals.resample('100ms').mean().ffill().bfill()
B_vals=B_vals.resample('100ms').mean().ffill().bfill()
result=pd.concat([ A_vals,B_vals], axis=1)
4

1 回答 1

0

我设法使用以下代码获得了正确的结果,但我确信它可以优化性能:

 /// <summary>
 /// Resamples the raw ticks of sources "A" and "B" onto a common, evenly spaced
 /// time grid. Each bucket holds the mean of the ticks that round to it; gaps are
 /// filled forward and then backward.
 /// </summary>
 /// <param name="rawTicks">Raw ticks as (time, value, source) tuples.</param>
 /// <param name="interval">Grid spacing in milliseconds; must be positive.</param>
 /// <returns>
 /// One row per grid point from the earliest to the latest rounded tick time
 /// (both inclusive), with a zero-based <c>rownum</c>.
 /// </returns>
 /// <exception cref="ArgumentNullException"><paramref name="rawTicks"/> is null.</exception>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="interval"/> is not positive.</exception>
 private IList<(int rownum, DateTime time, double A, double B)> ResampleAndNormalize(IList<(DateTime time, double value, string source)> rawTicks, int interval = 100)
        {
            if (rawTicks == null)
            {
                throw new ArgumentNullException(nameof(rawTicks));
            }
            if (interval <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(interval), interval, "Interval must be a positive number of milliseconds.");
            }

            // Average ticks that share the same timestamp and source so every series key is unique.
            var uniqueTicks = rawTicks.GroupBy(x => (time: x.time, source: x.source), x => x,
                (k, ticks) => (time: k.time, value: ticks.Average(x => x.value), source: k.source)).ToList();

            // Compare the source tag case-insensitively. The previous check lower-cased the
            // tag and compared it with an upper-case literal, which could never match.
            var ASeries = uniqueTicks.Where(x => string.Equals(x.source, "A", StringComparison.OrdinalIgnoreCase)).Select(x => KeyValue.Create(x.time, x.value)).ToSeries();
            var BSeries = uniqueTicks.Where(x => string.Equals(x.source, "B", StringComparison.OrdinalIgnoreCase)).Select(x => KeyValue.Create(x.time, x.value)).ToSeries();

            var startTime = ASeries.FirstKey().MinTime(BSeries.FirstKey()).RoundMs(interval);
            var endTime = ASeries.LastKey().MaxTime(BSeries.LastKey()).RoundMs(interval);

            // Both bounds are multiples of the interval, so the division is exact. The +1 keeps
            // the bucket at endTime, which holds the latest tick and was previously dropped.
            var intervalTicks = interval * TimeSpan.TicksPerMillisecond;
            var bucketCount = (int)((endTime - startTime).Ticks / intervalTicks) + 1;
            var newKeys = Enumerable.Range(0, bucketCount)
                .Select(x => startTime.AddTicks(x * intervalTicks)).ToList();

            var AResampled = ASeries.ResampleEquivalence(x => x.RoundMs(interval), x => x.Mean());
            var BResampled = BSeries.ResampleEquivalence(x => x.RoundMs(interval), x => x.Mean());

            // Align both series to the shared grid, then fill forward and finally backward
            // so leading gaps are covered as well.
            AResampled = AResampled.Realign(newKeys).FillMissing(Direction.Forward).FillMissing(Direction.Backward);
            BResampled = BResampled.Realign(newKeys).FillMissing(Direction.Forward).FillMissing(Direction.Backward);

            var results = new List<(int rownum, DateTime time, double A, double B)>(newKeys.Count);
            for (int i = 0; i < newKeys.Count; i++)
            {
                var time = newKeys[i];
                var Avalue = AResampled.GetAt(i);
                var Bvalue = BResampled.GetAt(i);
                // rownum is the zero-based position on the grid (it used to be a constant 0).
                results.Add((rownum: i, time: time, A: Avalue, B: Bvalue));
            }

            return results;
        }

 /// <summary>
 /// Helpers for rounding timestamps to a millisecond grid and for picking the
 /// earlier or later of two timestamps.
 /// </summary>
 public static class DateTimeExtensions
    {
        /// <summary>
        /// Rounds <paramref name="time"/> to the nearest multiple of
        /// <paramref name="precision"/> milliseconds; an exact half rounds up.
        /// </summary>
        /// <param name="time">Timestamp to round.</param>
        /// <param name="precision">Grid size in milliseconds.</param>
        /// <returns>The rounded timestamp.</returns>
        public static DateTime RoundMs(this DateTime time, int precision)
        {
            long step = precision * TimeSpan.TicksPerMillisecond;
            long offset = time.Ticks % step;

            // At or past the midpoint move up to the next grid point, otherwise fall back.
            long delta = offset >= step / 2 ? step - offset : -offset;
            return time.AddTicks(delta);
        }

        /// <summary>
        /// Returns the earlier of the two timestamps; <paramref name="b"/> when they are equal.
        /// </summary>
        public static DateTime MinTime(this DateTime a, DateTime b)
        {
            if (a < b)
            {
                return a;
            }

            return b;
        }

        /// <summary>
        /// Returns the later of the two timestamps; <paramref name="a"/> when they are equal.
        /// </summary>
        public static DateTime MaxTime(this DateTime a, DateTime b)
        {
            if (a < b)
            {
                return b;
            }

            return a;
        }
    }
于 2019-09-18T13:22:42.303 回答