0

这是我尝试过的。取决于用户在我想将 String 或 Double 添加到新块的函数中的内容。

package org.apache.spark.h2o.utils

import water.fvec.{NewChunk, Frame, Chunk}
import water._
import water.parser.ValueString

class ReplaceNa[T >: Any](a: T) extends MRTask{
  override def map(c: Chunk, nc: NewChunk): Unit = {
    for (row <- 0 until c.len()) {

        a match{
             case s: ValueString if(c.isNA(row)) => nc.addStr(s)           
             case d: Double      if(c.isNA(row)) => nc.addNum(d)

      }
    }
  }
}

但我得到了错误

 error: value outputFrame is not a member of Nothing
          pred.add(new ReplaceNa(3).doAll(1, pred.vec(4)).outputFrame(Array("s"), null))

谢谢你的帮助!

4

1 回答 1

2

我有几点意见:

  • 检查交换机分支外的 NA
  • 您缺少非 NA 案例,因此您正在生成比输入向量短的向量(我希望您希望生成相同长度的向量)

关于泛型,您需要提供类型特化。例如,类似于以下代码段:

class ReplaceNA[T](val value: T)(implicit add: TAdd[T]) extends MRTask[ReplaceNA[T]] {
  override def map(c: Chunk, nc: NewChunk): Unit = {
    for (row <- 0 until c.len()) {
      // Replace NAs by given value
      if (c.isNA(row)) {
        add.addValue(nc, value)
      } else {
        // Do something with default value
        nc.addNA()
      }
    }
  }

}

trait TAdd[T] extends Serializable {
  def addValue(nc: NewChunk, value: T)
}

object TAdd extends Serializable {
  implicit val addDouble = new TAdd[Double] { def addValue(nc: NewChunk, value: Double) = nc.addNum(value) }
  implicit val addFloat = new TAdd[Float] { def addValue(nc: NewChunk, value: Float) = nc.addNum(value) }
  implicit val addValueString = new TAdd[ValueString] { def addValue(nc: NewChunk, value: ValueString) = nc.addStr(value) }
}
于 2015-10-30T22:33:20.283 回答