# Load the product catalogue. The statements below read the columns
# ProductDesctiption (sic - spelling as in the CSV header), sUnSpsc and nItemId.
# stringsAsFactors = FALSE keeps the descriptions as character strings on every
# R version (the default was TRUE before R 4.0), so the text-mining steps
# receive text rather than factor codes.
dataSet <- read.csv(
  "E:/TempDataSetWithAttributes.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
## **nItemId** **sUnSpsc**
## 1 7440421 26121609 Network cable
## 2 7440442 26121609 Network cable
## 3 7440522 26121609 Network cable
## 4 7440623 26121609 Network cable
## 5 7460893 26121609 Network cable
## 6 7462277 26121609 Network cable
## **ProductDesctiption**
## 1 Copper cable, category 6A F/FTP, low smoke zero halogen (LSZH), 4-pair, conductors are 23 AWG with PE insulation, twisted in pairs, wrapped in foil, surrounded by an overall metallic foil shield and protected by a low smoke, flame retardant LSZH jacket, w
## 2 Category 6A, Low Smoke Zero Halogen (LSZH), 4-pair, F/FTP copper cable. Copper conductors are 23 AWG with PE insulation. Conductors are twisted in pairs, wrapped in foil, surrounded by an overall metallic foil shield and protected by a low smoke, flame re
## 3 Category 6A, Low Smoke Zero Halogen (LSZH), 4-pair, F/FTP copper cable. Copper conductors are 23 AWG with PE insulation. Conductors are twisted in pairs, wrapped in foil, surrounded by an overall metallic foil shield and protected by a low smoke, flame re
## 4 Category 6A, Low Smoke Zero Halogen (LSZH), 4-pair, F/FTP copper cable. Copper conductors are 23 AWG with PE insulation. Conductors are twisted in pairs, wrapped in foil, surrounded by an overall metallic foil shield and protected by a low smoke, flame re
## 5 Category 6A 4-pair, 23 AWG U/UTP copper cable, LSZH (IEC60332-1), blue.|Length: 1000 FT|Construction: LSZH PVC|Color: Blue|Number Of Pins: 4|Brand Name: Panduit|Outside Diameter: 0.285 IN|Type: NetKey Cable|Sub Brand: NetKey
## 6 Shielded marine MUD-resistant copper cable, category 7 S/FTP, low smoke zero halogen (LSZH), 4-pair, conductors are 22 AWG construction with foamed PE insulation, twisted in pairs, each surrounded by a foil, covered with an overall braided shield, within
# Build a corpus from the product descriptions and normalise the text before
# counting terms.
#
# Punctuation is replaced by a space instead of being deleted: the descriptions
# use "|" and "/" as separators (e.g. "Color: Blue|Number Of Pins"), and simply
# deleting them glues neighbouring words into one bogus term ("bluenumber").
replacePunctuation <- content_transformer(
  function(text) gsub("[[:punct:]]+", " ", text)
)

documents <- Corpus(VectorSource(dataSet$ProductDesctiption))
documents <- tm_map(documents, content_transformer(tolower))
documents <- tm_map(documents, replacePunctuation)
documents <- tm_map(documents, removeNumbers)
documents <- tm_map(documents, removeWords, stopwords("english"))
documents <- tm_map(documents, stemDocument, language = "english")
# A single whitespace clean-up at the end is enough; the term matrix tokenises
# on whitespace, so intermediate passes do not change the result.
documents <- tm_map(documents, stripWhitespace)

# One row per description, one column per (stemmed) term.
documentTermMatrix <- DocumentTermMatrix(documents)
**Create Data Matrix**
# Expand the sparse document-term matrix into an ordinary dense numeric matrix
# (rows = documents, columns = terms) so it can be scaled and sampled below.
documentTermMatrixFrame <- as.matrix(documentTermMatrix)
## aaa abov abs absbrand absmounting accept
## 0 0 0 0 0 0
**Create Training set**
# Draw the row indices of the training documents; every remaining row forms the
# test set further down.
# The seed makes the split (and therefore the reported accuracy) reproducible.
set.seed(42)
trainingSize <- 750
# Fail early with a clear message if there would be no rows left for testing.
stopifnot(nrow(documentTermMatrixFrame) > trainingSize)
training <- sample(nrow(documentTermMatrixFrame), trainingSize)
**Scaling of Document Term Matrix**
# Standardise each term column of the training rows (centre and divide by the
# column standard deviation).
Scaledtraining <- scale(documentTermMatrixFrame[training, ])

# Columns that consist solely of NA after scaling carry no information;
# flag them and keep only the remaining columns.
allMissingColumn <- colSums(is.na(Scaledtraining)) == nrow(Scaledtraining)
ScaledNonNAtraining <- Scaledtraining[, !allMissingColumn]
**Unsupervised learning using Self-Organizing Maps**
# Train a self-organising map on the scaled training terms using a
# 5 x 5 rectangular grid of units.
# NOTE(review): the name `som.wines` does not describe this data set; it is
# kept because later statements refer to it.
somGrid <- somgrid(xdim = 5, ydim = 5, topo = "rectangular")
som.wines <- som(ScaledNonNAtraining, grid = somGrid)
**Scaling of Test Set**
# Scale the held-out rows with the centre and spread estimated on the TRAINING
# rows. Scaling the test set with its own mean/sd would put it on a different
# scale from the data the map was trained on.
Xtest <- scale(
  documentTermMatrixFrame[-training, ],
  center = attr(Scaledtraining, "scaled:center"),
  scale = attr(Scaledtraining, "scaled:scale")
)

# Keep exactly the term columns that survived in the training matrix, so the
# test matrix has the same columns, in the same order, as the trained map.
# Deciding this from the test data instead could drop a different set of
# columns and misalign the two matrices.
keptTermColumn <- colSums(is.na(Scaledtraining)) != nrow(Scaledtraining)
ScaledXtest <- Xtest[, keptTermColumn, drop = FALSE]

# Class label (UNSPSC code) and item identifier for every row of the data set.
x <- dataSet$sUnSpsc
# `y` is not used by the statements visible here; kept in case later code
# relies on it.
y <- dataSet$nItemId
**Prediction using SOM trained model**
# Class labels of the training rows, as a factor, handed to predict() so the
# map units can be associated with classes.
trainingLabels <- as.factor(x[training])

# Map every test document onto the trained SOM and predict its class.
# NOTE(review): the `trainY` argument belongs to the older predict.kohonen
# interface - confirm against the installed kohonen version.
som.prediction <- predict(
  som.wines,
  newdata = ScaledXtest,
  trainY = trainingLabels
)
## [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [24] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [47] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [70] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 25
## [93] ## Heading ##5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [116] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [139] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [162] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [185] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [208] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [231] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
**Accuracy calculation**
# Compare the predicted class of every test document with its true class.
# The predicted CLASS (`$prediction`) is used here; `$unit.classif` is only the
# index of the winning map unit, so tabulating it against class labels does not
# give a confusion matrix and its diagonal is meaningless.
# NOTE(review): `$prediction` is the element name in the predict.kohonen
# interface that takes `trainY` - confirm against the installed kohonen version.
actualClasses <- as.character(x[-training])
predictedClasses <- as.character(som.prediction$prediction)

# Give both factors the same level set so the table is square and each diagonal
# cell pairs a class with itself.
classLevels <- sort(unique(c(predictedClasses, actualClasses)))
confusion.mat <- table(
  "Predictions" = factor(predictedClasses, levels = classLevels),
  Actual = factor(actualClasses, levels = classLevels)
)
resultinmatrix <- as.data.frame.array(confusion.mat)

# Percentage of test documents whose predicted class equals the true class.
# Documents without a prediction (NA) are left out of the table but still
# count in the denominator, i.e. they are treated as misclassified.
accuracy <- sum(diag(confusion.mat)) / length(actualClasses) * 100