1

我需要对一些包含电影评论的 CSV 文件进行情感分析，并使用 SentiWordNet 来完成。我遇到的主要问题出在 main() 函数上：目前它只能计算单个句子的极性（String sentence = "I love you but hate the current political climate.";），但我想读取整个 CSV 文件，并计算其中每条评论的极性。SentiWordNet 文件的路径是 "C:\Users\INTEL\Desktop\Sentiment Final\FINALSENTIMENT.txt"。

package swn3;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.Vector;

/**
 * Word-level sentiment lookup built from a tab-separated SentiWordNet-style file.
 *
 * <p>Each usable line of the file is read as: column 0 = part-of-speech tag,
 * column 2 = positive score, column 3 = negative score, column 4 = space-separated
 * terms of the form {@code word#rank}. For every {@code word#pos} key the
 * per-rank synset scores (positive minus negative) are combined into a single
 * rank-weighted average, which {@link #extract(String)} then sums over the
 * part-of-speech tags {@code n}, {@code a}, {@code r} and {@code v}.
 */
public class SWN3 {

    /** File read by the no-argument constructor. */
    private static final String DEFAULT_PATH_TO_SWN =
            "C:\\Users\\INTEL\\Desktop\\Sentiment FInal\\FINALSENTIMENT.txt";

    /** Key suffixes probed by {@link #extract(String)}, in summation order. */
    private static final String[] POS_SUFFIXES = {"#n", "#a", "#r", "#v"};

    private final String pathToSWN;

    /** Maps {@code word#pos} to its rank-weighted average score. */
    private final HashMap<String, Double> _dict;

    /** Loads the dictionary from the default file location. */
    public SWN3() {
        this(DEFAULT_PATH_TO_SWN);
    }

    /**
     * Loads the dictionary from the given file.
     *
     * <p>Loading is best-effort: if the file cannot be read, the problem is
     * reported on {@code System.err} and the dictionary keeps whatever was
     * parsed up to that point (possibly nothing), so every lookup of a missing
     * word yields 0. Blank lines and lines starting with {@code #} are ignored;
     * other lines that cannot be parsed are skipped and counted in a warning.
     *
     * @param pathToSWN path of the tab-separated score file
     * @throws IllegalArgumentException if {@code pathToSWN} is {@code null}
     */
    public SWN3(String pathToSWN) {
        if (pathToSWN == null) {
            throw new IllegalArgumentException("pathToSWN must not be null");
        }
        this.pathToSWN = pathToSWN;
        _dict = new HashMap<String, Double>();

        Map<String, List<Double>> scoresByKey = new HashMap<String, List<Double>>();
        // try-with-resources: the reader is closed even when a read fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(this.pathToSWN))) {
            int malformed = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!collectSynset(line, scoresByKey)) {
                    malformed++;
                }
            }
            if (malformed > 0) {
                System.err.println("Skipped " + malformed + " malformed line(s) in '"
                        + this.pathToSWN + "'");
            }
        } catch (IOException e) {
            System.err.println("Could not read SentiWordNet file '" + this.pathToSWN + "': " + e);
        }

        for (Map.Entry<String, List<Double>> entry : scoresByKey.entrySet()) {
            _dict.put(entry.getKey(), weightedAverage(entry.getValue()));
        }
    }

    /**
     * Parses one line of the score file and records the synset score for each
     * of its terms at the position given by the term's rank.
     *
     * @return {@code false} if the line (or one of its terms) was malformed and
     *         had to be skipped; {@code true} otherwise, including for blank
     *         and {@code #} comment lines
     */
    private static boolean collectSynset(String line, Map<String, List<Double>> scoresByKey) {
        if (line.isEmpty() || line.charAt(0) == '#') {
            return true;
        }
        String[] fields = line.split("\t");
        if (fields.length < 5) {
            return false;
        }
        double synsetScore;
        try {
            // Synset score = positive score - negative score.
            synsetScore = Double.parseDouble(fields[2]) - Double.parseDouble(fields[3]);
        } catch (NumberFormatException e) {
            return false;
        }

        boolean allTermsUsable = true;
        for (String term : fields[4].split(" ")) {
            String[] wordAndRank = term.split("#");
            int index;
            try {
                // Ranks in the file are 1-based; list positions are 0-based.
                index = wordAndRank.length < 2 ? -1 : Integer.parseInt(wordAndRank[1]) - 1;
            } catch (NumberFormatException e) {
                index = -1;
            }
            if (index < 0) {
                allTermsUsable = false;
                continue;
            }
            String key = wordAndRank[0] + "#" + fields[0];
            List<Double> scores = scoresByKey.get(key);
            if (scores == null) {
                scores = new ArrayList<Double>();
                scoresByKey.put(key, scores);
            }
            // Pad with neutral scores, then overwrite the slot for this rank.
            // Inserting instead of overwriting would shift scores already
            // stored for higher ranks whenever ranks arrive out of order.
            while (scores.size() <= index) {
                scores.add(0.0);
            }
            scores.set(index, synsetScore);
        }
        return allTermsUsable;
    }

    /**
     * Combines per-rank scores into one value:
     * {@code (1/1*first + 1/2*second + 1/3*third + ...) / (1/1 + 1/2 + 1/3 + ...)}.
     */
    private static double weightedAverage(List<Double> scoresByRank) {
        double weighted = 0.0;
        for (int i = 0; i < scoresByRank.size(); i++) {
            weighted += (1.0 / (i + 1)) * scoresByRank.get(i);
        }
        double weightSum = 0.0;
        for (int i = 1; i <= scoresByRank.size(); i++) {
            weightSum += 1.0 / i;
        }
        return weighted / weightSum;
    }

    /**
     * Returns the sum of the dictionary scores stored for {@code word} under
     * the part-of-speech tags n, a, r and v.
     *
     * @param word the word to look up; matched exactly (case-sensitive)
     * @return the summed score, or 0 if the word has no entry; never {@code null}
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String suffix : POS_SUFFIXES) {
            Double score = _dict.get(word + suffix);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /**
     * Scores a piece of text by splitting it on whitespace, stripping every
     * character that is not an ASCII letter from each token, and summing
     * {@link #extract(String)} over the resulting words.
     *
     * <p>NOTE(review): lookups are case-sensitive, so a capitalised token only
     * matches if the dictionary stores it with the same capitalisation.
     *
     * @param sentence the text to score, e.g. one review
     * @return the sum of the word scores
     */
    public double scoreSentence(String sentence) {
        double totalScore = 0;
        for (String token : sentence.split("\\s+")) {
            String word = token.replaceAll("([^a-zA-Z\\s])", "");
            totalScore += extract(word);
        }
        return totalScore;
    }

    /**
     * Maps a total score to a rating from 0 to 5 using contiguous bands:
     * {@code >= 0.75} is 5, {@code (0.25, 0.75)} is 4, {@code (0, 0.25]} is 3,
     * {@code [-0.25, 0)} is 2, {@code (-0.75, -0.25)} is 1 and {@code <= -0.75} is 0.
     *
     * <p>NOTE(review): a score of exactly 0 also yields 0, as it did before the
     * bands were made contiguous; confirm whether a neutral text should instead
     * get a mid-scale rating.
     *
     * @param totalScore score as returned by {@link #scoreSentence(String)}
     * @return the rating, 0 (most negative) to 5 (most positive)
     */
    public static int toReview(double totalScore) {
        if (totalScore >= 0.75) {
            return 5;
        }
        if (totalScore > 0.25) {
            return 4;
        }
        if (totalScore > 0) {
            return 3;
        }
        if (totalScore <= -0.75) {
            return 0;
        }
        if (totalScore < -0.25) {
            return 1;
        }
        if (totalScore < 0) {
            return 2;
        }
        return 0;
    }

    /** Scores one sample sentence and prints its rating followed by its total score. */
    public static void main(String[] args) throws Exception {
        SWN3 test = new SWN3();
        String sentence = "I love you but hate the current political climate.";
        double totalScore = test.scoreSentence(sentence);
        System.out.println(toReview(totalScore));
        System.out.println(totalScore);
    }

}
4

0 回答 0