android - 语音识别和与音乐的声音比较

Question

我正在尝试使用语音识别制作一个 Android 应用程序，但不幸的是，谷歌不支持我的语言（马其顿语），我正在尝试比较两种录音声音。

我正在使用http://code.google.com/p/musicg/来记录和比较语音，并且我在初始化用于检测语音的设置时堆栈。有人可以告诉我如何重写这个初始化函数语音检测对我来说非常重要..或其他一些想法如何做到这一点。

这是哨声检测的初始化

            // settings for detecting a whistle

            minFrequency = 600.0f;
            maxFrequency = Double.MAX_VALUE;

            minIntensity = 100.0f;
            maxIntensity = 100000.0f;

            minStandardDeviation = 0.1f;
            maxStandardDeviation = 1.0f;

            highPass = 500;
            lowPass = 10000;

            minNumZeroCross = 50;
            maxNumZeroCross = 200;

            numRobust = 10;

score 1 · Accepted Answer

我的理解是，就目前而言，musicg DetectionApi 仅用于分析单个声音块并告诉您它是否包含该类型的声音。例如包含的口哨或拍手 api 示例。即是拍手/是口哨。

使用 musicg，您可能做的最好的事情就是识别声音是否是声音。尽管这可能超出了DetectionApi。

既然你说 google api 不支持 Macedonian，也许你可以试试Pocketsphinx，在这篇 stackoverflow 文章中提到了。

score 0 · Accepted Answer

以下是我如何将临时录制的声音保存为 wav 格式：

public class RecorderActivity {

    private static final int RECORDER_BPP = 16;
    private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
    private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound";
    private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
    private String AUDIO_RECORDER_FILE = "";
    private static final int RECORDER_SAMPLERATE = 8000;
    private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    private RealDoubleFFT transformer;

    EndPointDetection endpoint;

    int blockSize = 256;

    private AudioRecord recorder = null;
    private int bufferSize = 0;
    private RecorderAsynctask recordingThread = null;
    private boolean isRecording = false;

    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;

    ImageView imageView;
    Bitmap bitmap;
    Canvas canvas;
    Paint paint;

    Context con;

    RecorderActivity(String file, Context con, ImageView image) {
        AUDIO_RECORDER_FILE = file;
        this.con = con;

        this.imageView = image;
        bitmap = Bitmap.createBitmap((int) 256, (int) 100,
                Bitmap.Config.ARGB_8888);
        canvas = new Canvas(bitmap);
        paint = new Paint();
        paint.setStrokeWidth(5);
        paint.setColor(Color.BLUE);
        imageView.setImageBitmap(bitmap);

        transformer = new RealDoubleFFT(256);

        bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE,
                RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);

    }

    public String getFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);

        if (!file.exists()) {
            file.mkdirs();
        }

        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV);
    }

    private String getTempFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);

        if (!file.exists()) {
            file.mkdirs();
        }

        File tempFile = new File(filepath, AUDIO_RECORDER_TEMP_FILE);

        if (tempFile.exists())
            tempFile.delete();

        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
    }

    public void startRecording() {
        recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
                RECORDER_SAMPLERATE, RECORDER_CHANNELS,
                RECORDER_AUDIO_ENCODING, bufferSize);

        recorder.startRecording();

        isRecording = true;

        recordingThread = new RecorderAsynctask();
        recordingThread.execute(this);

    }

    class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> {

        public void shareLockedfuntionProgreesUpdate(double[] fttrezult) {

            publishProgress(fttrezult);

        }

        @Override
        protected Void doInBackground(RecorderActivity... params) {
            // TODO Auto-generated method stub

            byte data[] = new byte[bufferSize];
            String filename = getTempFilename();
            FileOutputStream os = null;

            try {
                os = new FileOutputStream(filename);
            } catch (FileNotFoundException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            int read = 0;

            AudioTrack tempAudioTrack;

            double[] toTransform = new double[blockSize];

            if (null != os) {
                while (isRecording) {
                    // sampleRateTextField.setText(recorder.getSampleRate());

                    int bufferReadResult = recorder.read(data, 0, blockSize);

                    for (int i = 0; i < blockSize && i < bufferReadResult; i++) {
                        toTransform[i] = (double) data[i] / 32768.0; // signed
                                                                        // 16
                                                                        // bit
                    }

                    transformer.ft(toTransform);
                    publishProgress(toTransform);

                    if (AudioRecord.ERROR_INVALID_OPERATION != read) {
                        try {

                            os.write(data);
                            tempIndex++;

                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }

                }

                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            return null;
        }

        @Override
        protected void onProgressUpdate(double[]... toTransform) {
            canvas.drawColor(Color.GRAY);
            Paint p = new Paint();
            for (int i = 0; i < toTransform[0].length; i++) {

                int x = i;
                int downy = (int) (100 - (toTransform[0][i] * 10));
                int upy = 100;
                p.setColor(Color.rgb(downy % 256, i % 256, upy % 256));
                canvas.drawLine(x, upy, x, downy, p);

            }
            imageView.invalidate();
        }

    }

    public void writeAudioDataToFile(RecorderAsynctask asyntask) {
        byte data[] = new byte[bufferSize];
        String filename = getTempFilename();
        FileOutputStream os = null;

        try {
            os = new FileOutputStream(filename);
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        int read = 0;

        double[] toTransform = new double[256];

        if (null != os) {
            while (isRecording) {
                // sampleRateTextField.setText(recorder.getSampleRate());

                int bufferReadResult = recorder.read(data, 0, 256);

                for (int i = 0; i < 256 && i < bufferReadResult; i++) {
                    toTransform[i] = (double) data[i] / 32768.0; // signed
                                                                    // 16
                                                                    // bit
                }

                transformer.ft(toTransform);
                asyntask.shareLockedfuntionProgreesUpdate(toTransform);

                if (AudioRecord.ERROR_INVALID_OPERATION != read) {
                    try {

                        os.write(data);
                        tempIndex++;

                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }

            }

            try {
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public void stopRecording() {
        if (null != recorder) {
            isRecording = false;

            recorder.stop();
            recorder.release();

            recorder = null;
            recordingThread = null;
        }

        copyWaveFile(getTempFilename(), getFilename());
        deleteTempFile();
    }

    private void deleteTempFile() {
        File file = new File(getTempFilename());

        file.delete();
    }

    private void copyWaveFile(String inFilename, String outFilename) {
        FileInputStream in = null;
        FileOutputStream out = null;
        long totalAudioLen = 0;
        long totalDataLen = totalAudioLen + 36;
        long longSampleRate = RECORDER_SAMPLERATE;
        int channels = 1;
        long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels / 8;

        byte[] data = new byte[bufferSize];

        try {
            in = new FileInputStream(inFilename);
            out = new FileOutputStream(outFilename);
            totalAudioLen = in.getChannel().size();
            totalDataLen = totalAudioLen + 36;

            WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                    longSampleRate, channels, byteRate);

            while (in.read(data) != -1) {
                out.write(data);
            }

            in.close();
            out.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels, long byteRate)
            throws IOException {

        byte[] header = new byte[44];

        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) (totalDataLen & 0xff);
        header[5] = (byte) ((totalDataLen >> 8) & 0xff);
        header[6] = (byte) ((totalDataLen >> 16) & 0xff);
        header[7] = (byte) ((totalDataLen >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // format = 1
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (longSampleRate & 0xff);
        header[25] = (byte) ((longSampleRate >> 8) & 0xff);
        header[26] = (byte) ((longSampleRate >> 16) & 0xff);
        header[27] = (byte) ((longSampleRate >> 24) & 0xff);
        header[28] = (byte) (byteRate & 0xff);
        header[29] = (byte) ((byteRate >> 8) & 0xff);
        header[30] = (byte) ((byteRate >> 16) & 0xff);
        header[31] = (byte) ((byteRate >> 24) & 0xff);
        header[32] = (byte) (2 * 16 / 8); // block align
        header[33] = 0;
        header[34] = RECORDER_BPP; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (totalAudioLen & 0xff);
        header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
        header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
        header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

        out.write(header, 0, 44);
    }

    public void closeThreadIfisnot() {
        recordingThread.cancel(true);
    }
}

score 0 · Accepted Answer

首先，您所要做的就是将录制的声音保存到 wav 中，然后很容易从他们的 API 中使用指纹类https://code.google.com/p/musicg/source/browse/#git%2Fsrc% 2Fcom%2Fmusicg%2F指纹

这是我进行比较的方法，一个临时录制的 WAV 声音与我数据中的所有 wav 声音。

public Cursor FP(String recordedClip, Context context) {

    Baza baza = new Baza(context);

    Cursor allSound = baza.getAllProtocolsForSoundCheck();

    List<Protocol> protocols = new ArrayList<Protocol>();
    int PID =-1;

    Log.d("broj",allSound.getCount()+"");

    for (int i = 0; i < allSound.getCount(); i++) {


        Protocol protocol = new Protocol();
        allSound.moveToNext();
        protocol.setSoundPath(allSound.getString(4));
        protocol.setId(Integer.parseInt(allSound.getString(1)));
        protocols.add(protocol);

        Log.d("brojProtocol",allSound.getString(2)+" ");
        baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1);
    }

    Wave record = new Wave(recordedClip);

    List<Wave> waves = new ArrayList<Wave>();

    if (protocols != null) {
        for (int i = 0; i < protocols.size(); i++) {
            waves.add(new Wave(protocols.get(i).getSoundPath()));
        }
    }

    for (int i = 0; i < waves.size(); i++) {

        Log.d("similarity", record.getFingerprintSimilarity(waves.get(i))
                        .getSimilarity()+"");


        baza.updateProtocolsSoundSimilarity(protocols.get(i).getId(),
                record.getFingerprintSimilarity(waves.get(i))
                        .getSimilarity());
    }

    Cursor similarCursor = baza.getSimilarProtocols();
    similarCursor.moveToFirst();
    TransferClass protocolForTransfer = new TransferClass();
    protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1)));
    protocolForTransfer.setName(similarCursor.getString(2));

    Log.d("passobj",protocolForTransfer.getName()+" "+protocolForTransfer.getId());
//  return protocolForTransfer;

    return similarCursor;
}

android - 语音识别和与音乐的声音比较

3 回答 3

Related

Reference