Recognizing multiple keywords using PocketSphinx

谁说胖子不能爱 提交于 2019-11-26 11:22:55

You can use addKeywordSearch, which takes a file of keyphrases: one phrase per line, with the threshold for each phrase between slashes (/.../), for example:

up /1.0/
down /1.0/
left /1.0/
right /1.0/
forwards /1e-1/

The threshold for each phrase must be tuned to avoid false alarms.

Thanks to Nikolay's tip (see his answer above), I have developed the following code which works fine, and does not recognize words unless they're on the list. You can copy and paste this directly over the main class in the PocketSphinxDemo code:

/**
 * Demo activity that listens for the keyphrases listed in
 * {@code models/grammar/digits.gram} and shows each recognised phrase in a
 * toast.
 *
 * <p>The recognizer is configured in {@link #onCreate(Bundle)}. If the assets
 * cannot be synced, the failure is shown in the caption and listening is never
 * started.
 */
public class PocketSphinxActivity extends Activity implements RecognitionListener
{
    /** Name under which the keyword search is registered with the recognizer. */
    private static final String DIGITS_SEARCH = "digits";

    /** Remains {@code null} until {@link #setupRecognizer(File)} has completed. */
    private SpeechRecognizer recognizer;

    /**
     * Syncs the bundled assets, configures the recognizer and starts listening.
     *
     * @param state saved instance state, passed through to the superclass
     */
    @Override
    public void onCreate(Bundle state)
    {
        super.onCreate(state);

        setContentView(R.layout.main);

        TextView caption = (TextView) findViewById(R.id.caption_text);
        caption.setText("Preparing the recognizer");

        try
        {
            Assets assets = new Assets(PocketSphinxActivity.this);
            File assetDir = assets.syncAssets();
            setupRecognizer(assetDir);
        }
        catch (IOException e)
        {
            // Without the assets there is no recognizer: report the failure and
            // stop here instead of falling through to reset() with a null field.
            caption.setText("Failed to init recognizer " + e);
            return;
        }

        caption.setText("Say up, down, left, right, forwards, backwards");

        reset();
    }

    /** Partial hypotheses are ignored; only final results are reported. */
    @Override
    public void onPartialResult(Hypothesis hypothesis)
    {
    }

    /**
     * Clears the result view and, when a hypothesis is available, shows its
     * text in a short toast.
     *
     * @param hypothesis the final hypothesis, or {@code null} if there is none
     */
    @Override
    public void onResult(Hypothesis hypothesis)
    {
        ((TextView) findViewById(R.id.result_text)).setText("");

        if (hypothesis != null)
        {
            String text = hypothesis.getHypstr();
            makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
        }
    }

    @Override
    public void onBeginningOfSpeech()
    {
    }

    /** Restarts the keyword search once the speaker has finished. */
    @Override
    public void onEndOfSpeech()
    {
        reset();
    }

    /**
     * Builds the recognizer from the synced assets and registers the keyword
     * search under {@link #DIGITS_SEARCH}.
     *
     * @param assetsDir root directory of the synced assets; the models are
     *                  read from its {@code models} subdirectory
     */
    private void setupRecognizer(File assetsDir)
    {
        File modelsDir = new File(assetsDir, "models");

        recognizer = defaultSetup().setAcousticModel(new File(modelsDir, "hmm/en-us-semi"))
                                   .setDictionary(new File(modelsDir, "dict/cmu07a.dic"))
                                   .setRawLogDir(assetsDir).setKeywordThreshold(1e-20f)
                                   .getRecognizer();

        recognizer.addListener(this);

        File digitsGrammar = new File(modelsDir, "grammar/digits.gram");
        recognizer.addKeywordSearch(DIGITS_SEARCH, digitsGrammar);
    }

    /** Stops the current search, if any, and starts listening for keyphrases again. */
    private void reset()
    {
        if (recognizer == null)
        {
            // Setup did not complete, so there is nothing to restart.
            return;
        }

        recognizer.stop();
        recognizer.startListening(DIGITS_SEARCH);
    }
}

Your digits.gram file should be something like:

up /1e-1/
down /1e-1/
left /1e-1/
right /1e-1/
forwards /1e-1/
backwards /1e-1/

You should experiment with the thresholds between the slashes (/.../) to tune detection performance; 1e-1 is scientific notation for 0.1. I think the maximum is 1.0.

And it's 5.30pm so I can stop working now. Result.

I am working on updating Antinous's amendment to the PocketSphinx demo so that it runs in Android Studio. This is what I have so far:

//Note: change MainActivity to PocketSphinxActivity for demo use...
/**
 * Demo activity that listens for the keyphrases listed in {@code digits.gram}
 * and reports each recognised phrase in a toast.
 *
 * <p>Recognizer setup runs on a background task. It starts from
 * {@link #onCreate(Bundle)} when the RECORD_AUDIO permission is already held,
 * or from {@link #onRequestPermissionsResult(int, String[], int[])} once the
 * user grants it.
 */
public class MainActivity extends Activity implements RecognitionListener {
    /** Name under which the keyword search is registered with the recognizer. */
    private static final String DIGITS_SEARCH = "digits";

    /* Used to handle permission request */
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;

    /** Remains {@code null} until {@link #setupRecognizer(File)} has completed. */
    private SpeechRecognizer recognizer;

    /**
     * Shows the layout, then either starts recognizer setup or asks the user
     * for permission to record audio.
     *
     * @param state saved instance state, passed through to the superclass
     */
    @Override
    public void onCreate(Bundle state) {
        super.onCreate(state);

        setContentView(R.layout.main);
        ((TextView) findViewById(R.id.caption_text))
                .setText("Preparing the recognizer");

        // Check if user has given permission to record audio
        int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
        if (permissionCheck != PackageManager.PERMISSION_GRANTED) {
            // Setup continues in onRequestPermissionsResult once the user answers.
            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
            return;
        }

        runRecognizerSetup();
    }

    /**
     * Starts recognizer setup once the user has granted RECORD_AUDIO, so the
     * activity does not stay stuck on "Preparing the recognizer" after the
     * permission dialog is dismissed.
     *
     * @param requestCode  the code passed to the permission request
     * @param permissions  the requested permissions
     * @param grantResults the grant result for each requested permission
     */
    @Override
    public void onRequestPermissionsResult(int requestCode, String[] permissions, int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);

        if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
            return;
        }

        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            runRecognizerSetup();
        } else {
            ((TextView) findViewById(R.id.caption_text))
                    .setText("Permission to record audio was denied");
        }
    }

    /**
     * Syncs the assets and builds the recognizer on a background task, then
     * starts listening, or shows the failure in the caption.
     */
    private void runRecognizerSetup() {
        new AsyncTask<Void, Void, Exception>() {
            @Override
            protected Exception doInBackground(Void... params) {
                try {
                    Assets assets = new Assets(MainActivity.this);
                    File assetDir = assets.syncAssets();
                    setupRecognizer(assetDir);
                } catch (IOException e) {
                    return e;
                }
                return null;
            }

            @Override
            protected void onPostExecute(Exception result) {
                TextView caption = (TextView) findViewById(R.id.caption_text);
                if (result != null) {
                    caption.setText("Failed to init recognizer " + result);
                    return;
                }
                // Only invite the user to speak once the recognizer is ready.
                caption.setText("Say one, two, three, four, five, six...");
                reset();
            }
        }.execute();
    }

    /**
     * In partial result we get quick updates about current hypothesis. In
     * keyword spotting mode we can react here, in other modes we need to wait
     * for final result in onResult.
     *
     * @param hypothesis the current hypothesis, or {@code null} if there is none
     */
    @Override
    public void onPartialResult(Hypothesis hypothesis) {
        if (hypothesis == null || recognizer == null) {
            return;
        }

        String text = hypothesis.getHypstr();
        // NOTE(review): this compares the recognised text with the search name
        // ("digits"), not with a keyphrase; confirm which phrase should trigger
        // performAction().
        if (text.equals(DIGITS_SEARCH)) {
            recognizer.cancel();
            performAction();
            recognizer.startListening(DIGITS_SEARCH);
        }
    }

    /**
     * Clears the result view and shows the final hypothesis, or a notice that
     * there was none, in a short toast.
     *
     * @param hypothesis the final hypothesis, or {@code null} if there is none
     */
    @Override
    public void onResult(Hypothesis hypothesis) {
        ((TextView) findViewById(R.id.result_text)).setText("");
        if (hypothesis != null) {
            String text = hypothesis.getHypstr();
            makeText(getApplicationContext(), "Hypothesis" + text, Toast.LENGTH_SHORT).show();
        } else {
            makeText(getApplicationContext(), "hypothesis = null", Toast.LENGTH_SHORT).show();
        }
    }

    /** Cancels and shuts down the recognizer, if one was ever created. */
    @Override
    public void onDestroy() {
        super.onDestroy();
        // The recognizer is still null when permission was denied or setup failed.
        if (recognizer != null) {
            recognizer.cancel();
            recognizer.shutdown();
        }
    }

    @Override
    public void onBeginningOfSpeech() {
    }

    /** Restarts the keyword search once the speaker has finished. */
    @Override
    public void onEndOfSpeech() {
        reset();
    }

    @Override
    public void onTimeout() {
    }

    /**
     * Builds the recognizer from the synced assets and registers the keyword
     * search under {@link #DIGITS_SEARCH}.
     *
     * @param assetsDir directory holding the acoustic model, the dictionary
     *                  and {@code digits.gram}
     * @throws IOException declared for the caller to handle; presumably raised
     *                     by the recognizer setup (confirm against the library)
     */
    private void setupRecognizer(File assetsDir) throws IOException {
        // The recognizer can be configured to perform multiple searches
        // of different kind and switch between them
        recognizer = defaultSetup()
                .setAcousticModel(new File(assetsDir, "en-us-ptm"))
                .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
                .getRecognizer();
        recognizer.addListener(this);

        File digitsGrammar = new File(assetsDir, "digits.gram");
        recognizer.addKeywordSearch(DIGITS_SEARCH, digitsGrammar);
    }

    /** Stops the current search and starts listening for keyphrases again. */
    private void reset() {
        recognizer.stop();
        recognizer.startListening(DIGITS_SEARCH);
    }

    /**
     * Shows the recognizer's error message in the caption.
     *
     * @param error the error reported by the recognizer
     */
    @Override
    public void onError(Exception error) {
        ((TextView) findViewById(R.id.caption_text)).setText(error.getMessage());
    }

    /** Hook for the application-specific reaction to a detected keyphrase. */
    public void performAction() {
        // do here whatever you want
        makeText(getApplicationContext(), "performAction done... ", Toast.LENGTH_SHORT).show();
    }
}

Caveat emptor: this is a work in progress. Check back later. Suggestions would be appreciated.

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!