Continuous speech recognition with SFSpeechRecognizer (iOS 10 beta)

后端 未结 5 1044
情歌与酒
情歌与酒 2020-12-08 01:28

I am trying to perform continuous speech recognition using AVCapture on iOS 10 beta. I have set up captureOutput(...) to continuously receive CMSampleBuffer instances.

5条回答
  •  陌清茗
    陌清茗 (楼主)
    2020-12-08 01:54

    This works perfectly in my app. You can send queries to saifurrahman3126@gmail.com. Apple does not allow continuous speech recognition for more than one minute. https://developer.apple.com/documentation/speech/sfspeechrecognizer check here

    "Plan for a one-minute limit on audio duration. Speech recognition places a relatively high burden on battery life and network usage. To minimize this burden, the framework stops speech recognition tasks that last longer than one minute. This limit is similar to the one for keyboard-related dictation." This is what Apple says in its documentation.

    For now, I make recognition requests that last 40 seconds and then reconnect. If you speak before the 40 seconds elapse and then pause, the recording restarts automatically.

    /// Starts (or restarts) a speech-recognition pass: configures the audio
    /// session, creates an `SFSpeechAudioBufferRecognitionRequest`, taps the
    /// engine's input node into it, and schedules `timer4` to restart the
    /// whole cycle after 40 s (working around Apple's ~1-minute task limit).
    /// Partial transcriptions are written into the input text field.
    @objc func startRecording() {

        self.fullsTring = ""
        audioEngine.reset()

        // Tear down any in-flight task before starting a new one.
        recognitionTask?.cancel()
        recognitionTask = nil

        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record)
            try audioSession.setMode(.measurement)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
            try audioSession.setPreferredSampleRate(44100.0)

            if audioSession.isInputGainSettable {
                // The original built an unused NSErrorPointer and checked the
                // same optional twice; a plain do/catch says the same thing.
                do {
                    try audioSession.setInputGain(1.0)
                } catch {
                    print("audio error: \(error)")
                    return
                }
            } else {
                print("Cannot set input gain")
            }
        } catch {
            print("audioSession properties weren't set because of an error.")
        }

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

        let inputNode = audioEngine.inputNode
        guard let recognitionRequest = recognitionRequest else {
            fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
        }

        recognitionRequest.shouldReportPartialResults = true
        // Restart before Apple's one-minute cap on recognition tasks.
        self.timer4 = Timer.scheduledTimer(timeInterval: TimeInterval(40), target: self, selector: #selector(againStartRec), userInfo: nil, repeats: false)

        // [weak self] avoids a retain cycle: the task holds this handler and
        // self holds the task.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { [weak self] (result, error) in
            guard let self = self else { return }

            var isFinal = false

            if let result = result {
                // Every partial result resets the 2-second "stopped talking"
                // timer that fires `didFinishTalk`.
                self.timer.invalidate()
                self.timer = Timer.scheduledTimer(timeInterval: TimeInterval(2.0), target: self, selector: #selector(self.didFinishTalk), userInfo: nil, repeats: false)

                let bestString = result.bestTranscription.formattedString
                self.fullsTring = bestString
                self.inputContainerView.inputTextField.text = bestString

                isFinal = result.isFinal
            }

            if isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
            }

            if error != nil {
                URLCache.shared.removeAllCachedResponses()
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                // cancel() alone is correct here: it stops the task and
                // discards results. The original also called finish(), which
                // contradicts cancel() (finish asks for a *final* result).
                self.recognitionTask?.cancel()
                self.recognitionTask = nil
            }
        })

        // Remove any stale tap before installing a fresh one.
        audioEngine.reset()
        inputNode.removeTap(onBus: 0)

        let recordingFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] (buffer, _) in
            self?.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()

        do {
            try audioEngine.start()
        } catch {
            print("audioEngine couldn't start because of an error.")
        }

        self.hasrecorded = true
    }
    
    /// Fired by `timer4` after 40 s: restores the microphone button UI,
    /// gracefully ends the current recognition pass (endAudio + finish so the
    /// final result can still be delivered), and schedules `startRecording`
    /// to run again 2 s later.
    @objc func againStartRec() {

        self.inputContainerView.uploadImageView.setBackgroundImage(#imageLiteral(resourceName: "microphone"), for: .normal)
        self.inputContainerView.uploadImageView.alpha = 1.0
        self.timer4.invalidate()
        // The original invalidated `timer` twice back-to-back; once suffices.
        self.timer.invalidate()

        if self.audioEngine.isRunning {
            self.audioEngine.stop()
            // Stop feeding audio, then ask the task to finish with whatever
            // it has recognized so far.
            self.recognitionRequest?.endAudio()
            self.recognitionTask?.finish()
        }
        self.timer2 = Timer.scheduledTimer(timeInterval: 2, target: self, selector: #selector(startRecording), userInfo: nil, repeats: false)
    }
    
    /// Fired by `timer` 2 s after the last partial result arrived, i.e. the
    /// user appears to have stopped talking. Invalidates all timers, stops
    /// the engine, and cancels the outstanding recognition task (the text is
    /// already captured in `fullsTring`).
    @objc func didFinishTalk() {

        // Nothing was recognized yet — keep listening.
        guard self.fullsTring != "" else { return }

        self.timer4.invalidate()
        self.timer.invalidate()
        self.timer2.invalidate()

        if self.audioEngine.isRunning {
            self.audioEngine.stop()
            // cancel() stops the task and discards pending results; the
            // original also called finish() right after, which contradicts
            // cancel() (finish requests a final result instead).
            self.recognitionTask?.cancel()
        }
    }
    

提交回复
热议问题