关于语音合成和识别

匿名 (未验证) 提交于 2019-12-03 00:37:01

最近研究了一下语音合成和语音识别,分别看了一些文章,也下载了 SDK 写了些代码测试了一下。

发现对于语音合成,就中文来说,百度语音和科大讯飞的效果基本差不多。

英文的话,百度合成出来的效果不佳,科大讯飞稍好一点,但总体都没有国外的语音合成(比如 iSpeech、FreeTTS)好,可能是因为国外的母语是英语的缘故吧。

百度日调用额度比较多,据说有2万额度。讯飞每天就500,有点少。iSpeech 是要收费的。FreeTTS 可以离线使用。

百度识别和合成代码:

public class SoundAPI { 	private static final Logger logger = LoggerFactory.getLogger(SoundAPI.class); 	final static String FILE_PATH = Config.getString("download.folder"); 	// 设置APPID/AK/SK 	private static final String APP_ID = "你的APP ID"; 	private static final String API_KEY = "你的key"; 	private static final String SECRET_KEY = "你的秘钥"; 	// 初始化一个AipSpeech 	private static AipSpeech client = null; 	private static long iniTime = 0L; 	/** 30 天 24 小时 **/ 	private static final long MONTH_TIME = 30 * 24 * 60 * 60 * 1000; 	private static final Base64 base64 = new Base64();  	private static void iniAPI() 	{ 		boolean needToReset = false; 		// 判断是否一个月了,如果一个月后,需要重新初始话 		long currentTime = System.currentTimeMillis(); 		if (currentTime - iniTime > MONTH_TIME) 		{ 			needToReset = true; 		} 		if (client == null || needToReset) 		{ 			client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY); 			/** 2秒超时时间 **/ 			client.setConnectionTimeoutInMillis(2000);  			iniTime = System.currentTimeMillis(); 		} 	}  	public static String getSoundMp3(String text, String fileName, QuestionTypeEnum questionType) 	{ 		String rtnfileName = ""; 		String type = "zh"; 		if (StringUtils.isEmpty(text)) 			return "";  		try 		{ 			iniAPI();  			if (QuestionTypeEnum.ENGLISH_WORD.getType().equals(questionType.getType())) 			{ 				type = "en"; 			}  			TtsResponse res = client.synthesis(text, type, 1, null); 			byte[] data = res.getData(); 			if (data != null) 			{  				// String uuid = UUID.randomUUID().toString().replace("-", 				// "").toLowerCase(); 				String uuid = base64.encodeToString(fileName.getBytes()); 				rtnfileName = type + "/" + uuid.replaceAll("=", "") + ".mp3"; 				String path = FILE_PATH + rtnfileName; 				File file = new File(path); 				if (!file.exists()) 				{ 					Util.writeBytesToFileSystem(data, path); 				}  			} else 			{ 				JSONObject jsonObj = res.getResult(); 				logger.info("invoke baidu synthesis API error:", jsonObj); 			} 		} catch (Exception e) 		{ 			rtnfileName = ""; 			
logger.error("invoke baidu synthesis API error:", e); 		}  		return rtnfileName; 	}  	public static String recognizeSound(String filePath, QuestionTypeEnum questionType) 	{ 		String result = ""; 		JSONObject asrRes = null;  		if (StringUtils.isEmpty(filePath)) 			return "";  		try 		{ 			iniAPI(); 			if (QuestionTypeEnum.ENGLISH_WORD.getType().equals(questionType.getType())) 			{ 				HashMap<String, Object> options = new HashMap<>(); 				options.put("dev_pid", 1737); 				asrRes = client.asr(filePath, "pcm", 16000, options); 			} else 			{ 				asrRes = client.asr(filePath, "pcm", 16000, null); 			}  			result = getResult(asrRes);  		} catch (Exception e) 		{ 			logger.error("invoke baidu asr API error:", e); 		}  		return result; 	}  	private static String getResult(JSONObject asrRes) 	{ 		String result = ""; 		if (asrRes.getInt("err_no") == 0) 		{ 			JSONArray arrayResult = asrRes.getJSONArray("result"); 			StringBuilder sbResult = new StringBuilder(); 			for (int i = 0; i < arrayResult.length(); i++) 			{ 				if (i == 0) 				{ 					sbResult.append(arrayResult.get(i).toString()); 				} else 				{ 					if (!StringUtils.isEmpty(arrayResult.get(i).toString())) 						sbResult.append(";" + arrayResult.get(i).toString()); 				} 			}  			result = sbResult.toString().replaceAll(",", ""); 		} else 		{ 			logger.error("invoke baidu asr API error:", asrRes); 		} 		return result; 	} 

  科大讯飞的语音识别及合成

public class IatAPI { 	private static final Logger logger = LoggerFactory.getLogger(IatAPI.class); 	/** 	 * 科大讯飞语音识别写入参考 	 * https://github.com/IflytekAIUI/DemoCode/blob/master/webapi/java/Iat.java 	 */ 	final static String APPID = "你的APPID"; 	final static String APPKEY_IAT = "你的秘钥"; 	final static String URL_IAT = "http://api.xfyun.cn/v1/service/v1/iat"; 	final static String IP = "服务器IP地址";  	/** 	 *  	 * 发送语音,获取文字 	 *  	 * @param audioByteArray 	 * @return 	 * @throws Exception 	 */ 	public static String process(String filePath) throws Exception 	{ 		Map<String, String> header = getHeader("raw", "sms16k"); 		// 读取音频文件,转二进制数组,然后Base64编码 		byte[] audioByteArray = FileUtil.read2ByteArray(filePath); 		String audioBase64 = new String(Base64.encodeBase64(audioByteArray), "UTF-8"); 		String bodyParam = "audio=" + audioBase64; 		// logger.info(bodyParam); 		String result = HttpUtil.doPost(URL_IAT, header, bodyParam);  		return result; 	}  	/** 	 * 组装http请求头 	 *  	 * @param aue 	 * @param resultLevel 	 * @param language 	 * @param category 	 * @return 	 * @throws UnsupportedEncodingException 	 */ 	private static Map<String, String> getHeader(String aue, String engineType) throws UnsupportedEncodingException 	{ 		// 系统当前时间戳 		String X_CurTime = System.currentTimeMillis() / 1000L + ""; 		// 业务参数 		String param = "{\"aue\":\"" + aue + "\"" + ",\"engine_type\":\"" + engineType + "\"}"; 		String X_Param = new String(Base64.encodeBase64(param.getBytes("UTF-8"))); 		// 接口密钥 		String apiKey = APPKEY_IAT; 		// 讯飞开放平台应用ID 		String X_Appid = APPID; 		// 生成令牌 		String X_CheckSum = DigestUtils.md5Hex(apiKey + X_CurTime + X_Param);  		// 组装请求头 		Map<String, String> header = new HashMap<String, String>(); 		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"); 		header.put("X-Param", X_Param); 		header.put("X-CurTime", X_CurTime); 		header.put("X-CheckSum", X_CheckSum); 		header.put("X-Appid", X_Appid); 		header.put("X-Real-Ip", IP); 		return header;  	} 

  

public class TtsAPI { 	private static final Logger logger = LoggerFactory.getLogger(TtsAPI.class); 	/** 	 * 科大讯飞语音识别写入参考 	 * https://github.com/IflytekAIUI/DemoCode/blob/master/webapi/java/Iat.java 	 */ 	final static String APPID = "你的APP id"; 	final static String APPKEY_TTS = "你的秘钥"; 	final static String URL_TTS = "http://api.xfyun.cn/v1/service/v1/tts"; 	final static String IP = "服务器地址"; 	final static String FILE_PATH = Config.getString("download.folder");  	/** 	 *  	 * 发送文字,获取语音 	 *  	 * @param text 	 * @throws Exception 	 */ 	public static String process(String text) throws Exception 	{ 		String result = null; 		Long startTime = System.currentTimeMillis(); 		try 		{ 			Map<String, String> header = getHeader("audio/L16;rate=16000", "lame", "xiaoyan", "50", "50", "", "text", 					"50"); 			Map<String, Object> resultMap = HttpUtil.doMultiPost(URL_TTS, header, "text=" + text); 			// 合成成功 			if ("audio/mpeg".equals(resultMap.get("Content-Type"))) 			{ 				FileUtil.save(FILE_PATH, resultMap.get("sid") + ".mp3", (byte[]) resultMap.get("body")); 				result = resultMap.get("sid") + ".mp3"; 			} else 			{ // 合成失败 				logger.error(resultMap.get("body").toString()); 			} 		} catch (Exception e) 		{ 			logger.error("there is error:", e); 		}  		Long endTime = System.currentTimeMillis(); 		logger.info("finish get voice:" + (endTime - startTime));  		return result; 	}  	/** 	 * 组装http请求头 	 *  	 * @param aue 	 * @param resultLevel 	 * @param language 	 * @param category 	 * @return 	 * @throws UnsupportedEncodingException 	 */ 	private static Map<String, String> getHeader(String auf, String aue, String voiceName, String speed, String volume, 			String engineType, String textType, String pitch) throws UnsupportedEncodingException 	{ 		String curTime = System.currentTimeMillis() / 1000L + ""; 		StringBuilder param = new StringBuilder("{\"auf\":\"" + auf + "\""); 		if (!StringUtil.isNullOrEmpty(aue)) 		{ 			param.append(",\"aue\":\"" + aue + "\""); 		} 		if 
(!StringUtil.isNullOrEmpty(voiceName)) 		{ 			param.append(",\"voice_name\":\"" + voiceName + "\""); 		} 		if (!StringUtil.isNullOrEmpty(speed)) 		{ 			param.append(",\"speed\":\"" + speed + "\""); 		} 		if (!StringUtil.isNullOrEmpty(volume)) 		{ 			param.append(",\"volume\":\"" + volume + "\""); 		} 		if (!StringUtil.isNullOrEmpty(pitch)) 		{ 			param.append(",\"pitch\":\"" + pitch + "\""); 		} 		if (!StringUtil.isNullOrEmpty(engineType)) 		{ 			param.append(",\"engine_type\":\"" + engineType + "\""); 		} 		if (!StringUtil.isNullOrEmpty(textType)) 		{ 			param.append(",\"text_type\":\"" + textType + "\""); 		} 		param.append("}");  		String paramBase64 = new String(Base64.encodeBase64(param.toString().getBytes("UTF-8"))); 		String checkSum = DigestUtils.md5Hex(APPKEY_TTS + curTime + paramBase64); 		Map<String, String> header = new HashMap<String, String>(); 		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"); 		header.put("X-Param", paramBase64); 		header.put("X-CurTime", curTime); 		header.put("X-CheckSum", checkSum); 		header.put("X-Real-Ip", IP); 		header.put("X-Appid", APPID); 		// logger.info(JSON.toJSONString(header)); 		return header; 	} 

  

原文:https://www.cnblogs.com/liguoyi/p/9231607.html

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!