feat(ai-nutritionist): Coze TTS and streaming robustness
- Add Coze TTS endpoint and service; expose binary MP3 from controller. - Bypass ResponseFilter for /audio/speech so MP3 bodies are not UTF-8 wrapped. - UniApp: cozeTextToSpeech, TTS UI and play flow; SSE HTTP errors and diagnostics. - Document TTS in docs/features.md; extend test-0325-1 with curl verification. Made-with: Cursor
This commit is contained in:
@@ -22,6 +22,7 @@ import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Coze API 控制器
|
||||
@@ -132,4 +133,23 @@ public class CozeController {
|
||||
public CozeBaseResponse<Object> workflowResume(@RequestBody CozeWorkflowResumeRequest request) {
|
||||
return toolCozeService.workflowResume(request);
|
||||
}
|
||||
|
||||
/**
|
||||
* 文本转语音 (TTS)
|
||||
*/
|
||||
@ApiOperation(value = "文本转语音", notes = "调用 Coze TTS 将文本合成为 MP3 音频并直接返回二进制流")
|
||||
@PostMapping("/audio/speech")
|
||||
public void textToSpeech(@RequestBody Map<String, Object> params, HttpServletResponse response) throws IOException {
|
||||
String input = (String) params.get("input");
|
||||
String voiceId = (String) params.get("voiceId");
|
||||
String format = params.get("format") != null ? (String) params.get("format") : "mp3";
|
||||
Float speed = params.get("speed") != null ? ((Number) params.get("speed")).floatValue() : null;
|
||||
|
||||
byte[] audioData = toolCozeService.textToSpeech(input, voiceId, format, speed);
|
||||
response.setContentType("audio/mpeg");
|
||||
response.setHeader("Content-Disposition", "inline; filename=speech.mp3");
|
||||
response.setContentLength(audioData.length);
|
||||
response.getOutputStream().write(audioData);
|
||||
response.getOutputStream().flush();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,17 @@ public class ResponseFilter implements Filter {
|
||||
@Override
|
||||
public void doFilter(ServletRequest request, ServletResponse response, FilterChain filterChain)
|
||||
throws IOException, ServletException {
|
||||
HttpServletRequest httpRequest = (HttpServletRequest) request;
|
||||
String uri = httpRequest.getRequestURI();
|
||||
String accept = httpRequest.getHeader("Accept");
|
||||
// SSE 流式响应和二进制音频响应不能被缓冲,直接透传
|
||||
boolean isSseStream = uri != null && uri.contains("/stream");
|
||||
boolean acceptsSse = accept != null && accept.contains("text/event-stream");
|
||||
boolean isAudioResponse = uri != null && uri.contains("/audio/speech");
|
||||
if (isSseStream || acceptsSse || isAudioResponse) {
|
||||
filterChain.doFilter(request, response);
|
||||
return;
|
||||
}
|
||||
ResponseWrapper wrapperResponse = new ResponseWrapper((HttpServletResponse) response);//转换成代理类
|
||||
// 这里只拦截返回,直接让请求过去,如果在请求前有处理,可以在这里处理
|
||||
filterChain.doFilter(request, wrapperResponse);
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
package com.zbkj.service.service.impl.tool;
|
||||
|
||||
import com.coze.openapi.client.audio.common.AudioFormat;
|
||||
import com.coze.openapi.client.audio.speech.CreateSpeechReq;
|
||||
import com.coze.openapi.client.audio.speech.CreateSpeechResp;
|
||||
import com.coze.openapi.client.chat.CreateChatReq;
|
||||
import com.coze.openapi.client.chat.CreateChatResp;
|
||||
import com.coze.openapi.client.chat.RetrieveChatReq;
|
||||
@@ -437,6 +440,27 @@ public class ToolCozeServiceImpl implements ToolCozeService {
|
||||
}
|
||||
}
|
||||
|
||||
// Coze voice ID passed to the speech request when the caller supplies no voiceId.
private static final String DEFAULT_VOICE_ID = "7468518753626652709";
|
||||
|
||||
@Override
|
||||
public byte[] textToSpeech(String input, String voiceId, String format, Float speed) {
|
||||
try {
|
||||
CozeAPI client = getClient();
|
||||
AudioFormat audioFormat = (format != null) ? AudioFormat.fromString(format) : AudioFormat.MP3;
|
||||
CreateSpeechReq req = CreateSpeechReq.builder()
|
||||
.input(input)
|
||||
.voiceID(voiceId != null ? voiceId : DEFAULT_VOICE_ID)
|
||||
.responseFormat(audioFormat)
|
||||
.speed(speed != null ? speed : 1.0f)
|
||||
.build();
|
||||
CreateSpeechResp resp = client.audio().speech().create(req);
|
||||
return resp.getResponse().bytes();
|
||||
} catch (Exception e) {
|
||||
logger.error("Coze TTS error", e);
|
||||
throw new RuntimeException("语音合成失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取访问令牌
|
||||
*/
|
||||
|
||||
@@ -79,4 +79,15 @@ public interface ToolCozeService {
|
||||
* @return 恢复结果
|
||||
*/
|
||||
CozeBaseResponse<Object> workflowResume(CozeWorkflowResumeRequest request);
|
||||
|
||||
/**
|
||||
* Text-to-speech (TTS).
|
||||
*
|
||||
* @param input the text to synthesize
|
||||
* @param voiceId voice ID; when null the default Chinese voice is used
|
||||
* @param format audio format such as "mp3"; defaults to mp3 when null
|
||||
* @param speed speaking rate, 1.0 being normal speed; a default is used when null
|
||||
* @return the synthesized audio as binary data
|
||||
*/
|
||||
byte[] textToSpeech(String input, String voiceId, String format, Float speed);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user