feat(ai-nutritionist): Coze TTS and streaming robustness

- Add Coze TTS endpoint and service; expose binary MP3 from controller.
- Bypass ResponseFilter for /audio/speech so MP3 bodies are not UTF-8 wrapped.
- UniApp: cozeTextToSpeech, TTS UI and play flow; SSE HTTP errors and diagnostics.
- Document TTS in docs/features.md; extend test-0325-1 with curl verification.

Made-with: Cursor
This commit is contained in:
msh-agent
2026-03-31 07:07:21 +08:00
parent 35052d655f
commit 2facd355ab
8 changed files with 433 additions and 351 deletions

View File

@@ -1,5 +1,8 @@
package com.zbkj.service.service.impl.tool;
import com.coze.openapi.client.audio.common.AudioFormat;
import com.coze.openapi.client.audio.speech.CreateSpeechReq;
import com.coze.openapi.client.audio.speech.CreateSpeechResp;
import com.coze.openapi.client.chat.CreateChatReq;
import com.coze.openapi.client.chat.CreateChatResp;
import com.coze.openapi.client.chat.RetrieveChatReq;
@@ -437,6 +440,27 @@ public class ToolCozeServiceImpl implements ToolCozeService {
}
}
/**
 * Voice ID sent to Coze when the caller does not provide one.
 * NOTE(review): the service interface describes this as the default Chinese voice — confirm
 * against the Coze voice list if the ID is ever changed.
 */
private static final String DEFAULT_VOICE_ID = "7468518753626652709";

/**
 * Converts text to speech through the Coze audio API and returns the raw audio bytes.
 *
 * @param input   text to synthesize; must not be null or blank
 * @param voiceId Coze voice ID; null or blank falls back to {@link #DEFAULT_VOICE_ID}
 * @param format  audio format name such as "mp3"; null or blank falls back to MP3
 * @param speed   speech rate, 1.0 when null
 * @return the audio payload returned by Coze
 * @throws IllegalArgumentException if {@code input} is null or blank
 * @throws RuntimeException if the request fails or no response body is returned
 */
@Override
public byte[] textToSpeech(String input, String voiceId, String format, Float speed) {
    // Fail fast on unusable input instead of spending a remote call on it.
    if (input == null || input.trim().isEmpty()) {
        throw new IllegalArgumentException("语音合成失败: 输入文本不能为空");
    }
    try {
        CozeAPI client = getClient();
        // Optional parameters often arrive as empty strings from HTTP query/form binding;
        // treat those the same as "not provided" so the documented defaults still apply.
        boolean hasFormat = format != null && !format.trim().isEmpty();
        boolean hasVoiceId = voiceId != null && !voiceId.trim().isEmpty();
        AudioFormat audioFormat = hasFormat ? AudioFormat.fromString(format.trim()) : AudioFormat.MP3;
        CreateSpeechReq req = CreateSpeechReq.builder()
                .input(input)
                .voiceID(hasVoiceId ? voiceId.trim() : DEFAULT_VOICE_ID)
                .responseFormat(audioFormat)
                .speed(speed != null ? speed : 1.0f)
                .build();
        CreateSpeechResp resp = client.audio().speech().create(req);
        // Surface a missing body as a descriptive error rather than a bare NullPointerException.
        if (resp == null || resp.getResponse() == null) {
            throw new IllegalStateException("Coze TTS returned an empty response body");
        }
        return resp.getResponse().bytes();
    } catch (Exception e) {
        logger.error("Coze TTS error", e);
        throw new RuntimeException("语音合成失败: " + e.getMessage(), e);
    }
}
/**
* 获取访问令牌
*/

View File

@@ -79,4 +79,15 @@ public interface ToolCozeService {
* @return 恢复结果
*/
CozeBaseResponse<Object> workflowResume(CozeWorkflowResumeRequest request);
/**
 * Text-to-speech (TTS): synthesizes the given text into audio.
 *
 * @param input   the text to synthesize
 * @param voiceId voice ID; when null, the default Chinese voice is used
 * @param format  audio format, e.g. "mp3"; when null, defaults to mp3
 * @param speed   speech rate, where 1.0 is normal speed; when null, the default is used
 * @return the audio as binary data
 */
byte[] textToSpeech(String input, String voiceId, String format, Float speed);
}