fix(ai-nutritionist): 语音录入跳过 OSS 上传,改用一句话识别 base64 直传(test-0415 反馈3-1)

- 旧链路把 mp3 上传到 /api/front/upload/imageOuter,被图片扩展名校验拒绝
- 改为本地读 base64 直接走 /api/front/tencent/asr/sentence-recognition(sourceType=1)
- 适用 ≤60s 短音频场景,命中 ai-nutritionist 限时录音上限 60s 的设计

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
msh-agent
2026-05-03 02:27:27 +08:00
parent 6187a92029
commit 82735a52b9
2 changed files with 239 additions and 268 deletions

View File

@@ -322,20 +322,189 @@ function queryAsrStatus(taskId) {
return request(`/api/front/tencent/asr/query-status/${taskId}`)
}
/**
 * Single-sentence speech recognition for short (<= 60s) audio.
 *
 * The audio is posted inline as base64 (`sourceType: 1`) instead of being
 * uploaded to OSS first; per the original note this sidesteps the image
 * upload endpoint's file-extension check, which rejects mp3 files.
 *
 * @param {string} base64Data Bare base64 payload, without a `data:` URI prefix.
 * @param {number} dataLen Byte length of the audio after base64 decoding.
 * @param {string} [format='mp3'] Audio format, sent as `voiceFormat`.
 * @returns {*} Whatever `request` returns for this POST.
 */
function sentenceRecognition(base64Data, dataLen, format = 'mp3') {
  const body = {
    engineModelType: '16k_zh',
    sourceType: 1,
    data: base64Data,
    dataLen,
    voiceFormat: format,
    filterDirty: false,
    filterModal: false,
    convertNumMode: false
  }
  const options = { method: 'POST', data: body }
  return request('/api/front/tencent/asr/sentence-recognition', options)
}
// ==================== KieAI Gemini Chat (BUG-005: AI nutritionist text / multimodal chat) ====================
/**
 * Flatten a `message.content` value into a display string.
 *
 * Accepted shapes:
 * - `null` / `undefined`            -> `''`
 * - number / boolean                -> `String(value)`
 * - string                          -> returned unchanged
 * - array of parts                  -> the truthy `text` of each part, concatenated
 * - object with a `parts` array     -> same concatenation over `parts`
 * - object with a string `text`     -> that string
 * - object with a string `content`  -> that string
 * Anything else yields `''`.
 *
 * @param {*} content Raw content value from a chat message.
 * @returns {string} The extracted text, or `''` when none can be read.
 */
function normalizeGeminiContentToString(content) {
  if (content == null) return ''

  // Concatenate the truthy `text` field of every part, skipping everything else.
  const joinPartTexts = (parts) => {
    const texts = []
    for (const part of parts) {
      if (part && part.text) texts.push(part.text)
    }
    return texts.join('')
  }

  switch (typeof content) {
    case 'string':
      return content
    case 'number':
    case 'boolean':
      return String(content)
    case 'object':
      break
    default:
      // functions, symbols, bigints: nothing displayable
      return ''
  }

  if (Array.isArray(content)) return joinPartTexts(content)
  if (Array.isArray(content.parts)) return joinPartTexts(content.parts)

  // Plain-object fallbacks, checked in priority order.
  for (const key of ['text', 'content']) {
    if (typeof content[key] === 'string') return content[key]
  }
  return ''
}
/**
 * BUG-005: read `choices[0].message.content` from an object that is already
 * in OpenAI chat-completion shape and return it as a trimmed string.
 *
 * Intended for CommonResult.data and for shallowly nested
 * data / result / output / body objects. No unwrapping is done here: the
 * argument itself must carry the `choices` array.
 *
 * @param {*} data Candidate completion object.
 * @returns {string} Trimmed assistant text, or `''` when the shape does not
 *   match or the text is empty / whitespace-only.
 */
function readKieaiGeminiDataChoicesAssistantText(data) {
  const isObject = (value) => Boolean(value) && typeof value === 'object'

  if (!isObject(data)) return ''

  const firstChoice = Array.isArray(data.choices) ? data.choices[0] : undefined
  if (!firstChoice) return ''

  const message = firstChoice.message
  if (!isObject(message)) return ''

  const text = normalizeGeminiContentToString(message.content)
  if (!text) return ''
  return text.trim()
}
/**
 * Read the first assistant text from a single completion node.
 *
 * Sources are tried in this order, and the first non-blank one wins:
 * 1. `choices[0].message.content` (OpenAI style)
 * 2. `choices[0].delta.content`   (OpenAI streaming style)
 * 3. `candidates[0].content`      (Gemini style)
 *
 * @param {*} node Completion-like object.
 * @returns {string} Trimmed text, or `''` when no source yields any.
 */
function getFirstChoiceOrCandidateText(node) {
  if (!node || typeof node !== 'object') return ''

  // Normalise a raw content value and trim it; '' means "nothing usable".
  const toTrimmedText = (content) => {
    const raw = normalizeGeminiContentToString(content)
    return raw ? raw.trim() : ''
  }

  const firstChoice = Array.isArray(node.choices) ? node.choices[0] : null
  if (firstChoice) {
    for (const field of ['message', 'delta']) {
      const holder = firstChoice[field]
      if (!holder) continue
      const text = toTrimmedText(holder.content)
      if (text) return text
    }
  }

  const firstCandidate = Array.isArray(node.candidates) ? node.candidates[0] : null
  if (firstCandidate) {
    const text = toTrimmedText(firstCandidate.content)
    if (text) return text
  }

  return ''
}
/**
 * Resolve CommonResult.data (or a raw upstream body) to the object that
 * actually carries `choices` / `candidates`, so callers can read
 * `data.choices[0].message.content` from the result.
 *
 * Every node reachable through the data / result / output / body keys is
 * inspected. Among nodes that have a non-empty `choices` or `candidates`
 * array, the winner is chosen by:
 *   1. nodes whose first assistant text is non-empty beat nodes whose text is empty;
 *   2. then deeper nodes beat shallower ones;
 *   3. then longer text beats shorter text.
 * This keeps an outer placeholder `choices` array (with empty text) from
 * shadowing the real completion nested further down.
 *
 * @param {*} payload CommonResult.data or an upstream response body.
 * @returns {*} The selected completion node; `payload` itself when it is not
 *   an object or when no node carries choices/candidates.
 */
function unwrapGeminiCompletionData(payload) {
  if (payload == null || typeof payload !== 'object') return payload

  const WRAPPER_KEYS = ['data', 'result', 'output', 'body']
  const MAX_DEPTH = 12 // hard stop for pathological nesting
  const seen = new Set() // guards against cyclic references
  const hits = []

  function visit(node, depth) {
    if (depth > MAX_DEPTH || node == null || typeof node !== 'object') return
    if (seen.has(node)) return
    seen.add(node)

    const hasChoices = Array.isArray(node.choices) && node.choices.length > 0
    const hasCandidates = Array.isArray(node.candidates) && node.candidates.length > 0
    if (hasChoices || hasCandidates) {
      const text = getFirstChoiceOrCandidateText(node)
      hits.push({ node, depth, textLen: text.length })
    }

    for (const key of WRAPPER_KEYS) {
      const child = node[key]
      if (child && typeof child === 'object') visit(child, depth + 1)
    }
  }

  visit(payload, 0)
  if (hits.length === 0) return payload

  hits.sort((a, b) => {
    if (a.textLen > 0 && b.textLen === 0) return -1
    if (a.textLen === 0 && b.textLen > 0) return 1
    if (a.depth !== b.depth) return b.depth - a.depth
    return b.textLen - a.textLen
  })
  return hits[0].node
}
/**
 * Tell whether `obj` is already an OpenAI-style completion whose first
 * assistant message carries non-blank text (BUG-005).
 *
 * Per the original note, callers use this to skip deep unwrapping when the
 * top level is already usable, so a deeper node with empty `choices` is not
 * picked by mistake.
 *
 * @param {*} obj Candidate completion object.
 * @returns {boolean} `true` only when `obj.choices[0].message.content`
 *   normalises to a string with non-whitespace characters.
 */
function hasNonEmptyFirstChoiceMessageContent(obj) {
  const hasChoicesArray = Boolean(obj) && typeof obj === 'object' && Array.isArray(obj.choices)
  if (!hasChoicesArray) return false

  const firstChoice = obj.choices[0]
  const message = firstChoice ? firstChoice.message : undefined
  if (!message) return false

  const text = normalizeGeminiContentToString(message.content)
  return Boolean(text) && Boolean(text.trim())
}
/**
 * Extract the model's reply text from the value returned by kieaiGeminiChat().
 *
 * BUG-005: the authoritative source is the OpenAI-style
 * `data.choices[0].message.content` on CommonResult.data. Lookup order:
 * 1. the payload itself, then each of its data / result / output / body
 *    children, then each such child's own `data` (shallow pass);
 * 2. the node chosen by unwrapGeminiCompletionData(), read strictly as
 *    `choices[0].message.content`;
 * 3. that same node read via getFirstChoiceOrCandidateText() (also covers
 *    `delta` and Gemini `candidates`).
 *
 * The payload is `apiResult.data` when `apiResult` has its own `data`
 * property, otherwise `apiResult` itself. A string payload is parsed as JSON.
 * No fallback text is invented: `''` is returned whenever nothing is found.
 *
 * @param {*} apiResult Return value of kieaiGeminiChat().
 * @returns {string} Trimmed assistant text, or `''`.
 */
function getKieaiGeminiChatMessageContent(apiResult) {
  const isObjectLike = (value) => value != null && typeof value === 'object'
  const WRAPPER_KEYS = ['data', 'result', 'output', 'body']

  if (!isObjectLike(apiResult)) return ''

  const hasOwnData = Object.prototype.hasOwnProperty.call(apiResult, 'data')
  let payload = hasOwnData ? apiResult.data : apiResult
  if (payload == null) return ''

  if (typeof payload === 'string') {
    try {
      payload = JSON.parse(payload)
    } catch (e) {
      // Not JSON: there is no completion structure to read from.
      return ''
    }
  }
  if (!isObjectLike(payload)) return ''

  // Shallow pass: payload, its wrapper children, and each child's `.data`.
  const readShallow = (root) => {
    const direct = readKieaiGeminiDataChoicesAssistantText(root)
    if (direct) return direct
    for (const key of WRAPPER_KEYS) {
      const wrapped = root[key]
      if (!isObjectLike(wrapped)) continue
      const fromWrapped = readKieaiGeminiDataChoicesAssistantText(wrapped)
      if (fromWrapped) return fromWrapped
      const doubleWrapped = wrapped.data
      if (!isObjectLike(doubleWrapped)) continue
      const fromDoubleWrapped = readKieaiGeminiDataChoicesAssistantText(doubleWrapped)
      if (fromDoubleWrapped) return fromDoubleWrapped
    }
    return ''
  }

  const shallowText = readShallow(payload)
  if (shallowText) return shallowText

  const resolved = unwrapGeminiCompletionData(payload)
  const strictText = readKieaiGeminiDataChoicesAssistantText(resolved)
  if (strictText) return strictText

  const fallbackText = getFirstChoiceOrCandidateText(resolved)
  if (!fallbackText) return ''
  return fallbackText.trim()
}
/**
@@ -376,7 +545,22 @@ function kieaiGeminiChat(data) {
return res
}
}
outData = unwrapGeminiCompletionData(outData)
if (outData != null && typeof outData === 'object' && !hasNonEmptyFirstChoiceMessageContent(outData)) {
let promoted = null
for (const key of ['data', 'result', 'output', 'body']) {
const nested = outData[key]
if (nested && typeof nested === 'object' && hasNonEmptyFirstChoiceMessageContent(nested)) {
promoted = nested
break
}
const d2 = nested && typeof nested === 'object' ? nested.data : null
if (d2 && typeof d2 === 'object' && hasNonEmptyFirstChoiceMessageContent(d2)) {
promoted = d2
break
}
}
outData = promoted != null ? promoted : unwrapGeminiCompletionData(outData)
}
return { ...res, data: outData }
})
}
@@ -981,8 +1165,12 @@ export default {
uploadFile,
createAsrTask,
queryAsrStatus,
sentenceRecognition,
kieaiGeminiChat,
kieaiGeminiChatStream,
getKieaiGeminiChatMessageContent,
normalizeGeminiContentToString,
readKieaiGeminiDataChoicesAssistantText,
// Coze API
cozeChat,
cozeChatStream,