AI 应用流式响应实战：打造流畅的生成式 AI 体验

最新推荐文章于 2025-05-03 22:29:40 发布

Ethan独立开发

最新推荐文章于 2025-05-03 22:29:40 发布

阅读量1.3k

点赞数 31

文章标签：人工智能 前端 React

本文链接：https://blog.csdn.net/ChengFengTech/article/details/144280311

版权

"为什么 AI 回答要等这么久？"一个月前，我们刚上线的 AI 写作助手收到了这样的用户反馈。虽然生成的内容质量不错，但用户需要等待 15-20 秒才能看到完整回答，这种体验确实不够理想。作为技术负责人，我决定改造整个响应系统，实现流式输出。😊

今天，我想和大家分享如何在全栈项目中实现流畅的 AI 流式响应，包括前后端的实现细节和优化技巧。

理解流式响应

在开始之前，我们先理解为什么需要流式响应：

/**
 * Traditional (non-streaming) generation.
 *
 * Sends the prompt to the completion endpoint and resolves with the text of
 * the first choice once the whole completion has been returned.
 *
 * @param prompt - Text passed through as the completion prompt.
 * @returns The `text` field of the first returned choice.
 */
async function generateContent(prompt: string) {
  const request = {
    model: "gpt-3.5-turbo",
    prompt,
    max_tokens: 1000
  };

  const { choices } = await openai.createCompletion(request);

  // Drawback: nothing can be shown to the user until the full completion
  // has been generated and returned.
  const [firstChoice] = [choices[0]];
  return firstChoice.text;
}

/**
 * Streaming generation.
 *
 * Requests the completion with `stream: true` and yields the text of the
 * first choice of every chunk as soon as that chunk is received.
 *
 * @param prompt - Text passed through as the completion prompt.
 * @returns An async generator of text fragments, in arrival order.
 */
async function* streamContent(prompt: string) {
  const request = {
    model: "gpt-3.5-turbo",
    prompt,
    max_tokens: 1000,
    stream: true // turn on incremental delivery
  };

  const chunks = await openai.createCompletion(request);

  // Benefit: each fragment is handed to the consumer while generation is
  // still in progress.
  for await (const { choices } of chunks) {
    const fragment = choices[0].text;
    yield fragment;
  }
}

后端实现

1. Node.js 服务器流式响应

使用 Node.js 和 Express 实现流式响应端点：

// server/routes/ai.ts
import { Router } from 'express';
import { OpenAIStream } from './utils/openai';

const router = Router();

/**
 * POST /generate — streams a completion to the client as Server-Sent Events.
 *
 * Each upstream chunk is sent as `data: <json>\n\n`. The stream is terminated
 * with `data: [DONE]\n\n` on success or `data: [ERROR]\n\n` on failure.
 * Requests without a usable `prompt` are rejected with HTTP 400 before the
 * event stream is opened.
 */
router.post('/generate', async (req, res) => {
  // Validate first: once the SSE headers are committed we can no longer
  // answer with a regular HTTP error status.
  const prompt: unknown = req.body?.prompt;
  if (typeof prompt !== 'string' || prompt.trim() === '') {
    res.status(400).json({ error: 'prompt must be a non-empty string' });
    return;
  }

  // 'close' fires when the underlying connection goes away; remember it so
  // we stop forwarding chunks that nobody will read.
  let connectionClosed = false;
  res.on('close', () => {
    connectionClosed = true;
  });

  try {
    // SSE response headers
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    const stream = await OpenAIStream(prompt);

    // Re-emit every upstream chunk in SSE framing
    for await (const chunk of stream) {
      // Leaving the loop also lets the upstream iterator clean up.
      if (connectionClosed) break;

      const formattedChunk = formatChunk(chunk);
      res.write(`data: ${JSON.stringify(formattedChunk)}\n\n`);
    }

    if (!connectionClosed) {
      res.write('data: [DONE]\n\n');
    }
    res.end();
  } catch (error) {
    console.error('Stream error:', error);
    if (!connectionClosed) {
      res.write('data: [ERROR]\n\n');
    }
    res.end();
  }
});

/**
 * Converts one upstream chunk into the payload sent to the client.
 *
 * @param chunk - Upstream chunk; its first choice's `text` is forwarded.
 * @returns The text of the first choice plus the current time in
 *          milliseconds (`Date.now()`).
 */
function formatChunk(chunk: any) {
  const firstChoice = chunk.choices[0];
  const text = firstChoice.text;
  const timestamp = Date.now();

  return { text, timestamp };
}

2. 错误处理和重试机制

实现了健壮的错误处理和重试逻辑：

// utils/openai.ts
import { backOff } from 'exponential-backoff';

/**
 * Opens a streaming completion for `prompt`, retrying transient failures
 * with exponential backoff (up to 3 attempts, starting at 1s, doubling).
 *
 * Only rate-limit (429) and server-side (>= 500) failures are retried; any
 * other error is rethrown immediately.
 *
 * @param prompt - Text passed through as the completion prompt.
 * @returns Whatever `openai.createCompletion` resolved with on the first
 *          successful attempt.
 * @throws The last error once attempts are exhausted or the error is not
 *         retryable. Errors raised for a non-ok response carry a numeric
 *         `status` property.
 */
export async function OpenAIStream(prompt: string) {
  const getStream = async () => {
    const response = await openai.createCompletion({
      model: "gpt-3.5-turbo",
      prompt,
      stream: true,
      max_tokens: 1000
    });

    if (!response.ok) {
      // Attach the HTTP status to the error: the retry predicate below
      // classifies failures by `status`, and a bare Error would never be
      // considered retryable.
      // NOTE(review): assumes the response exposes a numeric `status`
      // alongside `ok`/`statusText` — confirm against the client in use.
      throw Object.assign(
        new Error(`OpenAI API error: ${response.statusText}`),
        { status: response.status }
      );
    }

    return response;
  };

  // Retry only rate limiting and server errors; client errors will not
  // succeed on a second attempt.
  const isRetryable = (e: unknown): boolean => {
    const status = Number((e as { status?: unknown } | null | undefined)?.status);
    return status === 429 || status >= 500;
  };

  // Exponential backoff between attempts
  const stream = await backOff(getStream, {
    numOfAttempts: 3,
    startingDelay: 1000,
    timeMultiple: 2,
    retry: isRetryable
  });

  return stream;
}

前端实现

1. React 组件实现

创建一个流式响应的 React 组件：

// components/StreamingResponse.tsx
import { useState, useEffect, useRef } from 'react';

/**
 * Renders the response to `prompt` as it streams in from `/api/generate`.
 *
 * A new request is started whenever `prompt` changes; the previous request is
 * aborted and the displayed content is cleared. While a request is running a
 * stop button is shown that aborts it.
 *
 * The endpoint is read as a Server-Sent-Events style body: lines of the form
 * `data: <json>` carry `{ text }` fragments, `data: [DONE]` ends the stream
 * and `data: [ERROR]` signals a server-side failure.
 */
export function StreamingResponse({ prompt }: { prompt: string }) {
  const [content, setContent] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);
  const abortController = useRef<AbortController>();

  useEffect(() => {
    if (!prompt) return;

    // Each run owns its controller so that a stale run can be told apart
    // from the current one when it finally settles.
    const controller = new AbortController();
    abortController.current = controller;

    async function startStreaming() {
      try {
        // Start from a clean slate instead of appending to the previous answer.
        setContent('');
        setIsStreaming(true);

        const response = await fetch('/api/generate', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ prompt }),
          signal: controller.signal
        });

        if (!response.ok || !response.body) {
          throw new Error(`Request failed with status ${response.status}`);
        }

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        // Holds the trailing partial line between reads: network chunks do
        // not align with SSE line boundaries.
        let pending = '';
        let finished = false;

        while (!finished) {
          const { done, value } = await reader.read();

          // `stream: true` keeps multi-byte characters that straddle two
          // chunks intact; the final argument-less call flushes the decoder.
          pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

          const lines = pending.split('\n');
          // Everything after the last newline is incomplete unless the body ended.
          pending = done ? '' : lines.pop() ?? '';

          for (const line of lines) {
            if (!line.startsWith('data: ')) continue;

            const payload = line.slice(6);
            // The sentinels are plain text, not JSON, so test them before parsing.
            if (payload === '[DONE]') {
              finished = true;
              break;
            }
            if (payload === '[ERROR]') {
              throw new Error('Server reported a generation error');
            }

            const data = JSON.parse(payload) as { text?: unknown };
            const text = data.text;
            if (typeof text === 'string') {
              setContent(prev => prev + text);
            }
          }

          if (done) break;
        }
      } catch (error) {
        // A deliberate abort (stop button, prompt change, unmount) is not an error.
        if (!controller.signal.aborted) {
          console.error('Streaming error:', error);
        }
      } finally {
        // Only the most recent run may clear the streaming flag.
        if (abortController.current === controller) {
          setIsStreaming(false);
        }
      }
    }

    startStreaming();

    return () => {
      controller.abort();
    };
  }, [prompt]);

  return (
    <div className="streaming-response">
      <div className="content">
        {content || (isStreaming && <span className="cursor" />)}
      </div>
      {isStreaming && (
        <button onClick={() => abortController.current?.abort()}>
          停止生成
        </button>
      )}
    </div>
  );
}

2. 优化用户体验

添加打字机效果和高亮显示：

// components/TypewriterEffect.tsx
import { useState, useEffect } from 'react';

/**
 * Reveals `content` one character at a time (every 30 ms) with a trailing
 * cursor while characters remain.
 *
 * The visible text is always derived from the current `content`, so the
 * display stays consistent when `content` grows, is replaced, or shrinks.
 */
export function TypewriterEffect({ content }: { content: string }) {
  // Number of UTF-16 code units of `content` currently revealed.
  const [currentIndex, setCurrentIndex] = useState(0);

  useEffect(() => {
    // Content was replaced by something shorter: pull the position back in range.
    if (currentIndex > content.length) {
      setCurrentIndex(content.length);
      return;
    }
    if (currentIndex === content.length) return;

    // Advance by a whole code point so surrogate pairs (e.g. emoji) are
    // never rendered half-way.
    const step = (content.codePointAt(currentIndex) ?? 0) > 0xffff ? 2 : 1;

    const timer = setTimeout(() => {
      setCurrentIndex(currentIndex + step);
    }, 30); // typing speed

    return () => clearTimeout(timer);
  }, [content, currentIndex]);

  const displayContent = content.slice(0, currentIndex);

  return (
    <div className="typewriter">
      <pre>
        <code className="language-markdown">
          {displayContent}
          {currentIndex < content.length && <span className="cursor">|</span>}
        </code>
      </pre>
    </div>
  );
}

性能优化

1. 内存管理

为了避免内存泄漏，我们实现了清理机制：

// hooks/useStreamingResponse.ts
/**
 * Hook that accumulates streamed text fragments in a ref.
 *
 * @returns `addChunk` to append a fragment and `getContent` to read all
 *          fragments concatenated in insertion order.
 */
export function useStreamingResponse() {
  const chunks = useRef<string[]>([]);
  const maxChunks = 1000; // upper bound on stored entries before compaction

  function addChunk(chunk: string) {
    chunks.current.push(chunk);
    if (chunks.current.length <= maxChunks) {
      return;
    }

    // Too many entries: collapse the oldest 100 into a single string.
    const stored = chunks.current;
    const oldest = stored.slice(0, 100);
    const remainder = stored.slice(100);
    chunks.current = [oldest.join(''), ...remainder];
  }

  function getContent() {
    return chunks.current.join('');
  }

  useEffect(() => {
    // Drop the accumulated fragments when the effect is cleaned up.
    return () => {
      chunks.current = [];
    };
  }, []);

  return { addChunk, getContent };
}

2. 网络优化

实现了智能的重连机制：

// utils/streaming.ts
/**
 * Calls `fetch(url, options)` and retries when the call rejects.
 *
 * Up to 3 retries are made after the initial attempt, waiting 2s, 4s and 8s
 * respectively. A rejection caused by an abort (`AbortError`) is never
 * retried, so cancelling the request takes effect immediately.
 *
 * @param url - Request URL.
 * @param options - Fetch options; its `headers` are merged with a
 *                  `Keep-Alive: timeout=60` header.
 * @returns The response of the first attempt that resolves. HTTP error
 *          statuses resolve normally and are not retried.
 * @throws The last rejection once retries are exhausted, or the abort error.
 */
export async function createStreamingConnection(url: string, options: StreamOptions) {
  const maxRetries = 3;

  for (let retryCount = 0; ; retryCount++) {
    try {
      return await fetch(url, {
        ...options,
        headers: {
          ...options.headers,
          // NOTE(review): browsers treat Keep-Alive as a forbidden request
          // header and drop it silently — confirm it is still wanted here.
          'Keep-Alive': 'timeout=60'
        }
      });
    } catch (error) {
      // The caller cancelled the request: retrying would only delay the
      // rejection and re-issue a request nobody wants.
      const aborted =
        typeof error === 'object' &&
        error !== null &&
        (error as { name?: unknown }).name === 'AbortError';

      if (aborted || retryCount >= maxRetries) throw error;

      // Exponential backoff: 2s, 4s, 8s
      const delay = Math.pow(2, retryCount + 1) * 1000;
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}