一、实现的效果
以下是实现实时 AI 字幕平滑显示(类似豆包效果)并避免闪屏的完整 Java 代码方案,结合了高频更新优化和动画控制:
二、实现思路
由于公司的保密性,这里借用豆包官方的一张图说明。实现流程如下:首先接入实时语音或实时视频通话,客户端仅需推送音频流;大模型平台接收到音频流后,对音频流依次进行语音识别(ASR)、推理(LLM),最后通过语音合成(TTS)反馈给用户。该方案可以基于豆包开发平台实现,有资源的公司也可以自行实现。
1.接入流程(这里用豆包进行演示)
Android
推荐使用 Maven 在项目中接入 RTC SDK。
1. 在 Android Studio 左上角将工程视图切换为 Project 模式,在项目根目录的 settings.gradle 文件中配置 Maven 仓库地址。
// settings.gradle (project root): repositories used to resolve dependencies.
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
// "..." stands for the repositories already present in the project.
...
// Volcengine Maven repository that the surrounding text adds for the RTC SDK.
maven { url 'https://artifact.bytedance.com/repository/Volcengine/' }
}
}
说明
如果你的 Android Gradle Plugin 版本低于 v7.1.0,则应在项目根目录的 build.gradle 文件中配置 Maven 仓库地址。
// build.gradle (project root): the alternative location the text gives for
// Android Gradle Plugin versions below v7.1.0.
allprojects {
repositories {
// "..." stands for the repositories already present in the project.
...
// Volcengine Maven repository that the surrounding text adds for the RTC SDK.
maven { url 'https://artifact.bytedance.com/repository/Volcengine/' }
}
}
2. 在 App 的 build.gradle 文件中添加 RTC SDK 依赖。
说明
- 你需要将 '3.x.y.z' 替换为具体的版本号,最新版本号请参看下载 SDK。如果希望引入先前版本,请咨询技术支持。
- 集成 3.37 ~ 3.42 版本 SDK 时,需要引入 Kotlin 依赖,Kotlin 版本号请咨询技术支持。3.44+ 版本不需要引入 Kotlin 依赖。
- 3.50 版本起,RTC SDK 开始拆分为主库加功能插件的形式,你可以按需集成以适应不同的业务场景和 App 体积缩减需求。功能插件详情及集成方式参看减小 App 体积。
- 如果开发目标平台为 Android 12 (targetSdkVersion >= 31) 及以上版本的应用,请先参考 OpenCL 库 Android 12 适配说明。
// build.gradle (app module): adds the RTC SDK dependency.
dependencies {
// "..." stands for the dependencies already present in the module.
...
implementation 'com.volcengine:VolcEngineRTC:3.x.y.z' // replace 3.x.y.z with the RTC SDK version to integrate
}
3. 在项目根目录的 gradle.properties 文件中添加 android.enableJetifier=true,解决兼容性问题。
2.进入直播房间
这里只有实现思路,具体代码查看豆包demo
public class AiActivity extends BaseActivity<AiDoctorActivityLayoutBinding, AiVm>{
// Connection parameters. All are blank placeholders here and must be filled in.
private static final String APP_ID = ""; // app ID passed to RTCVideo.createRTCVideo
private static final String roomId = ""; // ID of the room to join
private static final String userId = ""; // user ID used when joining the room
private static final String streamId = ""; // stream ID passed to startPlayPublicStream
private static final String token = ""; // temporary token used when joining the room
// RTC engine instance; assigned in init().
private RTCVideo rtcVideo;
// Room handle; assigned in joinRoom() and cleared in leaveRoom().
private RTCRoom rtcRoom;
/** Returns the layout resource for this screen. */
@Override
protected int getLayoutResId() {
return R.layout.ai_doctor_activity_layout;
}
/**
 * Creates the view model through a {@code ViewModelProvider} scoped to this activity.
 * NOTE(review): the class header declares {@code AiVm} as the view-model type while this
 * requests {@code AiDoctorVm} — confirm the two are assignment-compatible.
 */
@Override
protected void initViewModel() {
mViewModel = new ViewModelProvider(this, new ViewModelProvider.NewInstanceFactory()).get(AiDoctorVm.class);
}
/** Intentionally empty: this screen binds nothing to the view model here. */
@Override
protected void bindViewModel() {
}
/**
 * Sets up the screen: requests runtime permissions, creates the RTC engine, then (on a
 * worker thread) starts audio capture, starts playing the public stream and joins the room.
 * Finally initialises the title bar, the list adapter and the click listeners.
 *
 * <p>Only audio is captured; video capture and local rendering are not started.
 */
@Override
protected void init() {
    requestPermission();
    // Create the engine.
    rtcVideo = RTCVideo.createRTCVideo(this, APP_ID, videoEventHandler, null, null);
    if (rtcVideo == null) {
        // Engine creation failed; skip the RTC start-up instead of crashing on the worker thread.
        Logger.d("====createRTCVideo returned null, RTC start-up skipped====");
    } else {
        // Keep using the instance this call created, even if the field is reassigned later.
        final RTCVideo engine = rtcVideo;
        new Thread(new Runnable() {
            @Override
            public void run() {
                // Start microphone capture.
                engine.startAudioCapture();
                // Start playing the public stream identified by streamId.
                engine.startPlayPublicStream(streamId);
                joinRoom(roomId);
            }
        }).start();
    }
    // Initialise the title layout.
    initTitleLayout();
    // Initialise the list adapter.
    initAiAdapter();
    eventListener();
}
/**
 * Registers UI listeners and assigns a fresh random UUID string to {@code uuid}.
 */
private void eventListener() {
    uuid = UUID.randomUUID().toString();
    // Tapping the stop button closes this screen.
    mDataBinding.aiStop.setOnClickListener(v -> finish());
}
/** Adapter backing the message list. */
private AiDoctorListAdapter adapter;
/** List handed to the adapter as its data set. */
private List<SubtitleMsgData> aiMsgConn = new ArrayList<>();

/**
 * Creates the list adapter, configures the RecyclerView (vertical layout, fixed size,
 * view cache of 20, drawing cache) and passes the adapter its initial data list.
 */
private void initAiAdapter() {
    String adapterUserId = "userId9132122113";
    adapter = new AiDoctorListAdapter(adapterUserId, mDataBinding.rclv);

    // Vertical, non-reversed list.
    mDataBinding.rclv.setLayoutManager(
            new MyLinearLayoutManager(this, LinearLayoutManager.VERTICAL, false));

    mDataBinding.rclv.setHasFixedSize(true);
    mDataBinding.rclv.setItemViewCacheSize(20);
    mDataBinding.rclv.setDrawingCacheEnabled(true);
    mDataBinding.rclv.setDrawingCacheQuality(View.DRAWING_CACHE_QUALITY_HIGH);

    mDataBinding.rclv.setAdapter(adapter);
    adapter.onItemDatasChangeds(aiMsgConn);
}
/**
 * Requests the microphone and camera runtime permissions when at least one of them has not
 * been granted. Does nothing below Android M, where {@code requestPermissions} is unavailable.
 * Both permissions are requested together with request code 22.
 */
public void requestPermission() {
    final String[] requiredPermissions = {Manifest.permission.RECORD_AUDIO, Manifest.permission.CAMERA};
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.M) {
        return;
    }
    // Use the same constants for the check and the request so the two cannot drift apart.
    for (String permission : requiredPermissions) {
        if (ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED) {
            requestPermissions(requiredPermissions, 22);
            return;
        }
    }
}
/**
 * Engine-level callbacks; passed to RTCVideo.createRTCVideo in init().
 */
IRTCVideoEventHandler videoEventHandler = new IRTCVideoEventHandler() {
// Login result: logs the uid.
@Override
public void onLoginResult(String uid, int errorCode, int elapsed) {
super.onLoginResult(uid, errorCode, elapsed);
Logger.d("====登录==22==" + uid);
}
};
/**
 * Room-level callbacks; registered on the room in joinRoom().
 */
IRTCRoomEventHandler rtcRoomEventHandler = new IRTCRoomEventHandler() {
// Binary room message received.
@Override
public void onRoomBinaryMessageReceived(String uid, ByteBuffer message) {
super.onRoomBinaryMessageReceived(uid, message);
// Give the subtitle parser its own duplicate so reading it does not move the original's position.
ByteBuffer msg = message.duplicate();
// Subtitles: a payload tagged "subv" carries subtitle content.
onRoomBinaryMessageReceivedTextContent(uid, msg);
// Agent speaking state: a payload tagged "conv" is an agent status message.
onRoomBinaryMessageReceivedDataDeal(message);
}
// Text message from a user received: logged only.
@Override
public void onUserMessageReceived(String uid, String message) {
super.onUserMessageReceived(uid, message);
Logger.d("===onUserMessageReceived==3333==" + message);
}
// A visible remote user joined the room: logged only.
@Override
public void onUserJoined(UserInfo userInfo, int elapsed) {
super.onUserJoined(userInfo, elapsed);
Logger.d("===用户加入房间===" + new Gson().toJson(userInfo));
}
// A visible remote user left the room: logged only.
@Override
public void onUserLeave(String uid, int reason) {
super.onUserLeave(uid, reason);
Logger.d("===用户离开房间===" + new Gson().toJson(uid));
}
// A remote user published a stream: logged only.
// NOTE(review): the log text mentions local audio capture, but this callback reports a
// stream published by user `uid` — confirm which is intended.
@Override
public void onUserPublishStream(String uid, MediaStreamType type) {
super.onUserPublishStream(uid, type);
// Placeholder for starting the AI agent.
Logger.d("===开启本地音频采集==1111==" + uid);
}
};
/**
 * Parses an agent conversation-status message from a binary room message.
 *
 * <p>Bytes are read in this order: a 4-byte tag, a 4-byte length (via {@code getInt()}),
 * then {@code length} bytes of UTF-8 JSON. Buffers that are too short, carry a tag other
 * than {@code "conv"}, or declare a length that does not fit in the remaining bytes are
 * ignored. This method runs for every binary message, so malformed input must not throw.
 *
 * @param buffer the received message; its position is advanced by whatever is read
 */
public void onRoomBinaryMessageReceivedDataDeal(ByteBuffer buffer) {
    final int tagSize = 4;
    final int lengthFieldSize = 4;
    if (buffer == null || buffer.remaining() < tagSize) {
        return;
    }
    byte[] prefixBytes = new byte[tagSize];
    buffer.get(prefixBytes);
    String prefix = new String(prefixBytes, StandardCharsets.UTF_8);
    if (!"conv".equals(prefix)) {
        // Not an agent status message.
        return;
    }
    if (buffer.remaining() < lengthFieldSize) {
        return;
    }
    int length = buffer.getInt();
    // Reject lengths that would underflow the buffer or produce a negative array size.
    if (length < 0 || length > buffer.remaining()) {
        return;
    }
    byte[] jsonBytes = new byte[length];
    buffer.get(jsonBytes);
    String jsonContent = new String(jsonBytes, StandardCharsets.UTF_8);
    // Parsed status; the tutorial leaves consuming it to the reader.
    ConversionStatusMessage statusMessage = new Gson().fromJson(jsonContent, ConversionStatusMessage.class);
}
/**
 * Subtitle callback: unpacks the binary frame and, when it is a valid subtitle frame,
 * forwards the extracted JSON text to {@code parseData}.
 *
 * @param uid    sender id reported by the room callback (not used here)
 * @param buffer the received frame; consumed by {@code unpack}
 */
public void onRoomBinaryMessageReceivedTextContent(String uid, ByteBuffer buffer) {
    StringBuilder payload = new StringBuilder();
    if (!unpack(buffer, payload)) {
        return;
    }
    parseData(payload.toString());
}
/**
 * Validates a subtitle frame and extracts its text.
 *
 * <p>A frame is accepted when it has at least 8 bytes, starts with the four bytes
 * {@code 0x73 0x75 0x62 0x76} ("subv"), and the following {@code getInt()} length equals the
 * number of bytes left. The remaining bytes are decoded as UTF-8 and appended to
 * {@code subtitles}.
 *
 * @param message   frame to read; its position advances past whatever was read
 * @param subtitles receives the decoded text on success; untouched on failure
 * @return {@code true} if the frame was valid and its text was appended
 */
public static boolean unpack(ByteBuffer message, StringBuilder subtitles) {
    final int headerBytes = 8;
    final int expectedMagic = 0x73756276; // ASCII "subv"

    if (message.remaining() < headerBytes) {
        return false;
    }

    // Assemble the first four bytes, most significant first.
    int magic = 0;
    for (int i = 0; i < 4; i++) {
        magic = (magic << 8) | (message.get() & 0xFF);
    }
    if (magic != expectedMagic) {
        return false;
    }

    int payloadLength = message.getInt();
    if (payloadLength != message.remaining()) {
        return false;
    }

    // Read the subtitle payload.
    byte[] payload = new byte[payloadLength];
    message.get(payload);
    subtitles.append(new String(payload, StandardCharsets.UTF_8));
    return true;
}
/**
 * Parses a subtitle JSON document into {@code SubtitleMsgData} entries and wraps them in a
 * {@code Message} (what = 6, bundle key "subtitles").
 *
 * <p>Every element of the top-level {@code "data"} array is expected to carry the fields
 * {@code definite}, {@code language}, {@code paragraph}, {@code sequence}, {@code text} and
 * {@code userId}. Any exception during parsing is caught and its stack trace printed.
 *
 * <p>NOTE(review): the message is assembled but never handed to a handler in this method —
 * confirm where it is meant to be sent.
 *
 * @param msg subtitle JSON text
 */
public void parseData(String msg) {
    try {
        // Parse the JSON text.
        JsonNode root = new ObjectMapper().readTree(msg);

        // Convert each entry of "data" into a SubtitleMsgData.
        List<SubtitleMsgData> subtitles = new ArrayList<>();
        for (JsonNode entry : root.get("data")) {
            SubtitleMsgData row = new SubtitleMsgData();
            row.definite = entry.get("definite").asBoolean();
            row.language = entry.get("language").asText();
            row.paragraph = entry.get("paragraph").asBoolean();
            row.sequence = entry.get("sequence").asInt();
            row.text = entry.get("text").asText();
            row.userId = entry.get("userId").asText();
            subtitles.add(row);
        }

        // Put the parsed list into a bundle.
        Bundle payload = new Bundle();
        payload.putSerializable("subtitles", (ArrayList<? extends Serializable>) subtitles);

        // Attach the bundle to a message.
        Message outgoing = Message.obtain();
        outgoing.what = 6;
        outgoing.setData(payload);
        // Sending is not implemented here.
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Creates the room object, registers the room callbacks and joins the room.
 *
 * <p>The room is joined with the chat-room channel profile and with auto-publish,
 * auto-subscribe-audio and auto-subscribe-video all enabled.
 *
 * @param roomId id of the room to create and join
 */
private void joinRoom(String roomId) {
    rtcRoom = rtcVideo.createRTCRoom(roomId);
    rtcRoom.setRTCRoomEventHandler(rtcRoomEventHandler);

    // Local user, with empty extra info.
    UserInfo localUser = new UserInfo(userId, "");

    RTCRoomConfig config = new RTCRoomConfig(
            ChannelProfile.CHANNEL_PROFILE_CHAT_ROOM,
            true,   // auto-publish
            true,   // auto-subscribe audio
            true);  // auto-subscribe video

    rtcRoom.joinRoom(token, localUser, config);
    // Starting the AI agent would go here.
}
/**
 * Leaves and destroys the current room, if there is one, and clears the reference.
 */
private void leaveRoom() {
    if (rtcRoom == null) {
        return;
    }
    // Stopping the AI agent would go here, before leaving.
    rtcRoom.leaveRoom();
    rtcRoom.destroy();
    rtcRoom = null;
}
/**
 * Tears down RTC resources: leaves the room, stops video and audio capture on the engine
 * (when one exists) and then destroys the engine.
 */
@Override
protected void onDestroy() {
super.onDestroy();
// Leave and destroy the room before the engine goes away.
leaveRoom();
if (rtcVideo != null) {
rtcVideo.stopVideoCapture();
rtcVideo.stopAudioCapture();
}
RTCVideo.destroyRTCVideo();
}
/**
 * Handles messages delivered to this screen.
 * what == 1: subtitle message (currently a no-op).
 * NOTE(review): parseData() builds its message with what = 6, which no case here matches —
 * confirm the intended message code.
 */
@Override
public void handlerMessage(Message msg) {
switch (msg.what) {
case 1:// subtitle message
break;
}
}
}
接收到的字幕消息
三、字幕消息处理
1. 数据模型与 DiffUtil 实现
字幕数据类 CaptionItem.java
public class CaptionItem {
private final String id; // 唯一标识(避免数据重复刷新)
private final String text;
private final long timestamp;
public CaptionItem(String text) {
this.id = UUID.randomUUID().toString();
this.text = text;
this.timestamp = System.currentTimeMillis();
}
// Getters
public String getId() { return id; }
public String getText() { return text; }
public long getTimestamp() { return timestamp; }
}
差异计算工具 CaptionDiffCallback.java
/**
 * {@code DiffUtil.Callback} comparing two caption lists: entries are the same item when their
 * ids are equal, and have the same content when their texts are equal.
 */
public class CaptionDiffCallback extends DiffUtil.Callback {
    private final List<CaptionItem> oldList;
    private final List<CaptionItem> newList;

    /**
     * @param oldList list currently displayed
     * @param newList list about to be displayed
     */
    public CaptionDiffCallback(List<CaptionItem> oldList, List<CaptionItem> newList) {
        this.oldList = oldList;
        this.newList = newList;
    }

    @Override
    public int getOldListSize() {
        return oldList.size();
    }

    @Override
    public int getNewListSize() {
        return newList.size();
    }

    @Override
    public boolean areItemsTheSame(int oldPos, int newPos) {
        return oldList.get(oldPos).getId().equals(newList.get(newPos).getId());
    }

    @Override
    public boolean areContentsTheSame(int oldPos, int newPos) {
        // CaptionItem accepts a null text, so compare null-safely instead of calling equals on it.
        return java.util.Objects.equals(
                oldList.get(oldPos).getText(), newList.get(newPos).getText());
    }
}
2. 高性能适配器 CaptionAdapter.java
/**
 * RecyclerView adapter for streaming captions.
 *
 * <p>Captions may be added from any thread. They are buffered and moved to the list on the
 * main thread, either as soon as {@link #BATCH_SIZE} captions are waiting or after
 * {@link #FLUSH_DELAY_MS} without a new caption, so a trailing partial batch is still shown.
 * List changes are dispatched through {@code DiffUtil}, and the entry animation runs only the
 * first time a position is bound, so rebinding does not replay it.
 */
public class CaptionAdapter extends RecyclerView.Adapter<CaptionAdapter.ViewHolder> {
    /** Number of buffered captions that triggers an immediate flush. */
    private static final int BATCH_SIZE = 3;
    /** Maximum idle time before a partial batch is flushed. */
    private static final long FLUSH_DELAY_MS = 100L;

    /** Displayed captions; read and replaced on the main thread only. */
    private List<CaptionItem> items = new ArrayList<>();
    private final Handler uiHandler = new Handler(Looper.getMainLooper());
    /** Pending captions; guarded by its own monitor. */
    private final Queue<CaptionItem> bufferQueue = new LinkedList<>();
    /** Always runs on the main thread via {@link #uiHandler}. */
    private final Runnable flushTask = this::flushBuffer;

    /** RecyclerView this adapter is attached to, or null while detached. */
    private RecyclerView attachedRecyclerView;
    /** Highest position whose entry animation has already been played. */
    private int lastAnimatedPosition = -1;

    /**
     * Queues a caption for display. Safe to call from any thread.
     *
     * @param item caption to append
     */
    public void addCaption(CaptionItem item) {
        boolean batchReady;
        synchronized (bufferQueue) {
            bufferQueue.add(item);
            batchReady = bufferQueue.size() >= BATCH_SIZE;
        }
        // Reschedule: flush now for a full batch, otherwise after a short idle period.
        uiHandler.removeCallbacks(flushTask);
        if (batchReady) {
            uiHandler.post(flushTask);
        } else {
            uiHandler.postDelayed(flushTask, FLUSH_DELAY_MS);
        }
    }

    /** Moves everything currently buffered into the displayed list. Main thread only. */
    private void flushBuffer() {
        final List<CaptionItem> batch;
        synchronized (bufferQueue) {
            if (bufferQueue.isEmpty()) {
                return;
            }
            batch = new ArrayList<>(bufferQueue);
            bufferQueue.clear();
        }
        updateData(batch);
    }

    /** Appends {@code newItems} and dispatches the resulting diff to the adapter. */
    private void updateData(List<CaptionItem> newItems) {
        List<CaptionItem> newList = new ArrayList<>(items);
        newList.addAll(newItems);
        DiffUtil.DiffResult diffResult = DiffUtil.calculateDiff(new CaptionDiffCallback(items, newList));
        items = newList;
        diffResult.dispatchUpdatesTo(this);
        autoScrollToBottom();
    }

    static class ViewHolder extends RecyclerView.ViewHolder {
        TextView textView;

        public ViewHolder(View itemView) {
            super(itemView);
            textView = itemView.findViewById(R.id.tv_caption);
        }
    }

    @Override
    public void onAttachedToRecyclerView(@NonNull RecyclerView recyclerView) {
        super.onAttachedToRecyclerView(recyclerView);
        attachedRecyclerView = recyclerView;
    }

    @Override
    public void onDetachedFromRecyclerView(@NonNull RecyclerView recyclerView) {
        super.onDetachedFromRecyclerView(recyclerView);
        attachedRecyclerView = null;
        // Drop any scheduled flush so nothing is posted against a detached list.
        uiHandler.removeCallbacks(flushTask);
    }

    @NonNull
    @Override
    public ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
        View view = LayoutInflater.from(parent.getContext())
                .inflate(R.layout.item_caption, parent, false);
        return new ViewHolder(view);
    }

    @Override
    public void onBindViewHolder(@NonNull ViewHolder holder, int position) {
        CaptionItem item = items.get(position);
        holder.textView.setText(item.getText());
        if (position > lastAnimatedPosition) {
            // Entry animation, played once per newly shown position.
            Animation animation = AnimationUtils.loadAnimation(holder.itemView.getContext(), R.anim.slide_in_bottom);
            holder.itemView.startAnimation(animation);
            lastAnimatedPosition = position;
        } else {
            // Recycled view for an already-shown position: make sure no old animation is running.
            holder.itemView.clearAnimation();
        }
    }

    @Override
    public int getItemCount() {
        return items.size();
    }

    /** Scrolls the attached list to its last item, if the adapter is attached and non-empty. */
    private void autoScrollToBottom() {
        RecyclerView recyclerView = attachedRecyclerView;
        if (recyclerView != null && getItemCount() > 0) {
            recyclerView.smoothScrollToPosition(getItemCount() - 1);
        }
    }
}
3. 动画资源文件
字幕进入动画 res/anim/slide_in_bottom.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Caption entry animation: slide up from one item-height below while fading in, 300 ms. -->
<!-- shareInterpolator="false" lets the translate keep its own decelerate interpolator;
     with the default (true) the set's interpolator replaces the children's. -->
<set xmlns:android="http://schemas.android.com/apk/res/android"
    android:duration="300"
    android:shareInterpolator="false">
    <translate
        android:fromYDelta="100%"
        android:toYDelta="0%"
        android:interpolator="@android:anim/decelerate_interpolator"/>
    <alpha
        android:fromAlpha="0.0"
        android:toAlpha="1.0"/>
</set>
4. Activity 主逻辑 MainActivity.java
/**
 * Demo screen: shows a caption list and feeds it from a background thread that simulates a
 * streaming AI source.
 */
public class MainActivity extends AppCompatActivity {
    private CaptionAdapter adapter;
    /** Single worker that produces the simulated captions; shut down in onDestroy(). */
    private final ExecutorService backgroundExecutor = Executors.newSingleThreadExecutor();

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // Set up the RecyclerView.
        RecyclerView recyclerView = findViewById(R.id.recycler_view);
        recyclerView.setLayoutManager(new LinearLayoutManager(this));
        recyclerView.setItemAnimator(new DefaultItemAnimator()); // enable the default item animations
        adapter = new CaptionAdapter();
        recyclerView.setAdapter(adapter);
        // Simulated caption input; replace with the real AI callback.
        simulateAIStream();
    }

    /** Emits 20 captions, one every 500 ms, until finished or interrupted. */
    private void simulateAIStream() {
        backgroundExecutor.execute(() -> {
            for (int i = 0; i < 20; i++) {
                try {
                    Thread.sleep(500); // simulated 500 ms interval
                } catch (InterruptedException e) {
                    // shutdownNow() interrupts this worker: restore the flag and stop, so no
                    // further captions are pushed to a destroyed activity.
                    Thread.currentThread().interrupt();
                    return;
                }
                final String text = "实时字幕内容 " + (i + 1);
                adapter.addCaption(new CaptionItem(text));
            }
        });
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        backgroundExecutor.shutdownNow(); // stop the worker so it does not outlive the activity
    }
}
5. 布局文件
字幕条目 item_caption.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- One caption row: a full-width container holding a single wrap-content text bubble. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="vertical"
android:padding="8dp">
<!-- Caption text; looked up by the adapter's ViewHolder as R.id.tv_caption. -->
<TextView
android:id="@+id/tv_caption"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:background="@drawable/bg_caption"
android:padding="12dp"
android:textColor="#333"
android:textSize="14sp"/>
</LinearLayout>
圆角背景 drawable/bg_caption.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Caption bubble background: solid light grey fill with 16dp rounded corners. -->
<shape xmlns:android="http://schemas.android.com/apk/res/android">
<solid android:color="#E0E0E0"/>
<corners android:radius="16dp"/>
</shape>
6. 关键技术点
- 批量更新:通过缓冲队列合并高频更新,减少刷新次数
- 差异刷新:使用 DiffUtil 精确计算变化项,避免全局重绘
- 动画优化:
  - 自定义进入动画(滑动 + 渐显)
  - 设置 DefaultItemAnimator 确保默认动画流畅
- 线程安全:
  - 使用 synchronized 保证队列操作线程安全
  - 通过 Handler 确保 UI 操作在主线程
- 内存管理:
  - 在 onDestroy 中关闭后台线程
  - 避免在 onBindViewHolder 中持有外部引用