使用 Hugging Face API 的音频转文本 Demo

前置准备

  1、科学上网

  2、音频文件。(可到在线翻译之类的网站生成音频⬇)

  3、使用的openai/whisper-large-v3模型地址:https://huggingface.co/openai/whisper-large-v3

(自行登录申请TOKEN)

音频转文本Demo

1、activity_main布局
<?xml version="1.0" encoding="utf-8"?>
<!-- Main screen layout: a result text area stacked above a horizontal row of two buttons. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical"
    tools:context=".MainActivity">

    <!-- Fixed-height (200dp) text area with a vertical scrollbar; the hint is shown while it is empty. -->
    <TextView
        android:id="@+id/tvResult"
        android:layout_width="match_parent"
        android:layout_height="200dp"
        android:layout_marginTop="20dp"
        android:hint="请先选择文件"
        android:scrollbars="vertical"/>

    <!-- Button row: both buttons use width 0dp with weight 1, so they split the row equally. -->
    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:orientation="horizontal">

        <!-- "Select file" button. -->
        <Button
            android:id="@+id/btnSelectFile"
            android:layout_width="0dp"
            android:layout_height="wrap_content"
            android:layout_weight="1"
            android:text="选择文件" />

        <!-- "Convert to text" button. -->
        <Button
            android:id="@+id/btnConvert"
            android:layout_width="0dp"
            android:layout_height="wrap_content"
            android:layout_weight="1"
            android:text="转换为文本" />

    </LinearLayout>

</LinearLayout>
2、在app的build.gradle中添加相关插件和依赖
// App-module build script (Gradle Kotlin DSL) for the audio-to-text demo.
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    // NOTE(review): "vice" looks like a typo for "voice"; it is left as-is because the
    // application id and namespace must stay in sync with the sources.
    namespace = "com.example.vicetotextdemo"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.example.vicetotextdemo"
        minSdk = 24
        targetSdk = 34
        versionCode = 1
        versionName = "1.0"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            // Code shrinking is switched off for release builds; the ProGuard files are still listed.
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    // Java and Kotlin are both compiled for Java 8 bytecode.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
}

dependencies {

    // AndroidX / Material UI libraries.
    implementation("androidx.core:core-ktx:1.9.0")
    implementation("androidx.appcompat:appcompat:1.6.1")
    implementation("com.google.android.material:material:1.11.0")
    implementation("androidx.constraintlayout:constraintlayout:2.1.4")
    // Unit and instrumentation test libraries.
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    // Networking stack used for the inference request: Retrofit on OkHttp, with Gson for JSON.
    implementation ("com.squareup.retrofit2:retrofit:2.9.0")
    implementation ("com.squareup.retrofit2:converter-gson:2.9.0")
    implementation ("com.squareup.okhttp3:okhttp:4.12.0")
    implementation ("com.google.code.gson:gson:2.10.1")
}
3、创建ApiService接口文件
/**
 * Retrofit service definition for the speech-to-text request.
 *
 * The endpoint path is relative; the base URL is supplied by whoever builds the Retrofit instance.
 */
interface MyApi {
    // Replace YOUR_TOKEN with the access token you obtained yourself.
    // NOTE(review): the token is compiled into the app as a static header; do not commit a real token.
    /**
     * POSTs [requestBody] to `models/openai/whisper-large-v3` with a bearer-token
     * `Authorization` header and an `application/octet-stream` content type.
     *
     * @param requestBody the request payload, sent as the HTTP body.
     * @return a [Call] yielding the unparsed [ResponseBody].
     */
    @Headers(
        "Authorization: Bearer YOUR_TOKEN",
        "Content-Type: application/octet-stream"
    )
    @POST("models/openai/whisper-large-v3")
    fun convertText(@Body requestBody: RequestBody): Call<ResponseBody>
}

/**
 * Wraps a byte array in an OkHttp [RequestBody] typed as `application/octet-stream`.
 *
 * @param data the raw bytes to upload.
 * @property requestBody the body to pass to the API call.
 */
class TextRequestBody(data: ByteArray) {
    val requestBody: RequestBody =
        "application/octet-stream".toMediaTypeOrNull().let { binaryType ->
            RequestBody.create(binaryType, data)
        }
}
4、创建ApiResponse数据类
/**
 * Model for the JSON object returned by the transcription request.
 *
 * NOTE(review): when this class is populated by Gson, [text] may presumably be null at
 * runtime if the JSON has no "text" field, despite the non-null type; confirm before
 * relying on it.
 *
 * @property text value of the "text" field of the response.
 */
data class ApiResponse(val text: String)
5、修改MainActivity代码
package com.example.vicetotextdemo

import android.annotation.SuppressLint
import android.app.Activity
import android.content.Intent
import android.content.pm.PackageManager
import android.net.Uri
import android.os.Bundle
import android.provider.OpenableColumns
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.google.gson.Gson
import com.google.gson.JsonParseException
import okhttp3.ResponseBody
import retrofit2.Call
import retrofit2.Callback
import retrofit2.Response
import retrofit2.Retrofit
import retrofit2.converter.gson.GsonConverterFactory
import java.io.IOException


/**
 * Demo screen: the user picks an audio file, its raw bytes are uploaded through [MyApi],
 * and the recognised text (or an error message) is shown in the result view.
 */
class MainActivity : AppCompatActivity() {

    companion object {
        private const val FILE_SELECT_CODE = 1
        private const val TAG = "TAG"
        private const val BASE_URL = "https://api-inference.huggingface.co/"
    }

    private lateinit var btnSelectFile: Button
    private lateinit var btnConvert: Button
    private lateinit var tvResult: TextView
    private var selectedFileUri: Uri? = null

    // Built once on first use instead of on every click of the convert button.
    private val service: MyApi by lazy {
        Retrofit.Builder()
            .baseUrl(BASE_URL)
            .addConverterFactory(GsonConverterFactory.create())
            .build()
            .create(MyApi::class.java)
    }
    private val gson = Gson()

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        btnSelectFile = findViewById(R.id.btnSelectFile)
        btnConvert = findViewById(R.id.btnConvert)
        tvResult = findViewById(R.id.tvResult)
        tvResult.movementMethod = ScrollingMovementMethod()

        // Select a file. No storage permission is requested: ACTION_GET_CONTENT returns a
        // content URI the app is allowed to read, and READ_EXTERNAL_STORAGE can no longer be
        // granted on Android 13+, which previously prevented the picker from ever opening.
        btnSelectFile.setOnClickListener { openFilePicker() }

        // Convert to text. Nothing happens until a file has been selected; the empty
        // result view shows its "select a file first" hint in that case.
        btnConvert.setOnClickListener {
            selectedFileUri?.let { uri -> convertToText(uri) }
        }
    }

    /**
     * Reads the file behind [uri], uploads it and shows the outcome in the result view.
     *
     * NOTE(review): the file is still read on the calling (UI) thread, as before; large
     * audio files may warrant moving the read off the main thread.
     */
    private fun convertToText(uri: Uri) {
        val audioBytes = readAudioBytes(uri)
        if (audioBytes == null) {
            tvResult.text = "Unable to read the selected file"
            return
        }

        tvResult.text = "Transcribing..."
        val body = TextRequestBody(audioBytes).requestBody
        service.convertText(body).enqueue(object : Callback<ResponseBody> {
            override fun onResponse(call: Call<ResponseBody>, response: Response<ResponseBody>) {
                tvResult.text = if (response.isSuccessful) {
                    parseTranscription(response.body()) ?: "Unexpected response from server"
                } else {
                    "Request failed: HTTP ${response.code()}"
                }
            }

            override fun onFailure(call: Call<ResponseBody>, t: Throwable) {
                Log.e(TAG, "Transcription request failed", t)
                tvResult.text = "Request failed: ${t.message}"
            }
        })
    }

    /**
     * Returns the full content behind [uri], or null when the provider yields no stream or
     * reading fails. The stream is always closed, including when reading throws.
     */
    private fun readAudioBytes(uri: Uri): ByteArray? =
        try {
            contentResolver.openInputStream(uri)?.use { stream -> stream.readBytes() }
        } catch (e: IOException) {
            Log.e(TAG, "Unable to read $uri", e)
            null
        } catch (e: SecurityException) {
            Log.e(TAG, "No permission to read $uri", e)
            null
        }

    /**
     * Extracts the recognised text from a successful response.
     *
     * @return the "text" field, or null if the body is missing, unreadable, not valid JSON
     *   for [ApiResponse], or lacks the field.
     */
    private fun parseTranscription(body: ResponseBody?): String? =
        try {
            val jsonResponse = body?.string()
            Log.d(TAG, "onResponse: $jsonResponse")
            // Parse the JSON response. The safe call also covers a null result for empty input.
            gson.fromJson(jsonResponse, ApiResponse::class.java)?.text
        } catch (e: IOException) {
            Log.e(TAG, "Unable to read response body", e)
            null
        } catch (e: JsonParseException) {
            Log.e(TAG, "Unable to parse response", e)
            null
        }

    // Open the system file picker, restricted to audio that can be opened as a stream.
    private fun openFilePicker() {
        val intent = Intent(Intent.ACTION_GET_CONTENT).apply {
            type = "audio/*"
            addCategory(Intent.CATEGORY_OPENABLE)
        }
        startActivityForResult(intent, FILE_SELECT_CODE)
    }

    // Handle the file picker result: remember the URI and show the chosen file name.
    override fun onActivityResult(requestCode: Int, resultCode: Int, data: Intent?) {
        super.onActivityResult(requestCode, resultCode, data)
        if (requestCode == FILE_SELECT_CODE && resultCode == Activity.RESULT_OK) {
            data?.data?.let { uri ->
                selectedFileUri = uri
                val fileName = getFileName(uri)
                tvResult.text = "Selected File: $fileName"
            }
        }
    }

    /**
     * Looks up the display name of [uri] through the content resolver.
     *
     * @return the display name, or an empty string when the query returns no row, the
     *   column is absent, or the stored value is null.
     */
    private fun getFileName(uri: Uri): String {
        contentResolver.query(uri, null, null, null, null)?.use { cursor ->
            // getColumnIndex returns -1 when the provider does not expose the column.
            val nameColumn = cursor.getColumnIndex(OpenableColumns.DISPLAY_NAME)
            if (nameColumn >= 0 && cursor.moveToFirst()) {
                return cursor.getString(nameColumn) ?: ""
            }
        }
        return ""
    }
}
6、在AndroidManifest.xml中注册相关网络和读取外部存储权限
<?xml version="1.0" encoding="utf-8"?>
<!-- App manifest: declares network access, legacy storage read access, and the launcher activity. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">
    <!-- Needed for the HTTPS request to the inference API. -->
    <uses-permission android:name="android.permission.INTERNET"/>
    <!-- READ_EXTERNAL_STORAGE has no effect on Android 13 (API 33) and above for apps
         targeting API 33+, so it is only declared for older releases. -->
    <uses-permission
        android:name="android.permission.READ_EXTERNAL_STORAGE"
        android:maxSdkVersion="32" />
    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.ViceToTextDemo"
        tools:targetApi="31">
        <!-- Launcher entry point of the app. -->
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:launchMode="singleTop">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>
</manifest>
7、运行结果

  • 7
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值