从 https://github.com/openai/openai-dotnet 下载 OpenAI 的 .NET 实现源码,编译生成 OpenAI.dll。
创建 .NET 8 的 Console App 项目,将 OpenAI.dll 复制到项目目录;用 JetBrains Rider 打开项目,在项目的 “Dependencies” 上右击,选择 “Reference”,添加对 OpenAI.dll 的引用(Add Reference)。程序代码如下:
using OpenAI;
using OpenAI.Chat;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
using System;
using System.Text;
using Xceed.Document.NET;
using Xceed.Words.NET;
// This app shows how to use OpenAI to translate a PDF file from English to Chinese.
// Build OpenAI.dll from OpenAI.csproj in https://github.com/openai/openai-dotnet and add it as a reference to this project.
// You should also add the following NuGet packages to your project:
// - itext7
// - Xceed.Words.NET
// - Xceed.Document.NET
// --- Configuration -----------------------------------------------------------
// Service endpoint, model name, and the API credential, which is read from the
// GITHUB_TOKEN environment variable.
var endpoint = new Uri("https://models.inference.ai.azure.com");
var credential = System.Environment.GetEnvironmentVariable("GITHUB_TOKEN");
if (string.IsNullOrWhiteSpace(credential))
{
    // Fail fast with a clear message instead of discovering the missing token
    // only when the client is constructed or the first request is sent.
    throw new InvalidOperationException("The GITHUB_TOKEN environment variable is not set.");
}
var model = "gpt-4o-mini";
var openAIOptions = new OpenAIClientOptions()
{
    Endpoint = endpoint
};

// --- Input -------------------------------------------------------------------
// Read the whole PDF as text and cut it into pieces of at most chunkSize
// characters, each of which is sent as a separate request.
var pdfPath = "./main.pdf";
var pdfText = ExtractTextFromPDF(pdfPath);
int chunkSize = 4000;
List<string> chunks = SplitStringIntoChunks(pdfText, chunkSize);

// Pause inserted between consecutive requests (60 seconds = 60,000 ms);
// presumably this keeps the program under the service's rate limit.
const int delayBetweenRequestsMs = 60000;

// Create the client once and reuse it for every chunk: none of its constructor
// arguments change between iterations.
var client = new ChatClient(model, credential, openAIOptions);

// --- Translation loop --------------------------------------------------------
for (int chunkIndex = 0; chunkIndex < chunks.Count; chunkIndex++)
{
    // Only wait *between* requests; the first request is sent immediately.
    if (chunkIndex > 0)
    {
        Thread.Sleep(delayBetweenRequestsMs);
    }

    // The user message is the Chinese instruction "translate English to Chinese:"
    // followed by the text of the current chunk.
    List<ChatMessage> messages = new List<ChatMessage>()
    {
        new SystemChatMessage("You are a helpful assistant."),
        new UserChatMessage("将英文翻译为中文:" + chunks[chunkIndex]),
    };
    var requestOptions = new ChatCompletionOptions()
    {
        Temperature = 1,
        MaxTokens = 4096,
    };

    var response = client.CompleteChat(messages, requestOptions);
    var contentParts = response.Value.Content;
    if (contentParts.Count == 0)
    {
        // Nothing to print for this chunk; report it on stderr and keep going
        // rather than crashing on contentParts[0].
        System.Console.Error.WriteLine($"Chunk {chunkIndex + 1}/{chunks.Count}: the response contained no content.");
        continue;
    }

    String responseText = contentParts[0].Text;
    System.Console.WriteLine(responseText);
}
// Extracts the text of every page of the PDF file at pdfPath and returns it as
// a single string.
//
// pdfPath: path of the PDF file to read.
// Returns: the text of pages 1..N in order. A newline is inserted between
//          consecutive pages so that the last word of one page does not fuse
//          with the first word of the next one.
// Any exception raised by the iText reader/document (for example for a missing
// or unreadable file) is not caught here and propagates to the caller.
static string ExtractTextFromPDF(string pdfPath)
{
    // Both objects are disposed when the function returns (document first,
    // then reader), matching the original nested using statements.
    using PdfReader reader = new PdfReader(pdfPath);
    using PdfDocument pdfDoc = new PdfDocument(reader);

    StringBuilder text = new StringBuilder();
    int numberOfPages = pdfDoc.GetNumberOfPages();

    // The loop addresses pages starting from 1 up to and including the page count.
    for (int pageNumber = 1; pageNumber <= numberOfPages; pageNumber++)
    {
        if (pageNumber > 1)
        {
            // Page separator: keeps words at a page boundary apart.
            text.Append('\n');
        }

        // A new strategy instance is created for each page.
        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
        string pageText = PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(pageNumber), strategy);
        text.Append(pageText);
    }

    return text.ToString();
}
// Splits a long string into consecutive pieces of at most chunkSize UTF-16
// characters, suitable for sending to an LLM one piece at a time.
//
// str:       the text to split; must not be null.
// chunkSize: maximum number of chars per piece; must be greater than zero.
// Returns:   the pieces in order; concatenating them reproduces str exactly.
//            An empty str yields an empty list.
// Throws:    ArgumentNullException if str is null;
//            ArgumentOutOfRangeException if chunkSize is zero or negative
//            (a zero size would otherwise never advance through the string).
//
// A cut is never placed between the two halves of a surrogate pair: when the
// boundary would fall there, the piece is shortened by one char so the pair
// moves to the next piece. Only when chunkSize is 1 is the piece lengthened to
// 2 chars instead, since a pair cannot fit in a single char.
List<string> SplitStringIntoChunks(string str, int chunkSize)
{
    ArgumentNullException.ThrowIfNull(str);
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    List<string> chunks = new List<string>();
    int start = 0;
    while (start < str.Length)
    {
        // The last piece may be shorter than chunkSize.
        int length = Math.Min(chunkSize, str.Length - start);
        int end = start + length;

        // Boundary falls between a high and a low surrogate: move the cut.
        if (end < str.Length && char.IsHighSurrogate(str[end - 1]) && char.IsLowSurrogate(str[end]))
        {
            length += length > 1 ? -1 : 1;
        }

        chunks.Add(str.Substring(start, length));
        start += length;
    }
    return chunks;
}