import * as fs from 'node:fs';

import * as ExcelJS from 'exceljs';
import { chromium, Browser, Page } from 'playwright';
// Path of the input HTML file handed to processHtmlFile at the bottom of this
// script. The value is a placeholder; point it at a real file before running.
const htmlFilePath = 'path/to/your/html-file.html';
/**
 * Reads an HTML file from disk and returns its contents decoded as UTF-8.
 *
 * @param filePath - Path of the HTML file to read.
 * @returns The full text of the file.
 * @throws Error when the file cannot be read. The message starts with
 *   '无法读取HTML文件' and also carries the path and the underlying reason so
 *   the root cause is not lost.
 */
async function readHtmlFile(filePath: string): Promise<string> {
  try {
    // `return await` (not a bare return) so a rejection is caught below.
    return await fs.promises.readFile(filePath, 'utf-8');
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`无法读取HTML文件: ${filePath} (${reason})`);
  }
}
/**
 * Collects the trimmed text of every <span> matched by
 * 'main div.chip div.chip-head.expanded-true span' on the given page.
 *
 * @param page - Playwright page whose DOM is queried.
 * @returns One string per matched <span>, in document order. A span whose
 *   textContent is null contributes an empty string, so the result length
 *   always equals the number of matched elements.
 */
async function getDynamicDataFromPage(page: Page): Promise<string[]> {
  // The callback is executed by page.evaluate against the page's `document`,
  // so it must not close over anything from this module.
  return page.evaluate(() => {
    const spans = document.querySelectorAll(
      'main div.chip div.chip-head.expanded-true span',
    );
    // textContent is typed `string | null`; guard before trimming so the
    // code type-checks under strictNullChecks and never throws on null.
    return Array.from(spans, (span) => (span.textContent ?? '').trim());
  });
}
/**
 * Writes the given strings into column A of a fresh worksheet named 'Data'
 * (one value per row, starting at A1) and saves the workbook as an .xlsx file.
 *
 * Failures are logged with console.error and not rethrown, so the returned
 * promise resolves even when the export did not succeed.
 *
 * @param data - Values to write, first element into A1, second into A2, etc.
 * @param excelFilePath - Destination path of the workbook.
 */
async function exportDataToExcel(data: string[], excelFilePath: string): Promise<void> {
  try {
    // Build the workbook with a single sheet.
    const book = new ExcelJS.Workbook();
    const sheet = book.addWorksheet('Data');

    // Fill column A top-down; Excel rows are 1-based.
    let rowNumber = 0;
    for (const entry of data) {
      rowNumber += 1;
      sheet.getCell(`A${rowNumber}`).value = entry;
    }

    // Persist to disk, then report success.
    await book.xlsx.writeFile(excelFilePath);
    console.log(`数据已成功导出到Excel文件:${excelFilePath}`);
  } catch (failure) {
    console.error('出现错误:', failure);
  }
}
/**
 * Loads a local HTML file into a headless Chromium page, extracts the span
 * texts via getDynamicDataFromPage, and exports them with exportDataToExcel.
 *
 * Any failure is logged with console.error and not rethrown.
 *
 * @param htmlFilePath - Path of the HTML file to render.
 * @param excelFilePath - Destination path of the generated workbook.
 */
async function processHtmlFile(htmlFilePath: string, excelFilePath: string): Promise<void> {
  try {
    // Read the file first so a bad path fails before a browser is started.
    const htmlContent = await readHtmlFile(htmlFilePath);

    const browser: Browser = await chromium.launch();
    try {
      const page: Page = await browser.newPage();
      // Load the HTML content into the page.
      await page.setContent(htmlContent);
      // Fixed grace period for the page to finish rendering (tune as needed).
      await page.waitForTimeout(2000);
      // Print the raw HTML that was loaded.
      console.log(htmlContent);
      const rowData = await getDynamicDataFromPage(page);
      // Export the extracted values to the Excel file.
      await exportDataToExcel(rowData, excelFilePath);
    } finally {
      // Always release the browser process, even when a step above throws.
      await browser.close();
    }
  } catch (err: unknown) {
    console.error('出现错误:', err);
  }
}
// Script entry point: run the HTML -> Excel pipeline with the configured paths.
// The output path is a placeholder; replace it before running.
const excelFilePath = 'path/to/your/excel-file.xlsx';
// `void` marks the promise as intentionally not awaited at top level; the
// catch is a last-resort guard so a rejection can never go unhandled.
void processHtmlFile(htmlFilePath, excelFilePath).catch((err: unknown) => {
  console.error('出现错误:', err);
});