业务经常遇到期初数据导入的情况,如果遇到大文件(超过100w行)
可以预先使用powershell把文件按照行数进行切割,分批导入的方式。以避免性能问题以及微软控件(如excel,ssis)遇到超过100w行的数据无法读写的问题。
#split test
$sw = new-object System.Diagnostics.Stopwatch
$sw.Start()
$filename = "C:\Source\FullExtract_data.csv"#源文件路径
$rootName = "C:\Target\splitOutput"#输出文件路径
$ext = "csv"#输出文件后缀
$linesperFile = 600000#修改此处代表分割的数据行数
$filecount = 1
$reader = $null
try{
$reader = [io.file]::OpenText($filename)
try{
"Creating file number $filecount"
$writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName,$filecount.ToString("000"),$ext))
$filecount++
$linecount = 0
while($reader.EndOfStream -ne $true) {
"Reading $linesperFile"
while( ($linecount -lt $linesperFile) -and ($reader.EndOfStream -ne $true)){
$writer.WriteLine($reader.ReadLine());
$linecount++
}
if($reader.EndOfStream -ne $true) {
"Closing file"
$writer.Dispose();
"Creating file number $filecount"
$writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName,$filecount.ToString("000"),$ext))
$filecount++
$linecount = 0
}
}
} finally {
$writer.Dispose();
}
} finally {
$reader.Dispose();
}
$sw.Stop()
Write-Host "Split complete in " $sw.Elapsed.TotalSeconds "seconds"