1.安装客户端,前面已经说过,不再多言。
2.修改kettle的源码
/*!
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2013 Pentaho Corporation.. All rights reserved.
*/
package org.pentaho.di.trans.steps.gpload;
//
// The "designer" notes of the Greenplum bulkloader:
// ----------------------------------------------
//
// - "Enclosed" is used in the loader instead of "optionally enclosed" as optionally
// encloses kind of destroys the escaping.
// - A Boolean is output as Y and N (as in the text output step e.g.). If people don't
// like this they can first convert the boolean value to something else before loading
// it.
// - Filters (besides data and datetime) are not supported as it slows down.
//
//
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Semaphore;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Performs a bulk load to an Greenplum table.
*
* Based on (copied from) Sven Boden's Oracle Bulk Loader step
*
* @author Luke Lonergan, Matt Casters, Sean Flatley
* @since 28-mar-2008, 17-dec-2010
*/
public class GPLoad extends BaseStep implements StepInterface {
// Message bundle anchor used with BaseMessages.getString(); declaration left untouched because the
// trailing marker comment says the Translator2 tool relies on it.
private static Class<?> PKG = GPLoadMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

// Fragments used to assemble the gpload YAML control file. They are never reassigned, so they are
// declared final.
// NOTE(review): INDENT is a single space here; YAML nesting depends on it, and the value may have been
// collapsed when this source was pasted - confirm against the upstream file.
private static final String INDENT = " ";
private static final String GPLOAD_YAML_VERSION = "VERSION: 1.0.0.1";
private static final String SINGLE_QUOTE = "'";
private static final String OPEN_BRACKET = "[";
private static final String CLOSE_BRACKET = "]";
private static final String SPACE_PADDED_DASH = " - ";
private static final String COLON = ":";
private static final char DOUBLE_QUOTE = '"';

// Handle of the gpload child process most recently started by execute().
Process gploadProcess = null;

// Step metadata and data, refreshed on every init()/processRow()/dispose() call.
private GPLoadMeta meta;
protected GPLoadData data;

// Writer for the data file of the current batch; null while no batch file is open.
private GPLoadDataOutput output = null;

/*
 * Local copy of the transformation "preview" property. We only forward the rows upon previewing, we don't do any of
 * the real stuff.
 */
private boolean preview = false;

// Disabled experiment (bounded queue, capacity 1000):
// public BlockingQueue<String> queue = new ArrayBlockingQueue<String>(1000);
//
// This class continually reads from the stream, and sends it to the log
// if the logging level is at least basic level.
//
private final class StreamLogger extends Thread {
private InputStream input;
private String type;
StreamLogger( InputStream is, String type ) {
this.input = is;
this.type = type + ">";
}
public void run() {
try {
final BufferedReader br = new BufferedReader( new InputStreamReader( input ) );
String line;
while ( ( line = br.readLine() ) != null ) {
// Only perform the concatenation if at basic level. Otherwise,
// this just reads from the stream.
if ( log.isBasic() ) {
logBasic( type + line );
}
}
} catch ( IOException ioe ) {
ioe.printStackTrace();
}
}
}
/**
 * Creates the step instance; all arguments are handed unchanged to the {@code BaseStep} constructor.
 *
 * @param stepMeta
 *          the step definition
 * @param stepDataInterface
 *          the runtime data holder of the step
 * @param copyNr
 *          the copy number of this step instance
 * @param transMeta
 *          the transformation definition
 * @param trans
 *          the running transformation
 */
public GPLoad(
  StepMeta stepMeta,
  StepDataInterface stepDataInterface,
  int copyNr,
  TransMeta transMeta,
  Trans trans ) {
  super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
/**
 * Builds the text of the gpload YAML control file from the step configuration.
 *
 * The data file named in the FILE entry is derived from {@code commandL} (the control file path set by
 * {@code createControlFile}) by replacing its extension with "dat".
 * NOTE(review): the data file is actually written by GPLoadDataOutput under a name derived from
 * meta.getDataFile(); both names only coincide when the configured control and data files share a base
 * name - confirm this is intended.
 *
 * @param meta
 *          the meta object to model the control file after
 * @param rm
 *          the input row metadata; not used by this method
 *
 * @return a string containing the control file contents
 * @throws KettleException
 *           when the target table, match/update columns (update and merge actions), data file,
 *           delimiter or maximum error count are missing or invalid
 */
public String getControlFileContents( GPLoadMeta meta, RowMetaInterface rm ) throws KettleException {
  String[] tableFields = meta.getFieldTable();
  boolean[] matchColumn = meta.getMatchColumn();
  boolean[] updateColumn = meta.getUpdateColumn();

  // ---- table name validation
  DatabaseMeta databaseMeta = meta.getDatabaseMeta();
  String schemaName = meta.getSchemaName();
  String targetTableName = meta.getTableName();

  // A null schema name is treated as an empty one.
  if ( schemaName == null ) {
    schemaName = "";
  }
  if ( targetTableName == null ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.TargetTableNameMissing" ) );
  }
  targetTableName = environmentSubstitute( targetTableName ).trim();
  if ( Const.isEmpty( targetTableName ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.TargetTableNameMissing" ) );
  }

  // Schema name should be unquoted (gpload yaml parse error)
  schemaName = environmentSubstitute( schemaName );
  if ( Const.isEmpty( schemaName ) ) {
    schemaName = databaseMeta.getPreferredSchemaName();
  }
  schemaName = Const.isEmpty( schemaName ) ? "" : schemaName + ".";
  targetTableName = schemaName + databaseMeta.quoteField( targetTableName );

  // ---- match and update column verification
  String loadAction = meta.getLoadAction();
  // Evaluated once, case-insensitively, so the validation here and the OUTPUT section below can never
  // disagree about whether this is an update/merge load.
  boolean updateOrMerge =
    loadAction.equalsIgnoreCase( GPLoadMeta.ACTION_MERGE ) || loadAction.equalsIgnoreCase( GPLoadMeta.ACTION_UPDATE );
  if ( updateOrMerge ) {
    if ( matchColumn == null || !meta.hasMatchColumn() ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.MatchColumnsNeeded" ) );
    }
    if ( updateColumn == null || !meta.hasUpdateColumn() ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.UpdateColumnsNeeded" ) );
    }
  }

  // ---- data file validation
  // commandL is only set by createControlFile(); fail with a proper message instead of a
  // NullPointerException when this method is called on its own.
  if ( commandL == null ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
  }
  int extensionDot = commandL.lastIndexOf( '.' );
  String dataFilename =
    extensionDot < 0 ? commandL + ".dat" : commandL.substring( 0, extensionDot + 1 ) + "dat";
  dataFilename = environmentSubstitute( dataFilename ).trim();
  if ( Const.isEmpty( dataFilename ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
  }

  // ---- delimiter validation
  String delimiter = meta.getDelimiter();
  if ( !Const.isEmpty( delimiter ) ) {
    delimiter = environmentSubstitute( delimiter ).trim();
  }
  if ( Const.isEmpty( delimiter ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
  }

  // ---- now we start building the contents
  String doubleIndent = INDENT + INDENT;
  StringBuilder contents = new StringBuilder( 1000 );

  // Source: GP Admin Guide 3.3.6, page 635:
  contents.append( GPLOAD_YAML_VERSION ).append( Const.CR );
  contents.append( "DATABASE: " ).append( environmentSubstitute( databaseMeta.getDatabaseName() ) ).append(
    Const.CR );
  contents.append( "USER: " ).append( environmentSubstitute( databaseMeta.getUsername() ) ).append( Const.CR );
  contents.append( "HOST: " ).append( environmentSubstitute( databaseMeta.getHostname() ) ).append( Const.CR );
  contents.append( "PORT: " ).append( environmentSubstitute( databaseMeta.getDatabasePortNumberString() ) )
    .append( Const.CR );
  contents.append( "GPLOAD:" ).append( Const.CR );
  contents.append( INDENT ).append( "INPUT: " ).append( Const.CR );
  contents.append( INDENT ).append( "- SOURCE: " ).append( Const.CR );

  // LOCAL_HOSTNAME section: only written when at least one non-empty host remains after trimming and
  // variable substitution.
  String[] localHosts = meta.getLocalHosts();
  if ( !Const.isEmpty( localHosts ) ) {
    StringBuilder hostLines = new StringBuilder();
    for ( String localHost : localHosts ) {
      String host = environmentSubstitute( localHost.trim() );
      if ( !Const.isEmpty( host ) ) {
        hostLines.append( doubleIndent ).append( SPACE_PADDED_DASH ).append( host ).append( Const.CR );
      }
    }
    if ( hostLines.length() > 0 ) {
      contents.append( doubleIndent ).append( "LOCAL_HOSTNAME: " ).append( Const.CR ).append( hostLines );
    }
  }

  // No PORT entry is written for the SOURCE section: the configured local port
  // (meta.getLocalhostPort()) was deliberately disabled in this variant of the step, which left the
  // previous PORT-writing code unreachable.

  // TODO: Stream to a temporary file and then bulk load OR optionally stream to a named pipe (like MySQL bulk loader)
  dataFilename = SINGLE_QUOTE + environmentSubstitute( dataFilename ) + SINGLE_QUOTE;
  contents.append( doubleIndent ).append( "FILE: " ).append( OPEN_BRACKET ).append( dataFilename ).append(
    CLOSE_BRACKET ).append( Const.CR );

  // columns
  if ( tableFields.length > 0 ) {
    contents.append( INDENT ).append( "- COLUMNS: " ).append( Const.CR );
    for ( String columnName : tableFields ) {
      contents.append( doubleIndent ).append( SPACE_PADDED_DASH ).append( databaseMeta.quoteField( columnName ) )
        .append( COLON ).append( Const.CR );
    }
  }

  // See also page 155 for formatting information & escaping
  contents.append( INDENT ).append( "- FORMAT: TEXT" ).append( Const.CR );
  contents.append( INDENT ).append( "- DELIMITER: " ).append( SINGLE_QUOTE ).append( delimiter ).append(
    SINGLE_QUOTE ).append( Const.CR );

  if ( !Const.isEmpty( meta.getNullAs() ) ) {
    contents.append( INDENT ).append( "- NULL_AS: " ).append( SINGLE_QUOTE ).append( meta.getNullAs() ).append(
      SINGLE_QUOTE ).append( Const.CR );
  }

  // TODO: implement escape character
  // TODO: test what happens when a single quote is specified - can we specify a single quote within
  // double quotes then?
  // An empty enclosure is acceptable, so only null is replaced.
  String enclosure = meta.getEnclosure();
  enclosure = enclosure == null ? "" : environmentSubstitute( enclosure );
  contents.append( INDENT ).append( "- QUOTE: " ).append( SINGLE_QUOTE ).append( enclosure ).append( SINGLE_QUOTE )
    .append( Const.CR );
  contents.append( INDENT ).append( "- HEADER: FALSE" ).append( Const.CR );

  // ENCODING
  String encoding = meta.getEncoding();
  if ( !Const.isEmpty( encoding ) ) {
    contents.append( INDENT ).append( "- ENCODING: " ).append( encoding ).append( Const.CR );
  }

  // Max errors: must be a non-negative integer after variable substitution.
  String maxErrors = meta.getMaxErrors();
  if ( maxErrors == null ) {
    maxErrors = GPLoadMeta.MAX_ERRORS_DEFAULT;
  } else {
    maxErrors = environmentSubstitute( maxErrors );
    boolean valid;
    try {
      valid = Integer.parseInt( maxErrors ) >= 0;
    } catch ( NumberFormatException nfe ) {
      valid = false;
    }
    if ( !valid ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.MaxErrorsInvalid" ) );
    }
  }
  contents.append( INDENT ).append( "- ERROR_LIMIT: " ).append( maxErrors ).append( Const.CR );

  String errorTableName = meta.getErrorTableName();
  if ( !Const.isEmpty( errorTableName ) ) {
    errorTableName = environmentSubstitute( errorTableName ).trim();
    if ( !Const.isEmpty( errorTableName ) ) {
      contents.append( INDENT ).append( "- ERROR_TABLE: " ).append( errorTableName ).append( Const.CR );
    }
  }

  // -------------- OUTPUT section
  contents.append( INDENT ).append( "OUTPUT:" ).append( Const.CR );
  contents.append( INDENT ).append( "- TABLE: " ).append( targetTableName ).append( Const.CR );
  contents.append( INDENT ).append( "- MODE: " ).append( loadAction ).append( Const.CR );

  // TODO: MAPPING
  // TODO: add support for BEFORE and AFTER SQL

  if ( updateOrMerge ) {
    if ( meta.hasMatchColumn() ) {
      contents.append( INDENT ).append( "- MATCH_COLUMNS: " ).append( Const.CR );
      for ( int i = 0; i < matchColumn.length; i++ ) {
        if ( matchColumn[i] ) {
          contents.append( doubleIndent ).append( SPACE_PADDED_DASH ).append(
            databaseMeta.quoteField( tableFields[i] ) ).append( Const.CR );
        }
      }
    }

    if ( meta.hasUpdateColumn() ) {
      contents.append( INDENT ).append( "- UPDATE_COLUMNS: " ).append( Const.CR );
      for ( int i = 0; i < updateColumn.length; i++ ) {
        if ( updateColumn[i] ) {
          contents.append( doubleIndent ).append( SPACE_PADDED_DASH ).append(
            databaseMeta.quoteField( tableFields[i] ) ).append( Const.CR );
        }
      }
    }

    String updateCondition = meta.getUpdateCondition();
    if ( !Const.isEmpty( updateCondition ) ) {
      // Line breaks become spaces; the condition may turn out empty once they are removed.
      updateCondition = updateCondition.replaceAll( "[\r\n]", " " ).trim();
      if ( !Const.isEmpty( updateCondition ) ) {
        contents.append( INDENT ).append( "- UPDATE_CONDITION: " ).append( DOUBLE_QUOTE ).append( updateCondition )
          .append( DOUBLE_QUOTE ).append( Const.CR );
      }
    }
  }

  return contents.toString();
}
/**
 * Path of the control file most recently written by {@link #createControlFile(GPLoadMeta)}, after
 * variable substitution; {@code null} until a control file has been created. It is read when building
 * the gpload command line and when deriving the data file name for the control file.
 */
String commandL = null;

/**
 * Creates the control file for this step copy.
 *
 * The configured control file name gets the value of {@code getUniqueStepNrAcrossSlaves()} inserted
 * before its extension (appended when there is no extension); the result is variable-substituted,
 * remembered in {@code commandL} and written with the output of {@code getControlFileContents}.
 *
 * @param meta
 *          the step metadata providing the control file name and the load settings
 * @throws KettleException
 *           when no control file is configured, the contents cannot be generated, or the file cannot
 *           be written
 */
public void createControlFile( GPLoadMeta meta ) throws KettleException {
  String filename = meta.getControlFile();
  // Validate before touching the string; previously the substring calls ran first, so a missing name
  // surfaced as a NullPointerException instead of this message.
  if ( Const.isEmpty( filename ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
  }

  int number = getUniqueStepNrAcrossSlaves();
  int extensionDot = filename.lastIndexOf( '.' );
  if ( extensionDot < 0 ) {
    filename = filename + number;
  } else {
    filename = filename.substring( 0, extensionDot ) + number + filename.substring( extensionDot );
  }

  filename = environmentSubstitute( filename ).trim();
  if ( Const.isEmpty( filename ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
  }

  // Remember the resolved path so the command line points at the file that is actually written.
  commandL = filename;

  // Generate the contents first: a validation failure must not leave a truncated control file behind.
  String contents = getControlFileContents( meta, getInputRowMeta() );

  FileWriter fw = null;
  try {
    fw = new FileWriter( new File( filename ) );
    fw.write( contents );
    // Flush explicitly so write errors are reported here rather than lost in the quiet close below.
    fw.flush();
  } catch ( IOException ex ) {
    throw new KettleException( ex.getMessage(), ex );
  } finally {
    if ( fw != null ) {
      try {
        fw.close();
      } catch ( IOException ignored ) {
        // The data was flushed above; a failure while releasing the handle is not actionable.
      }
    }
  }
}
/**
 * Resolves a configured path for use on the gpload command line and verifies it against the file system.
 *
 * When {@code checkExistenceOfFile} is true the file itself must exist; the result is then the
 * substituted path (quoted by {@code addQuotes}) on Windows and the VFS file name elsewhere. When it is
 * false only the parent folder must exist and the VFS file name is returned.
 *
 * @param pathToFile
 *          Path to the file to verify; variables are substituted and the result is trimmed.
 * @param exceptionMessage
 *          The message to use when the path is not provided.
 * @param checkExistenceOfFile
 *          When true the path's existence will be verified, otherwise the existence of its parent folder.
 * @return the path to put on the command line
 * @throws KettleException
 *           when the path is empty, the file or parent folder does not exist, or the file system
 *           cannot be queried
 */
private String getPath( String pathToFile, String exceptionMessage, boolean checkExistenceOfFile )
  throws KettleException {
  // Reject a missing path, both before and after variable substitution.
  if ( Const.isEmpty( pathToFile ) ) {
    throw new KettleException( exceptionMessage );
  }
  pathToFile = environmentSubstitute( pathToFile ).trim();
  if ( Const.isEmpty( pathToFile ) ) {
    throw new KettleException( exceptionMessage );
  }

  FileObject fileObject = KettleVFS.getFileObject( pathToFile, getTransMeta() );
  try {
    if ( !checkExistenceOfFile ) {
      // The file may still have to be created, but its folder must already be there.
      FileObject parentFolder = fileObject.getParent();
      if ( !parentFolder.exists() ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.DirectoryDoesNotExist",
          parentFolder.getURL().getPath() ) );
      }
      return KettleVFS.getFilename( fileObject );
    }

    if ( !fileObject.exists() ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Execption.FileDoesNotExist", pathToFile ) );
    }

    boolean onWindows = Const.getOS().startsWith( "Windows" );
    return onWindows ? addQuotes( pathToFile ) : KettleVFS.getFilename( fileObject );
  } catch ( FileSystemException fsex ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.GPLoadCommandBuild", fsex.getMessage() ) );
  }
}
/**
 * Create the command line for GPLoad depending on the meta information supplied.
 *
 * The control file passed with {@code -f} is the one recorded in {@code commandL}, so
 * {@code createControlFile} has to run before this method.
 *
 * @param meta
 *          The meta data to create the command line from
 * @param password
 *          Not used by this implementation; kept for interface compatibility
 *
 * @return The string to execute.
 *
 * @throws KettleException
 *           when the gpload executable or log file path is invalid, or no control file has been created
 */
public String createCommandLine( GPLoadMeta meta, boolean password ) throws KettleException {
  // Without this guard a missing control file would silently produce "-f null".
  if ( Const.isEmpty( commandL ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
  }

  StringBuilder sbCommandLine = new StringBuilder( 300 );

  // On Windows the executable is started through the command interpreter.
  if ( Const.getOS().startsWith( "Windows" ) ) {
    sbCommandLine.append( "cmd /c " );
  }

  // path to the executable, which must exist
  sbCommandLine.append( getPath( meta.getGploadPath(), BaseMessages.getString( PKG,
    "GPLoad.Exception.GPLoadPathMisssing" ), true ) );

  // -v option, passed unconditionally
  sbCommandLine.append( " -v " );

  // the control file written by createControlFile()
  sbCommandLine.append( " -f " );
  sbCommandLine.append( commandL );

  // the log file, if specified; only its folder has to exist
  String logfile = meta.getLogFile();
  if ( !Const.isEmpty( logfile ) ) {
    sbCommandLine.append( " -l " );
    sbCommandLine.append( getPath( logfile, BaseMessages.getString( PKG, "GPLoad.Exception.LogFilePathMissing" ),
      false ) );
  }

  return sbCommandLine.toString();
}
/** Limits the number of gpload processes run at the same time by all instances of this step in the JVM. */
private static final java.util.concurrent.Semaphore sp = new Semaphore( 32 );

/**
 * Runs gpload with the command line built by {@code createCommandLine}.
 *
 * A permit of the shared semaphore is acquired before the process is started and is always returned
 * when this method exits, whether the load succeeded, failed or was interrupted. The process output and
 * error streams are forwarded to the step log by two {@code StreamLogger} threads.
 *
 * @param meta
 *          the step metadata used to build the command line
 * @param wait
 *          when true, block until gpload ends and fail on a non-zero exit value; when false, return
 *          right after the process has been started
 * @return always true; failures are reported through exceptions
 * @throws KettleException
 *           when the command line cannot be built, the process cannot be started, the wait is
 *           interrupted, or gpload ends with a non-zero exit value
 */
public boolean execute( GPLoadMeta meta, boolean wait ) throws KettleException {
  String commandLine = null;
  int gpLoadExitVal = 0;
  boolean permitHeld = false;

  try {
    commandLine = createCommandLine( meta, true );

    long waitStarted = System.currentTimeMillis();
    // Free-form diagnostics are logged directly: they are not message bundle keys, so routing them
    // through BaseMessages.getString() could not resolve them.
    logBasic( "可用装载进程数:" + sp.availablePermits() );
    sp.acquire();
    permitHeld = true;

    logBasic( "Executing: " + commandLine );
    logBasic( commandLine + "等待:" + ( System.currentTimeMillis() - waitStarted ) );

    gploadProcess = Runtime.getRuntime().exec( commandLine );

    // Drain both process streams into the log.
    StreamLogger errorLogger = new StreamLogger( gploadProcess.getErrorStream(), "ERROR" );
    StreamLogger outputLogger = new StreamLogger( gploadProcess.getInputStream(), "OUTPUT" );
    errorLogger.start();
    outputLogger.start();

    if ( wait ) {
      gpLoadExitVal = gploadProcess.waitFor();
      logBasic( BaseMessages.getString( PKG, "GPLoad.Log.ExitValuePsqlPath", "" + gpLoadExitVal ) );
      if ( gpLoadExitVal != 0 ) {
        throw new KettleException(
          BaseMessages.getString( PKG, "GPLoad.Log.ExitValuePsqlPath", "" + gpLoadExitVal ) );
      }
    }
  } catch ( KettleException ke ) {
    throw ke;
  } catch ( InterruptedException ie ) {
    // Keep the interrupt visible to the calling thread.
    Thread.currentThread().interrupt();
    throw new KettleException( "Error while executing \'" + commandLine + "\'. Exit value = " + gpLoadExitVal );
  } catch ( Exception ex ) {
    // Don't throw the message upwards, the message contains the password.
    throw new KettleException( "Error while executing \'" + commandLine + "\'. Exit value = " + gpLoadExitVal );
  } finally {
    // Previously the permit was only returned after a successful waitFor(); every failure, and every
    // call with wait == false, consumed a permit for good and could eventually block all loads.
    if ( permitHeld ) {
      sp.release();
    }
  }

  return true;
}
// Number of processRow() calls made on this step copy (added 2017-12-8); drives the periodic
// intermediate load below.
private long count = 0;

/**
 * Processes one input row: writes it to the current data file and passes it on to the next steps.
 *
 * Outside preview mode:
 * <ul>
 * <li>every 100,000,000th call closes the current data file, creates the control file, runs gpload,
 * deletes both files and starts a new data file before the current row is written;</li>
 * <li>at the end of the input the data file is closed and, depending on the load method, the control
 * file is created and gpload is run (automatic load, only when rows were output) or only the control
 * file is created (manual load).</li>
 * </ul>
 * In preview mode rows are only forwarded.
 *
 * NOTE(review): {@code first} is not declared in this class; presumably it is the flag inherited from
 * BaseStep - confirm.
 *
 * @param smi
 *          the step metadata, expected to be a {@code GPLoadMeta}
 * @param sdi
 *          the step data, expected to be a {@code GPLoadData}
 * @return true when a row was processed and more may follow, false at the end of the input or after an error
 * @throws KettleException
 *           declared by the step interface; errors raised while processing are logged and stop the
 *           transformation instead
 */
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
  count++;
  meta = (GPLoadMeta) smi;
  data = (GPLoadData) sdi;

  try {
    Object[] r = getRow(); // Get row from input rowset & set row busy!

    // no more input to be expected...
    if ( r == null ) {
      setOutputDone();

      if ( !preview ) {
        if ( output != null ) {
          // Close the output
          try {
            output.close();
          } catch ( IOException e ) {
            throw new KettleException( "Error while closing output", e );
          }
          output = null;
        }

        String loadMethod = meta.getLoadMethod();
        if ( GPLoadMeta.METHOD_AUTO_END.equals( loadMethod ) ) {
          // load at the end of processing, but only if at least one row was written
          if ( getLinesOutput() > 0 ) {
            createControlFile( meta );
            execute( meta, true );
          } else {
            logBasic( BaseMessages.getString( PKG, "GPLoad.Info.NoRowsWritten" ) );
          }
        } else if ( GPLoadMeta.METHOD_MANUAL.equals( loadMethod ) ) {
          // we create the control file but do not execute
          createControlFile( meta );
          logBasic( BaseMessages.getString( PKG, "GPLoad.Info.MethodManual" ) );
        } else {
          throw new KettleException(
            BaseMessages.getString( PKG, "GPload.Execption.UnhandledLoadMethod", loadMethod ) );
        }
      }
      return false;
    }

    if ( !preview ) {
      // Intermediate load (added 2017-12-8): flush what has been written so far every submitAmount calls.
      final int submitAmount = 100000000;
      if ( count % submitAmount == 0 ) {
        first = true; // forces a fresh data file to be opened below
        if ( output != null ) {
          try {
            output.close();
          } catch ( IOException e ) {
            // Plain text, not a bundle key, so it is logged as is.
            logError( "close file Exception " + e.getMessage() );
            setErrors( 1 );
            stopAll();
            setOutputDone(); // signal end to receiver(s)
            return false;
          }
          output = null;
        }

        createControlFile( meta );
        execute( meta, true );

        // Remove the files of the batch that was just loaded.
        int batchNumber = getUniqueStepNrAcrossSlaves();
        deleteLoadedBatchFile( meta.getDataFile(), batchNumber );
        deleteLoadedBatchFile( meta.getControlFile(), batchNumber );
      }

      if ( first ) {
        int number = getUniqueStepNrAcrossSlaves();
        first = false;
        output = new GPLoadDataOutput( this, meta, log.getLogLevel(), number );
        output.open( this, gploadProcess );
      }
      output.writeLine( getInputRowMeta(), r );
    }

    putRow( getInputRowMeta(), r );
    incrementLinesOutput();
  } catch ( KettleException e ) {
    logError( BaseMessages.getString( PKG, "GPLoad.Log.ErrorInStep" ) + e.getMessage() );
    setErrors( 1 );
    stopAll();
    setOutputDone(); // signal end to receiver(s)
    return false;
  }

  return true;
}

/**
 * Deletes one file of an already loaded batch, if it exists. The name is built like the name the file
 * was created under: the step number goes before the extension (or at the end when there is none) and
 * variables are substituted afterwards, so paths containing variables are found as well.
 */
private void deleteLoadedBatchFile( String configuredName, int number ) {
  if ( Const.isEmpty( configuredName ) ) {
    return;
  }
  int extensionDot = configuredName.lastIndexOf( '.' );
  String numbered = extensionDot < 0
    ? configuredName + number
    : configuredName.substring( 0, extensionDot ) + number + configuredName.substring( extensionDot );

  File file = new File( environmentSubstitute( numbered ) );
  if ( file.exists() ) {
    file.delete();
  }
}
/**
 * Initialises the step: stores the metadata and data objects, caches whether the transformation runs
 * in preview mode and then delegates to the superclass.
 *
 * @param smi
 *          the step metadata, expected to be a {@code GPLoadMeta}
 * @param sdi
 *          the step data, expected to be a {@code GPLoadData}
 * @return the result of the superclass initialisation
 */
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
  meta = (GPLoadMeta) smi;
  data = (GPLoadData) sdi;

  preview = getTrans().isPreview();

  return super.init( smi, sdi );
}
/**
 * Releases the step and, when requested in the metadata and not previewing, erases the control and
 * data files created for an automatic load. With the manual load method nothing is erased and a
 * message is logged instead.
 *
 * @param smi
 *          the step metadata, expected to be a {@code GPLoadMeta}
 * @param sdi
 *          the step data, expected to be a {@code GPLoadData}
 */
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
  meta = (GPLoadMeta) smi;
  data = (GPLoadData) sdi;

  super.dispose( smi, sdi );

  if ( preview || !meta.isEraseFiles() ) {
    return;
  }

  // Erase the created cfg/dat files if requested. We don't erase
  // the rest of the files because it would be "stupid" to erase them
  // right after creation. If you don't want them, don't fill them in.
  String method = meta.getLoadMethod();
  if ( GPLoadMeta.METHOD_AUTO_END.equals( method ) ) {
    eraseNumberedFile( meta.getControlFile(), "control" );
    eraseNumberedFile( meta.getDataFile(), "data" );
  }

  if ( GPLoadMeta.METHOD_MANUAL.equals( method ) ) {
    logBasic( "Deletion of files is not compatible with \'manual load method\'" );
  }
}

/**
 * Deletes the file that was created for this step copy from the given configured name: the step number
 * is inserted before the extension (appended when there is none) and variables are substituted. Any
 * failure is logged and otherwise ignored so that disposing the step never fails.
 */
private void eraseNumberedFile( String configuredName, String description ) {
  if ( configuredName == null ) {
    return;
  }

  // Kept outside the try block so the error message can always name the file, even when the VFS
  // lookup itself fails and no file object exists.
  String filename = configuredName;
  try {
    int number = getUniqueStepNrAcrossSlaves();
    int extensionDot = configuredName.lastIndexOf( '.' );
    filename = extensionDot < 0
      ? configuredName + number
      : configuredName.substring( 0, extensionDot ) + number + configuredName.substring( extensionDot );
    filename = environmentSubstitute( filename );

    FileObject fileObject = KettleVFS.getFileObject( filename, getTransMeta() );
    try {
      fileObject.delete();
    } finally {
      fileObject.close();
    }
  } catch ( Exception ex ) {
    logError( "Error deleting " + description + " file \'" + filename + "\': " + ex.getMessage(), ex );
  }
}
/**
 * Wraps the given string in double quotes when running on Windows and the string contains at least one
 * space; in every other case the string is returned unchanged.
 *
 * @param string
 *          the text to quote if needed
 * @return the quoted or original text
 */
private String addQuotes( String string ) {
  boolean onWindows = Const.getOS().startsWith( "Windows" );
  if ( !onWindows || !string.contains( " " ) ) {
    return string;
  }
  return "\"" + string + "\"";
}
}
/*!
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2016 Pentaho Corporation.. All rights reserved.
*/
package org.pentaho.di.trans.steps.gpload;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.i18n.BaseMessages;
/**
* Does the opening of the output "stream". It's either a file or inter process communication which is transparent to
* users of this class.
*
* Copied from Sven Boden's Oracle version
*
* @author Luke Lonergan
* @since 28-mar-2008
*/
public class GPLoadDataOutput {
// Message bundle anchor passed to BaseMessages.getString().
private static Class<?> PKG = GPLoadDataOutput.class; // for i18n purposes, needed by Translator2!!
// Log channel; only assigned by the constructor that takes a LogLevel.
protected LogChannelInterface log;
// Owning step, used for variable substitution in writeLine().
private GPLoad gpLoad = null;
// Step metadata: data file name, encoding, enclosure, delimiter and field mapping.
private GPLoadMeta meta;
// Writer on the data file; set by open() or setOutput().
private PrintWriter output = null;
// True until the first writeLine() call has initialised the cached settings below.
private boolean first = true;
// Index in the input row of each configured stream field; filled on the first writeLine() call.
private int[] fieldNumbers = null;
// Enclosure and delimiter after variable substitution; filled on the first writeLine() call.
private String enclosure = null;
private String delimiter = null;
// Formatters for date values (yyyy-MM-dd) and timestamp values (yyyy-MM-dd HH:mm:ss.SSS).
private SimpleDateFormat sdfDate = null;
private SimpleDateFormat sdfDateTime = null;
// Number inserted before the extension of the data file name in open(); supplied by the constructor.
int number ;
/**
 * Creates an output for the given step without a log channel.
 *
 * @param gpLoad
 *          the owning step
 * @param meta
 *          the step metadata
 * @param number
 *          the number inserted into the data file name (added 2017-12-13)
 */
public GPLoadDataOutput( GPLoad gpLoad, GPLoadMeta meta, int number ) {
  this.gpLoad = gpLoad;
  this.meta = meta;
  this.number = number;
}
public GPLoadDataOutput( GPLoad gpLoad, GPLoadMeta meta, LogLevel logLevel,int number ) {
this( gpLoad, meta , number);
log = new LogChannel( this );
log.setLogLevel( logLevel );
}
/**
 * Opens the data file for writing, replacing any existing file of the same name.
 *
 * The file name is the configured data file with {@code number} inserted before its extension
 * (appended when there is no extension), followed by variable substitution. The configured encoding is
 * used when present, the platform default otherwise.
 *
 * @param space
 *          the variable space used to resolve the file name
 * @param sqlldrProcess
 *          not used by this implementation
 * @throws KettleException
 *           when no data file is configured or the file cannot be opened
 */
public void open( VariableSpace space, Process sqlldrProcess ) throws KettleException {
  String dataFile = meta.getDataFile();
  // Validate before manipulating the name; previously the substring calls ran first, so a missing
  // name caused a NullPointerException instead of this message.
  if ( Const.isEmpty( dataFile ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
  }

  int extensionDot = dataFile.lastIndexOf( '.' );
  if ( extensionDot < 0 ) {
    dataFile = dataFile + number;
  } else {
    dataFile = dataFile.substring( 0, extensionDot ) + number + dataFile.substring( extensionDot );
  }

  dataFile = space.environmentSubstitute( dataFile );
  if ( Const.isEmpty( dataFile ) ) {
    throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
  }

  // The log channel only exists when the constructor taking a LogLevel was used.
  if ( log != null ) {
    log.logDetailed( "Creating temporary load file " + dataFile );
  }

  OutputStream os = null;
  try {
    os = new FileOutputStream( dataFile, false );

    String encoding = meta.getEncoding();
    OutputStreamWriter writer =
      Const.isEmpty( encoding ) ? new OutputStreamWriter( os ) : new OutputStreamWriter( os, encoding );
    output = new PrintWriter( new BufferedWriter( writer ) );
  } catch ( IOException e ) {
    // E.g. an unsupported encoding: release the file handle that was already opened.
    if ( os != null ) {
      try {
        os.close();
      } catch ( IOException ignored ) {
        // The original failure is the one worth reporting.
      }
    }
    throw new KettleException( "GPLoadDataOutput.Exception" + e.getMessage(), e );
  }
}
/**
 * Closes the data file, if one is open.
 *
 * A {@code PrintWriter} never throws on write errors; it only records them. The error state is
 * therefore checked (which also flushes pending data) before closing, so that an incomplete data file
 * is reported instead of being loaded silently.
 *
 * @throws IOException
 *           when an error occurred while writing or flushing the data file
 */
public void close() throws IOException {
  if ( output == null ) {
    return;
  }

  boolean writeFailed = output.checkError();
  output.close();
  if ( writeFailed ) {
    throw new IOException( "Error while writing the GPLoad data file" );
  }
}
/**
 * @return the writer currently used for the data file, or null when none has been set
 */
PrintWriter getOutput() {
  return this.output;
}
/**
 * Replaces the writer that rows are written to.
 *
 * @param output
 *          the writer to use from now on
 */
protected void setOutput( PrintWriter output ) {
  this.output = output;
}
/**
 * Returns a copy of the text in which {@code Const.repl} has replaced the enclosure by two consecutive
 * enclosures.
 *
 * @param orig
 *          the text to escape
 * @param enclosure
 *          the enclosure to double
 * @return the escaped text
 */
private String createEscapedString( String orig, String enclosure ) {
  final StringBuffer escaped = new StringBuffer( orig );
  final String doubledEnclosure = enclosure + enclosure;
  Const.repl( escaped, enclosure, doubledEnclosure );
  return escaped.toString();
}
/**
 * Writes one row to the data file as a single delimited line.
 *
 * The first call resolves the enclosure and delimiter (with variable substitution), looks up the
 * position of every configured stream field in the row metadata and creates the date formatters. Each
 * configured field is then written in order, separated by the delimiter:
 * null values as two consecutive enclosures, strings/dates/timestamps/booleans inside enclosures
 * (booleans as Y or N), numeric types inside enclosures only when the metadata asks for it, and binary
 * values between {@code <startlob>} and {@code <endlob>}. The line ends with {@code Const.CR}.
 *
 * @param mi
 *          the metadata describing the row
 * @param row
 *          the row values
 * @throws KettleException
 *           when the delimiter is missing, a configured field is not in the row, a value type is not
 *           supported, or a value cannot be converted
 */
public void writeLine( RowMetaInterface mi, Object[] row ) throws KettleException {
  if ( first ) {
    first = false;

    // An absent enclosure means "no enclosure".
    enclosure = meta.getEnclosure();
    enclosure = enclosure == null ? "" : gpLoad.environmentSubstitute( enclosure );

    // The delimiter is mandatory, both before and after substitution.
    delimiter = meta.getDelimiter();
    if ( delimiter == null ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
    }
    delimiter = gpLoad.environmentSubstitute( delimiter );
    if ( Const.isEmpty( delimiter ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
    }

    // Resolve the field positions once, as this speeds up processing.
    fieldNumbers = new int[meta.getFieldStream().length];
    for ( int i = 0; i < fieldNumbers.length; i++ ) {
      fieldNumbers[i] = mi.indexOfValue( meta.getFieldStream()[i] );
      if ( fieldNumbers[i] < 0 ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoadDataOutput.Exception.FieldNotFound", meta
          .getFieldStream()[i] ) );
      }
    }

    sdfDate = new SimpleDateFormat( "yyyy-MM-dd" );
    sdfDateTime = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss.SSS" );
  }

  // Write the data to the output
  for ( int i = 0; i < fieldNumbers.length; i++ ) {
    // TODO: variable substitution
    if ( i != 0 ) {
      output.print( delimiter );
    }

    final int index = fieldNumbers[i];
    final ValueMetaInterface valueMeta = mi.getValueMeta( index );

    if ( row[index] == null ) {
      // TODO (SB): special check for null in case of Strings.
      output.print( enclosure );
      output.print( enclosure );
      continue;
    }

    switch ( valueMeta.getType() ) {
      case ValueMetaInterface.TYPE_STRING:
        String text = mi.getString( row, index );
        if ( text.indexOf( enclosure ) >= 0 ) {
          text = createEscapedString( text, enclosure );
        }
        output.print( enclosure );
        output.print( text );
        output.print( enclosure );
        break;

      case ValueMetaInterface.TYPE_INTEGER:
        Long integerValue = mi.getInteger( row, index );
        printNumericValue( integerValue );
        break;

      case ValueMetaInterface.TYPE_NUMBER:
        Double numberValue = mi.getNumber( row, index );
        printNumericValue( numberValue );
        break;

      case ValueMetaInterface.TYPE_BIGNUMBER:
        BigDecimal bigNumberValue = mi.getBigNumber( row, index );
        printNumericValue( bigNumberValue );
        break;

      case ValueMetaInterface.TYPE_DATE:
        Date dateValue = mi.getDate( row, index );
        output.print( enclosure );
        output.print( sdfDate.format( dateValue ) );
        output.print( enclosure );
        break;

      case ValueMetaInterface.TYPE_BOOLEAN:
        Boolean booleanValue = mi.getBoolean( row, index );
        output.print( enclosure );
        output.print( booleanValue.booleanValue() ? "Y" : "N" );
        output.print( enclosure );
        break;

      case ValueMetaInterface.TYPE_BINARY:
        byte[] binaryValue = mi.getBinary( row, index );
        output.print( "<startlob>" );
        // NOTE(review): print(Object) on a byte[] writes the array's toString() text, not its
        // contents - confirm whether binary fields are meant to be loadable at all.
        output.print( binaryValue );
        output.print( "<endlob>" );
        break;

      case ValueMetaInterface.TYPE_TIMESTAMP:
        Date timestampValue = mi.getDate( row, index );
        output.print( enclosure );
        output.print( sdfDateTime.format( timestampValue ) );
        output.print( enclosure );
        break;

      default:
        throw new KettleException( BaseMessages.getString( PKG, "GPLoadDataOutput.Exception.TypeNotSupported",
          valueMeta.getType() ) );
    }
  }

  output.print( Const.CR );
}

/**
 * Prints a numeric value, surrounded by the enclosure when the metadata requests enclosed numbers.
 */
private void printNumericValue( Object value ) {
  if ( meta.getEncloseNumbers() ) {
    output.print( enclosure );
    output.print( value );
    output.print( enclosure );
  } else {
    output.print( value );
  }
}
}
代码修改结束
3.一直纠缠的错误
connection with gpfdist failed for gpfdist://192.168.12.94:8011//shsnc/dmp/software/gploadzxs_testd_32zxs_test31912057.dat. effective url: http://192.168.12.94:8011//shsnc/dmp/software/
4.以上错误通过修改客户端的gpload.py 文件 将时间调大
5.net.core.somaxconn的作用
net.core.somaxconn是Linux中的一个kernel参数,表示socket监听(listen)的backlog上限。什么是backlog呢?backlog就是socket的监听队列,当一个请求(request)尚未被处理或建立时,他会进入backlog。而socket server可以一次性处理backlog中的所有请求,处理后的请求不再位于监听队列中。当server处理请求较慢,以至于监听队列被填满后,新来的请求会被拒绝。
在Hadoop 1.0中,参数ipc.server.listen.queue.size控制了服务端socket的监听队列长度,即backlog长度,默认值是128。而Linux的参数net.core.somaxconn默认值同样为128。当服务端繁忙时,如NameNode或JobTracker,128是远远不够的。这样就需要增大backlog,例如我们的3000台集群就将ipc.server.listen.queue.size设成了32768,为了使得整个参数达到预期效果,同样需要将kernel参数net.core.somaxconn设成一个大于等于32768的值。
如何修改net.core.somaxconn
Linux中可以使用工具sysctl来动态调整所有的kernel参数。所谓动态调整就是kernel参数值修改后即时生效。但是这个生效仅限于os层面,对于Hadoop来说,必须重启应用才能生效。
命令
sysctl -a
会显示所有的kernel参数及值。
修改参数值的语法
sysctl -w net.core.somaxconn=32768
以上命令将kernel参数net.core.somaxconn的值改成了32768。这样的改动虽然可以立即生效,但是重启机器后会恢复默认值。为了永久保留改动,需要用vi在/etc/sysctl.conf中增加一行
net.core.somaxconn= 4000
然后执行命令