通过kettle 的gpload 插件和greenplum-loaders实现批量并行加载

1.安装客户端,前面已经说过,不再多言。
2.修改kettle的源码

/*!
 * This program is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
 * Foundation.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
 * or from the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * Copyright (c) 2002-2013 Pentaho Corporation..  All rights reserved.
 */

package org.pentaho.di.trans.steps.gpload;

//
// The "designer" notes of the Greenplum bulkloader:
// ----------------------------------------------
//
// - "Enclosed" is used in the loader instead of "optionally enclosed" as optionally
//   encloses kind of destroys the escaping.
// - A Boolean is output as Y and N (as in the text output step e.g.). If people don't
//   like this they can first convert the boolean value to something else before loading
//   it.
// - Filters (besides data and datetime) are not supported as it slows down.
//
// 

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Semaphore;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Performs a bulk load to an Greenplum table.
 * 
 * Based on (copied from) Sven Boden's Oracle Bulk Loader step
 * 
 * @author Luke Lonergan, Matt Casters, Sean Flatley
 * @since 28-mar-2008, 17-dec-2010
 */
public class GPLoad extends BaseStep implements StepInterface {
  // Class handed to BaseMessages.getString() when looking up this step's messages.
  private static Class<?> PKG = GPLoadMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

  // Text fragments used by getControlFileContents() to assemble the gpload YAML control file.
  private static String INDENT = "    ";
  private static String GPLOAD_YAML_VERSION = "VERSION: 1.0.0.1";
  private static String SINGLE_QUOTE = "'";
  private static String OPEN_BRACKET = "[";
  private static String CLOSE_BRACKET = "]";
  private static String SPACE_PADDED_DASH = " - ";
  private static String COLON = ":";
  private static char DOUBLE_QUOTE = '"';

  // The process started by execute(); it is also handed to GPLoadDataOutput.open() in processRow().
  Process gploadProcess = null;

  // Step metadata and step data; re-assigned from the arguments of init(), processRow() and dispose().
  private GPLoadMeta meta;
  protected GPLoadData data;
  // Receives the rows in processRow(); created lazily there and reset to null once it has been closed.
  private GPLoadDataOutput output = null;

  /*
   * Local copy of the transformation "preview" property. We only forward the rows upon previewing, we don't do any of
   * the real stuff.
   */
  private boolean preview = false;

  // Disabled experiment kept from the original; its trailing note ("1000 is ...") was left unfinished by the author.
  //public BlockingQueue<String> queue = new ArrayBlockingQueue<String>(1000);

  //
  // This class continually reads from the stream, and sends it to the log
  // if the logging level is at least basic level.
  //

  private final class StreamLogger extends Thread {
    private InputStream input;
    private String type;

    StreamLogger( InputStream is, String type ) {
      this.input = is;
      this.type = type + ">";
    }

    public void run() {
      try {
        final BufferedReader br = new BufferedReader( new InputStreamReader( input ) );
        String line;
        while ( ( line = br.readLine() ) != null ) {
          // Only perform the concatenation if at basic level. Otherwise,
          // this just reads from the stream.
          if ( log.isBasic() ) {
            logBasic( type + line );
          }
        }
      } catch ( IOException ioe ) {
        ioe.printStackTrace();
      }

    }

  }

  /**
   * Creates the step; all arguments are passed unchanged to the {@link BaseStep} constructor.
   */
  public GPLoad( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Get the contents of the gpload (YAML) control file as specified in the meta object.
   * <p>
   * The data file named in the control file is derived from the control file name stored by
   * {@link #createControlFile(GPLoadMeta)}: its extension is replaced by "dat". That method must therefore have
   * been called first.
   * 
   * @param meta
   *          the meta object to model the control file after
   * @param rm
   *          the input row meta; not used by this method
   * 
   * @return a string containing the control file contents
   * 
   * @throws KettleException
   *           when the table name, data file, delimiter or max-errors value is missing or invalid, or when an
   *           update/merge load has no match or update columns
   */
  public String getControlFileContents( GPLoadMeta meta, RowMetaInterface rm ) throws KettleException {

    String[] tableFields = meta.getFieldTable();
    boolean[] matchColumn = meta.getMatchColumn();
    boolean[] updateColumn = meta.getUpdateColumn();

    // TODO: All this validation could be placed in it's own method,

    // table name validation
    DatabaseMeta databaseMeta = meta.getDatabaseMeta();
    String schemaName = meta.getSchemaName();
    String targetTableName = meta.getTableName();

    // TODO: What is schema name to a GreenPlum database?
    // Testing has been with an empty schema name
    // We will set it to an empty string if it is null
    // If it is not null then we will process what it is
    if ( schemaName == null ) {
      schemaName = "";
    }

    if ( targetTableName == null ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.TargetTableNameMissing" ) );
    }
    targetTableName = environmentSubstitute( targetTableName ).trim();
    if ( Const.isEmpty( targetTableName ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.TargetTableNameMissing" ) );
    }

    // Schema name should be unquoted (gpload yaml parse error)
    schemaName = environmentSubstitute( schemaName );
    if ( Const.isEmpty( schemaName ) ) {
      schemaName = databaseMeta.getPreferredSchemaName();
    }
    if ( Const.isEmpty( schemaName ) ) {
      schemaName = "";
    } else {
      schemaName = schemaName + ".";
    }
    targetTableName = schemaName + databaseMeta.quoteField( targetTableName );

    String loadAction = meta.getLoadAction();

    // Decide once, case-insensitively, whether this is an update/merge load. The same flag drives both the
    // validation below and the OUTPUT section, so an action that passes validation always gets its
    // MATCH_COLUMNS/UPDATE_COLUMNS written.
    boolean updateOrMerge =
        loadAction.equalsIgnoreCase( GPLoadMeta.ACTION_MERGE )
            || loadAction.equalsIgnoreCase( GPLoadMeta.ACTION_UPDATE );

    // match and update column verification
    if ( updateOrMerge ) {

      // throw an exception if we don't have match columns
      if ( matchColumn == null || !meta.hasMatchColumn() ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.MatchColumnsNeeded" ) );
      }

      // throw an exception if we don't have any update columns
      if ( updateColumn == null || !meta.hasUpdateColumn() ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.UpdateColumnsNeeded" ) );
      }
    }

    // data file validation
    // The data file name is the control file name with its extension replaced by "dat". Without a control file
    // name there is nothing to derive it from, so report that instead of failing with a NullPointerException.
    if ( Const.isEmpty( commandL ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
    }
    // NOTE(review): processRow()/dispose() derive the data file name from meta.getDataFile() instead; confirm
    // that both names refer to the file GPLoadDataOutput actually writes.
    String dataFilename = commandL.substring( 0, commandL.lastIndexOf( "." ) + 1 ) + "dat";
    dataFilename = environmentSubstitute( dataFilename ).trim();
    if ( Const.isEmpty( dataFilename ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
    }

    // delimiter validation
    String delimiter = meta.getDelimiter();
    if ( !Const.isEmpty( delimiter ) ) {
      delimiter = environmentSubstitute( delimiter ).trim();
    }
    if ( Const.isEmpty( delimiter ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
    }

    // Now we start building the contents
    StringBuilder contents = new StringBuilder( 1000 );

    // Source: GP Admin Guide 3.3.6, page 635:
    contents.append( GPLoad.GPLOAD_YAML_VERSION ).append( Const.CR );
    contents.append( "DATABASE: " ).append( environmentSubstitute( databaseMeta.getDatabaseName() ) ).append(
        Const.CR );
    contents.append( "USER: " ).append( environmentSubstitute( databaseMeta.getUsername() ) ).append( Const.CR );
    contents.append( "HOST: " ).append( environmentSubstitute( databaseMeta.getHostname() ) ).append( Const.CR );
    contents.append( "PORT: " ).append( environmentSubstitute( databaseMeta.getDatabasePortNumberString() ) ).append(
        Const.CR );
    contents.append( "GPLOAD:" ).append( Const.CR );
    contents.append( GPLoad.INDENT ).append( "INPUT: " ).append( Const.CR );
    contents.append( GPLoad.INDENT ).append( "- SOURCE: " ).append( Const.CR );

    // Add a LOCAL_HOSTS section
    // We first check to see if the array has any elements
    // if so we proceed with the string building - if not we do not add LOCAL_HOSTNAME section.
    String[] localHosts = meta.getLocalHosts();
    if ( !Const.isEmpty( localHosts ) ) {
      StringBuilder sbLocalHosts = new StringBuilder();
      for ( String localHost : localHosts ) {
        String trimmedAndSubstitutedLocalHost = environmentSubstitute( localHost.trim() );
        if ( !Const.isEmpty( trimmedAndSubstitutedLocalHost ) ) {
          sbLocalHosts.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( GPLoad.SPACE_PADDED_DASH ).append(
              trimmedAndSubstitutedLocalHost ).append( Const.CR );
        }
      }
      String stringLocalHosts = sbLocalHosts.toString();
      if ( !Const.isEmpty( stringLocalHosts ) ) {
        contents.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( "LOCAL_HOSTNAME: " ).append( Const.CR )
            .append( stringLocalHosts );
      }
    }

    // No PORT entry is written inside the SOURCE section: this modified step never supplies a local port
    // (the value was hard-wired to null), so the code that emitted it could not run and has been removed.

    // TODO: Stream to a temporary file and then bulk load OR optionally stream to a named pipe (like MySQL bulk loader)
    dataFilename = GPLoad.SINGLE_QUOTE + environmentSubstitute( dataFilename ) + GPLoad.SINGLE_QUOTE;
    contents.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( "FILE: " ).append( GPLoad.OPEN_BRACKET ).append(
        dataFilename ).append( GPLoad.CLOSE_BRACKET ).append( Const.CR );

    // columns
    if ( tableFields.length > 0 ) {
      contents.append( GPLoad.INDENT ).append( "- COLUMNS: " ).append( Const.CR );

      for ( String columnName : tableFields ) {
        contents.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( GPLoad.SPACE_PADDED_DASH ).append(
            databaseMeta.quoteField( columnName ) ).append( GPLoad.COLON ).append( Const.CR );
      }
    }

    // See also page 155 for formatting information & escaping
    // delimiter validation should have been perfomed
    contents.append( GPLoad.INDENT ).append( "- FORMAT: TEXT" ).append( Const.CR );
    contents.append( GPLoad.INDENT ).append( "- DELIMITER: " ).append( GPLoad.SINGLE_QUOTE ).append( delimiter )
        .append( GPLoad.SINGLE_QUOTE ).append( Const.CR );
    String nullAs = meta.getNullAs();
    if ( !Const.isEmpty( nullAs ) ) {
      contents.append( GPLoad.INDENT ).append( "- NULL_AS: " ).append( GPLoad.SINGLE_QUOTE ).append( nullAs )
          .append( GPLoad.SINGLE_QUOTE ).append( Const.CR );
    }

    // TODO: implement escape character
    // TODO: test what happens when a single quote is specified- can we specify a single quiote within doubole quotes
    // then?
    String enclosure = meta.getEnclosure();

    // For enclosure we only do a null check: an empty string is a valid value.
    if ( enclosure != null ) {
      enclosure = environmentSubstitute( enclosure );
    } else {
      enclosure = "";
    }
    contents.append( GPLoad.INDENT ).append( "- QUOTE: " ).append( GPLoad.SINGLE_QUOTE ).append( enclosure ).append(
        GPLoad.SINGLE_QUOTE ).append( Const.CR );
    contents.append( GPLoad.INDENT ).append( "- HEADER: FALSE" ).append( Const.CR );

    // ENCODING
    String encoding = meta.getEncoding();
    if ( !Const.isEmpty( encoding ) ) {
      contents.append( GPLoad.INDENT ).append( "- ENCODING: " ).append( encoding ).append( Const.CR );
    }

    // Max errors
    String maxErrors = meta.getMaxErrors();
    if ( maxErrors == null ) {
      maxErrors = GPLoadMeta.MAX_ERRORS_DEFAULT;
    } else {
      maxErrors = environmentSubstitute( maxErrors );
      try {
        if ( Integer.parseInt( maxErrors ) < 0 ) {
          throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.MaxErrorsInvalid" ) );
        }
      } catch ( NumberFormatException nfe ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.MaxErrorsInvalid" ) );
      }
    }

    contents.append( GPLoad.INDENT ).append( "- ERROR_LIMIT: " ).append( maxErrors ).append( Const.CR );

    String errorTableName = meta.getErrorTableName();
    if ( !Const.isEmpty( errorTableName ) ) {
      errorTableName = environmentSubstitute( errorTableName ).trim();
      if ( !Const.isEmpty( errorTableName ) ) {
        contents.append( GPLoad.INDENT ).append( "- ERROR_TABLE: " ).append( errorTableName ).append( Const.CR );
      }
    }

    // -------------- OUTPUT section

    contents.append( GPLoad.INDENT ).append( "OUTPUT:" ).append( Const.CR );

    contents.append( GPLoad.INDENT ).append( "- TABLE: " ).append( targetTableName ).append( Const.CR );
    contents.append( GPLoad.INDENT ).append( "- MODE: " ).append( loadAction ).append( Const.CR );

    // TODO: MAPPING
    // TODO: add support for BEFORE and AFTER SQL

    // do the following block if the load action is an update or merge
    if ( updateOrMerge ) {

      // if we have match columns then add the specification
      if ( meta.hasMatchColumn() ) {
        contents.append( GPLoad.INDENT ).append( "- MATCH_COLUMNS: " ).append( Const.CR );

        for ( int i = 0; i < matchColumn.length; i++ ) {
          if ( matchColumn[i] ) {
            contents.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( GPLoad.SPACE_PADDED_DASH ).append(
                databaseMeta.quoteField( tableFields[i] ) ).append( Const.CR );
          }
        }
      }

      // if we have update columns then add the specification
      if ( meta.hasUpdateColumn() ) {
        contents.append( GPLoad.INDENT ).append( "- UPDATE_COLUMNS: " ).append( Const.CR );

        for ( int i = 0; i < updateColumn.length; i++ ) {
          if ( updateColumn[i] ) {
            contents.append( GPLoad.INDENT ).append( GPLoad.INDENT ).append( GPLoad.SPACE_PADDED_DASH ).append(
                databaseMeta.quoteField( tableFields[i] ) ).append( Const.CR );
          }
        }
      }

      // if we have an update condition
      String updateCondition = meta.getUpdateCondition();
      if ( !Const.isEmpty( updateCondition ) ) {

        // replace carriage returns with spaces and trim the whole thing
        updateCondition = updateCondition.replaceAll( "[\r\n]", " " ).trim();

        // test the contents once again
        // the original contents may have just been linefeed/carriage returns
        if ( !Const.isEmpty( updateCondition ) ) {

          // we'll write out what we have
          contents.append( GPLoad.INDENT ).append( "- UPDATE_CONDITION: " ).append( GPLoad.DOUBLE_QUOTE ).append(
              updateCondition ).append( GPLoad.DOUBLE_QUOTE ).append( Const.CR );
        }
      }
    }

    return contents.toString();
  }

  /**
   * Name of the control file most recently prepared by {@link #createControlFile(GPLoadMeta)}, with variables
   * already substituted. It is read when building the control file contents and the gpload command line.
   */
  String commandL = null;

  /**
   * Create the control file for this step copy.
   * <p>
   * The configured control file name gets the value of {@code getUniqueStepNrAcrossSlaves()} inserted in front
   * of its extension, variables are substituted, and the result is remembered in {@link #commandL}. The contents
   * are generated before the file is opened, so an invalid configuration does not leave an empty file behind.
   *
   * @param meta
   *          the step metadata supplying the control file name and the control file settings
   * @throws KettleException
   *           when no usable control file name is configured, when the contents cannot be generated, or when the
   *           file cannot be written
   */
  public void createControlFile( GPLoadMeta meta ) throws KettleException {
    String filename = meta.getControlFile();

    // Validate before touching the string: the name used to be dereferenced first, so a missing name caused a
    // NullPointerException instead of the intended message.
    if ( Const.isEmpty( filename ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
    }

    // The step number is inserted in front of the extension, so an extension is required.
    int extensionStart = filename.lastIndexOf( "." );
    if ( extensionStart < 0 ) {
      throw new KettleException( "The control file name '" + filename
          + "' must have an extension (for example .cfg)" );
    }
    int number = getUniqueStepNrAcrossSlaves();
    filename = filename.substring( 0, extensionStart ) + number + filename.substring( extensionStart );

    filename = environmentSubstitute( filename ).trim();
    if ( Const.isEmpty( filename ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
    }

    // Remember the resolved name: this is the file that is actually written, and therefore the one gpload has to
    // be pointed at.
    commandL = filename;

    String contents = getControlFileContents( meta, getInputRowMeta() );

    FileWriter fw = null;
    try {
      fw = new FileWriter( new File( filename ) );
      fw.write( contents );
      // Close inside the try block so that a failing flush is reported instead of silently truncating the file.
      fw.close();
      fw = null;
    } catch ( IOException ex ) {
      throw new KettleException( ex.getMessage(), ex );
    } finally {
      if ( fw != null ) {
        try {
          fw.close();
        } catch ( IOException ignored ) {
          // The write already failed and that failure is being reported; a second error adds nothing.
        }
      }
    }
  }

  /**
   * Resolves variables in the given path and verifies it against the file system.
   * 
   * @param pathToFile
   *          Path to the file to verify.
   * @param exceptionMessage
   *          The message to use when the path is not provided.
   * @param checkExistenceOfFile
   *          When true the file itself must exist; when false only its parent folder must exist.
   * @return the file name as reported by KettleVFS; on Windows, for a file whose existence was checked, the
   *         resolved path passed through {@link #addQuotes(String)} instead
   * @throws KettleException
   *           when the path is empty, the file or its parent folder does not exist, or the file system cannot be
   *           queried
   */
  private String getPath( String pathToFile, String exceptionMessage, boolean checkExistenceOfFile )
    throws KettleException {

    // Reject a missing path, both before and after variable substitution.
    if ( Const.isEmpty( pathToFile ) ) {
      throw new KettleException( exceptionMessage );
    }
    String resolvedPath = environmentSubstitute( pathToFile ).trim();
    if ( Const.isEmpty( resolvedPath ) ) {
      throw new KettleException( exceptionMessage );
    }

    FileObject target = KettleVFS.getFileObject( resolvedPath, getTransMeta() );
    try {
      if ( !checkExistenceOfFile ) {
        // The file itself may still have to be created, but the folder it goes into must be there.
        FileObject folder = target.getParent();
        if ( !folder.exists() ) {
          throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.DirectoryDoesNotExist",
              folder.getURL().getPath() ) );
        }
        return KettleVFS.getFilename( target );
      }

      if ( !target.exists() ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Execption.FileDoesNotExist", resolvedPath ) );
      }

      boolean onWindows = Const.getOS().startsWith( "Windows" );
      return onWindows ? addQuotes( resolvedPath ) : KettleVFS.getFilename( target );
    } catch ( FileSystemException fsex ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.GPLoadCommandBuild", fsex.getMessage() ) );
    }
  }

  /**
   * Create the command line for GPLoad depending on the meta information supplied.
   * <p>
   * The control file passed with {@code -f} is the one remembered in {@code commandL}, so
   * {@link #createControlFile(GPLoadMeta)} must have been called first.
   * 
   * @param meta
   *          The meta data to create the command line from
   * @param password
   *          Use the real password or not; not used by this implementation
   * 
   * @return The string to execute.
   * 
   * @throws KettleException
   *           when no control file has been created yet, or when the gpload executable or the log file folder
   *           cannot be verified
   */
  public String createCommandLine( GPLoadMeta meta, boolean password ) throws KettleException {

    // Without this check the literal text "null" would be handed to gpload as the control file.
    if ( Const.isEmpty( commandL ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPLoad.Exception.NoControlFileSpecified" ) );
    }

    StringBuilder sbCommandLine = new StringBuilder( 300 );

    if ( Const.getOS().startsWith( "Windows" ) ) {
      sbCommandLine.append( "cmd /c " );
    }

    // get path to the executable
    sbCommandLine.append( getPath( meta.getGploadPath(), BaseMessages.getString( PKG,
        "GPLoad.Exception.GPLoadPathMisssing" ), true ) );
    // verbose output
    sbCommandLine.append( " -v " );
    // the control file: resolve variables so the name matches the file that was written, and quote it on
    // Windows when it contains spaces
    sbCommandLine.append( " -f " );
    sbCommandLine.append( addQuotes( environmentSubstitute( commandL ).trim() ) );

    // get the path to the log file, if specified
    String logfile = meta.getLogFile();
    if ( !Const.isEmpty( logfile ) ) {
      sbCommandLine.append( " -l " );
      sbCommandLine.append( getPath( logfile, BaseMessages.getString( PKG,
          "GPLoad.Exception.LogFilePathMissing" ), false ) );
    }
    return sbCommandLine.toString();
  }


/** Limits how many gpload runs may hold a permit at the same time; shared by all GPLoad instances in this JVM. */
private static final java.util.concurrent.Semaphore sp = new Semaphore(32);

    /**
     * Runs gpload with the command line built from the given metadata.
     * <p>
     * A permit of the shared semaphore is taken before the process is started and is always given back when this
     * method returns, whether the run succeeds, fails or is interrupted. With {@code wait == false} this method
     * returns right after the process has been started, so the permit then only covers the start-up.
     *
     * @param meta
     *          the step metadata used to build the command line
     * @param wait
     *          when true, block until gpload has finished and check its exit value
     * @return always true; failures are reported through an exception
     * @throws KettleException
     *           when the command line cannot be built, the process cannot be started, the wait is interrupted, or
     *           gpload ends with a non-zero exit value
     */
    public boolean execute(GPLoadMeta meta, boolean wait) throws KettleException {
        String commandLine = null;
        int gpLoadExitVal = 0;
        boolean permitHeld = false;

        try {
            commandLine = createCommandLine(meta, true);

            long waitStart = System.currentTimeMillis();
            // These texts are not message-bundle keys, so they are logged directly rather than being passed to
            // BaseMessages.getString() as if they were.
            logBasic("可用装载进程数:" + sp.availablePermits());
            sp.acquire();
            permitHeld = true;
            logBasic("Executing: " + commandLine);
            long waitEnd = System.currentTimeMillis();
            logBasic(commandLine + "等待:" + (waitEnd - waitStart));

            gploadProcess = Runtime.getRuntime().exec(commandLine);

            // any error message?
            StreamLogger errorLogger = new StreamLogger(gploadProcess.getErrorStream(), "ERROR");

            // any output?
            StreamLogger outputLogger = new StreamLogger(gploadProcess.getInputStream(), "OUTPUT");

            // kick them off
            errorLogger.start();
            outputLogger.start();

            if (wait) {
                gpLoadExitVal = gploadProcess.waitFor();
                logBasic(BaseMessages.getString(PKG, "GPLoad.Log.ExitValuePsqlPath", "" + gpLoadExitVal));
                if (gpLoadExitVal != 0) {
                    throw new KettleException(
                            BaseMessages.getString(PKG, "GPLoad.Log.ExitValuePsqlPath", "" + gpLoadExitVal));
                }
            }
        } catch (KettleException ke) {
            throw ke;
        } catch (InterruptedException ie) {
            // Keep the interrupt visible to the code further up the stack.
            Thread.currentThread().interrupt();
            throw new KettleException("Error while executing '" + commandLine + "'. Exit value = " + gpLoadExitVal);
        } catch (Exception ex) {
            // Don't throw the message upwards, the message contains the password.
            throw new KettleException("Error while executing '" + commandLine + "'. Exit value = " + gpLoadExitVal);
        } finally {
            // Give the permit back on every path; otherwise each failed run would permanently reduce the number of
            // loads that can run.
            if (permitHeld) {
                sp.release();
            }
        }

        return true;
    }
/** Number of processRow() calls so far; an intermediate load is triggered each time it reaches a multiple of the batch size. */
private long count =0 ;

    /**
     * Handles one input row: writes it to the data output, passes it on to the next step(s), and triggers the
     * gpload runs.
     * <p>
     * Outside preview mode, every time the call counter reaches a multiple of the batch size the current output
     * is closed, a control file is created, gpload is run, the numbered data and control files are deleted and a
     * fresh output is opened for the current row. At the end of the input the output is closed and, depending on
     * the load method, a final control file is created and gpload is run.
     *
     * @param smi
     *          the step metadata, expected to be a {@link GPLoadMeta}
     * @param sdi
     *          the step data, expected to be a {@link GPLoadData}
     * @return true when a row was processed and more may follow, false at the end of the input or after an error
     * @throws KettleException
     *           declared by the step interface; errors raised while processing are logged and stop the
     *           transformation instead
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        count++;
        meta = (GPLoadMeta) smi;
        data = (GPLoadData) sdi;
        try {
            Object[] r = getRow(); // Get row from input rowset & set row busy!
            // no more input to be expected...
            if (r == null) {
                setOutputDone();

                if (!preview) {
                    if (output != null) {
                        // Close the output
                        try {
                            output.close();
                        } catch (IOException e) {
                            throw new KettleException("Error while closing output", e);
                        }

                        output = null;
                    }

                    String loadMethod = meta.getLoadMethod();

                    // if it specified that we are to load at the end of processing
                    if (GPLoadMeta.METHOD_AUTO_END.equals(loadMethod)) {

                        // if we actually wrote at least one row
                        if (getLinesOutput() > 0) {
                            // we do this
                            createControlFile(meta);
                            execute(meta, true);
                        } else {
                            // we don't create a control file and execute
                            logBasic(BaseMessages.getString(PKG, "GPLoad.Info.NoRowsWritten"));
                        }
                    } else if (GPLoadMeta.METHOD_MANUAL.equals(loadMethod)) {

                        // we create the control file but do not execute
                        createControlFile(meta);
                        logBasic(BaseMessages.getString(PKG, "GPLoad.Info.MethodManual"));
                    } else {
                        throw new KettleException(
                                BaseMessages.getString(PKG, "GPload.Execption.UnhandledLoadMethod", loadMethod));
                    }
                }
                return false;
            }

            if (!preview) {

                // Number of processRow() calls after which the rows collected so far are loaded.
                final int submitAmount = 100000000;
                if (count % submitAmount == 0) {
                    // Make the code below open a fresh output for the current row.
                    first = true;

                    try {
                        output.close();
                    } catch (IOException e) {
                        // Plain text: this is not a message-bundle key.
                        logError("close file Exception " + e.getMessage(), e);
                        setErrors(1);
                        stopAll();
                        setOutputDone(); // signal end to receiver(s)
                        return false;
                    }
                    output = null;

                    // Load what has been written so far, then remove the files of this batch.
                    createControlFile(meta);
                    execute(meta, true);
                    int number = getUniqueStepNrAcrossSlaves();
                    deleteNumberedFile(meta.getDataFile(), number);
                    deleteNumberedFile(meta.getControlFile(), number);
                }

                if (first) {
                    int number = getUniqueStepNrAcrossSlaves();
                    first = false;
                    output = new GPLoadDataOutput(this, meta, log.getLogLevel(), number);
                    output.open(this, gploadProcess);
                }
                output.writeLine(getInputRowMeta(), r);

            }
            putRow(getInputRowMeta(), r);
            incrementLinesOutput();

        } catch (KettleException e) {
            logError(BaseMessages.getString(PKG, "GPLoad.Log.ErrorInStep") + e.getMessage());
            setErrors(1);
            stopAll();
            setOutputDone(); // signal end to receiver(s)
            return false;
        }

        return true;
    }

    /**
     * Deletes the numbered variant of a configured file after an intermediate load. The number is inserted in
     * front of the extension and variables are substituted, the same way the control file name is built when it
     * is written. This is best effort: problems are logged and never interrupt the row processing.
     */
    private void deleteNumberedFile(String configuredName, int number) {
        if (Const.isEmpty(configuredName)) {
            return;
        }
        int extensionStart = configuredName.lastIndexOf(".");
        if (extensionStart < 0) {
            logBasic("Not deleting '" + configuredName + "': the file name has no extension");
            return;
        }
        String numberedName = configuredName.substring(0, extensionStart) + number
                + configuredName.substring(extensionStart);
        File file = new File(environmentSubstitute(numberedName).trim());
        if (file.exists() && !file.delete()) {
            logBasic("Could not delete file '" + file.getPath() + "'");
        }
    }

  /**
   * Stores the step metadata and data, records whether the transformation runs in preview mode, and then lets
   * the superclass initialise.
   *
   * @return the result of the superclass initialisation
   */
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (GPLoadMeta) smi;
    data = (GPLoadData) sdi;

    preview = getTrans().isPreview();

    return super.init( smi, sdi );
  }

  /**
   * Disposes the step and, when requested in the metadata and not previewing, erases the numbered control and
   * data files created for the "load at end" method.
   *
   * @param smi
   *          the step metadata, expected to be a {@link GPLoadMeta}
   * @param sdi
   *          the step data, expected to be a {@link GPLoadData}
   */
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (GPLoadMeta) smi;
    data = (GPLoadData) sdi;

    super.dispose( smi, sdi );

    if ( preview || !meta.isEraseFiles() ) {
      return;
    }

    // Erase the created cfg/dat files if requested. We don't erase
    // the rest of the files because it would be "stupid" to erase them
    // right after creation. If you don't want them, don't fill them in.
    String method = meta.getLoadMethod();
    if ( GPLoadMeta.METHOD_AUTO_END.equals( method ) ) {
      eraseNumberedFile( meta.getControlFile(), "control" );
      eraseNumberedFile( meta.getDataFile(), "data" );
    }

    if ( GPLoadMeta.METHOD_MANUAL.equals( method ) ) {
      logBasic( "Deletion of files is not compatible with \'manual load method\'" );
    }
  }

  /**
   * Deletes the numbered variant of a configured file through KettleVFS. Failures are logged using the file
   * name itself; the previous code asked KettleVFS for the name of a file object that could still be null (or
   * belong to the file deleted before), so the error handling could fail or name the wrong file.
   */
  private void eraseNumberedFile( String configuredName, String kind ) {
    if ( configuredName == null ) {
      return;
    }

    String filename = configuredName;
    FileObject fileObject = null;
    try {
      int number = getUniqueStepNrAcrossSlaves();
      int extensionStart = configuredName.lastIndexOf( "." );
      filename =
          environmentSubstitute( configuredName.substring( 0, extensionStart ) + number
              + configuredName.substring( extensionStart ) );
      fileObject = KettleVFS.getFileObject( filename, getTransMeta() );
      fileObject.delete();
    } catch ( Exception ex ) {
      logError( "Error deleting " + kind + " file \'" + filename + "\': " + ex.getMessage(), ex );
    } finally {
      // Release the file object even when the deletion failed.
      if ( fileObject != null ) {
        try {
          fileObject.close();
        } catch ( Exception ignored ) {
          // Closing is best effort; a deletion failure has already been logged above.
        }
      }
    }
  }

  /**
   * Wraps the passed string in double quotes when the OS is Windows and the string contains at least one space;
   * otherwise the string is returned unchanged.
   * 
   * @param string
   *          the text to quote if needed
   * @return the quoted or unchanged text
   */
  private String addQuotes( String string ) {
    boolean onWindows = Const.getOS().startsWith( "Windows" );
    if ( !onWindows || !string.contains( " " ) ) {
      return string;
    }
    return "\"" + string + "\"";
  }

}
/*!
 * This program is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
 * Foundation.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
 * or from the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * Copyright (c) 2002-2016 Pentaho Corporation..  All rights reserved.
 */
package org.pentaho.di.trans.steps.gpload;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.i18n.BaseMessages;

/**
 * Opens the data file of a GPLoad step and writes rows to it as delimited text.
 *
 * The file name is taken from the step metadata and gets the instance's {@code number} inserted in front of
 * its extension, so that several instances can write to separate files.
 *
 * Copied from Sven Boden's Oracle version
 *
 * @author Luke Lonergan
 * @since 28-mar-2008
 */
public class GPLoadDataOutput {
  private static Class<?> PKG = GPLoadDataOutput.class; // for i18n purposes, needed by Translator2!!

  /** Log channel; only set by the constructor that takes a log level, so it may be null. */
  protected LogChannelInterface log;

  private GPLoad gpLoad = null;
  private GPLoadMeta meta;
  private PrintWriter output = null;
  // True until the first call to writeLine(), which performs the one-time setup.
  private boolean first = true;
  // Index in the incoming row of every field listed in the step metadata.
  private int[] fieldNumbers = null;
  private String enclosure = null;
  private String delimiter = null;
  private SimpleDateFormat sdfDate = null;
  private SimpleDateFormat sdfDateTime = null;
  /** Number inserted into the data file name in front of its extension. */
  int number;

  /**
   * Creates an output without a log channel.
   *
   * @param gpLoad
   *          the step, used for variable substitution of enclosure and delimiter
   * @param meta
   *          the step metadata (data file, encoding, enclosure, delimiter, fields)
   * @param number
   *          number inserted into the data file name
   */
  public GPLoadDataOutput( GPLoad gpLoad, GPLoadMeta meta, int number ) {
    this.meta = meta;
    this.gpLoad = gpLoad;
    // zxs add 2017-12-13
    this.number = number;
    // zxs add end 2017-12-13
  }

  /**
   * Creates an output with its own log channel.
   *
   * @param gpLoad
   *          the step, used for variable substitution of enclosure and delimiter
   * @param meta
   *          the step metadata (data file, encoding, enclosure, delimiter, fields)
   * @param logLevel
   *          log level of the channel that is created
   * @param number
   *          number inserted into the data file name
   */
  public GPLoadDataOutput( GPLoad gpLoad, GPLoadMeta meta, LogLevel logLevel, int number ) {
    this( gpLoad, meta, number );
    log = new LogChannel( this );
    log.setLogLevel( logLevel );
  }

  /**
   * Opens (and truncates) the numbered data file and prepares the writer.
   *
   * @param space
   *          variable space used to resolve variables in the data file name
   * @param sqlldrProcess
   *          not used by this method
   * @throws KettleException
   *           when no data file is configured or the file cannot be opened
   */
  public void open( VariableSpace space, Process sqlldrProcess ) throws KettleException {
    // Validate BEFORE touching the name: the numbering below would otherwise fail with a
    // NullPointerException instead of the intended "data file missing" error.
    String dataFile = meta.getDataFile();
    if ( Const.isEmpty( dataFile ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
    }

    // zxs add 2017-12-13: number the file first, then substitute variables.
    // NOTE(review): the order mirrors the name derivation GPLoad uses when it deletes the data file;
    // keep both in sync.
    dataFile = space.environmentSubstitute( insertNumber( dataFile, number ) );
    if ( Const.isEmpty( dataFile ) ) {
      throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DataFileMissing" ) );
    }

    // The log channel is only created by the constructor that takes a log level.
    if ( log != null ) {
      log.logDetailed( "Creating temporary load file " + dataFile );
    }

    OutputStream os = null;
    try {
      os = new FileOutputStream( dataFile, false );

      String encoding = meta.getEncoding();
      if ( Const.isEmpty( encoding ) ) {
        // Use the default encoding.
        output = new PrintWriter( new BufferedWriter( new OutputStreamWriter( os ) ) );
      } else {
        // Use the specified encoding
        output = new PrintWriter( new BufferedWriter( new OutputStreamWriter( os, encoding ) ) );
      }
    } catch ( IOException e ) {
      // An unsupported encoding is reported after the file was opened: do not leak the stream.
      if ( os != null ) {
        try {
          os.close();
        } catch ( IOException ignored ) {
          // The original failure is the one worth reporting.
        }
      }
      throw new KettleException( "GPLoadDataOutput.Exception" + e.getMessage(), e );
    }
  }

  /**
   * Inserts the number in front of the last '.' of the file name, or appends it when there is no '.'.
   */
  private static String insertNumber( String fileName, int number ) {
    int dot = fileName.lastIndexOf( '.' );
    if ( dot < 0 ) {
      // NOTE(review): without an extension the unguarded substring call threw
      // StringIndexOutOfBoundsException; appending is assumed to be the intended fallback.
      return fileName + number;
    }
    return fileName.substring( 0, dot ) + number + fileName.substring( dot );
  }

  /**
   * Closes the writer, if one was opened.
   *
   * @throws IOException
   *           when the writer recorded an error while writing or closing
   */
  public void close() throws IOException {
    if ( output != null ) {
      output.close();
      // PrintWriter never throws on write errors; it only records them. Surface them here so a
      // truncated data file does not go unnoticed.
      if ( output.checkError() ) {
        throw new IOException( "Error while writing the GPLoad data file, the file may be incomplete" );
      }
    }
  }

  PrintWriter getOutput() {
    return output;
  }

  protected void setOutput( PrintWriter output ) {
    this.output = output;
  }

  /**
   * Doubles every occurrence of the enclosure inside the given string.
   */
  private String createEscapedString( String orig, String enclosure ) {
    StringBuffer buf = new StringBuffer( orig );

    Const.repl( buf, enclosure, enclosure + enclosure );
    return buf.toString();
  }

  /**
   * Writes one row as a delimited line followed by {@code Const.CR}.
   *
   * The first call resolves enclosure and delimiter, looks up the position of every configured field in the
   * row metadata and creates the date formatters.
   *
   * @param mi
   *          metadata of the row
   * @param row
   *          the row data
   * @throws KettleException
   *           when the delimiter is missing, a configured field is not part of the row, or a value has an
   *           unsupported type
   */
  public void writeLine( RowMetaInterface mi, Object[] row ) throws KettleException {
    if ( first ) {
      first = false;

      enclosure = meta.getEnclosure();
      if ( enclosure == null ) {
        enclosure = "";
      } else {
        enclosure = gpLoad.environmentSubstitute( enclosure );
      }

      delimiter = meta.getDelimiter();
      if ( delimiter == null ) {
        throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
      } else {
        delimiter = gpLoad.environmentSubstitute( delimiter );
        if ( Const.isEmpty( delimiter ) ) {
          throw new KettleException( BaseMessages.getString( PKG, "GPload.Exception.DelimiterMissing" ) );
        }
      }

      // Setup up the fields we need to take for each of the rows
      // as this speeds up processing.
      fieldNumbers = new int[meta.getFieldStream().length];
      for ( int i = 0; i < fieldNumbers.length; i++ ) {
        fieldNumbers[i] = mi.indexOfValue( meta.getFieldStream()[i] );
        if ( fieldNumbers[i] < 0 ) {
          throw new KettleException( BaseMessages.getString( PKG, "GPLoadDataOutput.Exception.FieldNotFound", meta
              .getFieldStream()[i] ) );
        }
      }

      sdfDate = new SimpleDateFormat( "yyyy-MM-dd" );
      sdfDateTime = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss.SSS" );
    }

    // Write the data to the output
    ValueMetaInterface v = null;
    // Position of the current field in the row; deliberately not called "number" so it does not
    // shadow the file-number field of this class.
    int fieldIndex = 0;

    for ( int i = 0; i < fieldNumbers.length; i++ ) {
      // TODO: variable substitution
      if ( i != 0 ) {
        output.print( delimiter );
      }
      fieldIndex = fieldNumbers[i];
      v = mi.getValueMeta( fieldIndex );
      if ( row[fieldIndex] == null ) {
        // TODO (SB): special check for null in case of Strings.
        output.print( enclosure );
        output.print( enclosure );
      } else {
        switch ( v.getType() ) {
          case ValueMetaInterface.TYPE_STRING:
            String s = mi.getString( row, fieldIndex );
            // Nothing to escape when no enclosure is configured.
            if ( enclosure.length() > 0 && s.indexOf( enclosure ) >= 0 ) {
              s = createEscapedString( s, enclosure );
            }
            output.print( enclosure );
            output.print( s );
            output.print( enclosure );
            break;
          case ValueMetaInterface.TYPE_INTEGER:
            Long l = mi.getInteger( row, fieldIndex );
            if ( meta.getEncloseNumbers() ) {
              output.print( enclosure );
              output.print( l );
              output.print( enclosure );
            } else {
              output.print( l );
            }
            break;
          case ValueMetaInterface.TYPE_NUMBER:
            Double d = mi.getNumber( row, fieldIndex );
            if ( meta.getEncloseNumbers() ) {
              output.print( enclosure );
              output.print( d );
              output.print( enclosure );
            } else {
              output.print( d );
            }
            break;
          case ValueMetaInterface.TYPE_BIGNUMBER:
            BigDecimal bd = mi.getBigNumber( row, fieldIndex );
            if ( meta.getEncloseNumbers() ) {
              output.print( enclosure );
              output.print( bd );
              output.print( enclosure );
            } else {
              output.print( bd );
            }
            break;
          case ValueMetaInterface.TYPE_DATE:
            Date dt = mi.getDate( row, fieldIndex );
            output.print( enclosure );
            output.print( sdfDate.format( dt ) );
            output.print( enclosure );
            break;
          case ValueMetaInterface.TYPE_BOOLEAN:
            Boolean b = mi.getBoolean( row, fieldIndex );
            output.print( enclosure );
            if ( b.booleanValue() ) {
              output.print( "Y" );
            } else {
              output.print( "N" );
            }
            output.print( enclosure );
            break;
          case ValueMetaInterface.TYPE_BINARY:
            byte[] byt = mi.getBinary( row, fieldIndex );
            output.print( "<startlob>" );
            // NOTE(review): print(Object) on a byte[] writes the array's identity string ("[B@..."),
            // not its content. Left unchanged because the expected binary text format is not known here.
            output.print( byt );
            output.print( "<endlob>" );
            break;
          case ValueMetaInterface.TYPE_TIMESTAMP:
            Date time = mi.getDate( row, fieldIndex );
            output.print( enclosure );
            output.print( sdfDateTime.format( time ) );
            output.print( enclosure );
            break;
          default:
            throw new KettleException( BaseMessages.getString( PKG, "GPLoadDataOutput.Exception.TypeNotSupported", v
                .getType() ) );
        }
      }
    }
    output.print( Const.CR );
  }
}

代码修改结束
3.一直纠缠的错误

connection with gpfdist failed for gpfdist://192.168.12.94:8011//shsnc/dmp/software/gploadzxs_testd_32zxs_test31912057.dat. effective url: http://192.168.12.94:8011//shsnc/dmp/software/

4.以上错误可以通过修改客户端的gpload.py文件、将其中的时间(超时)设置调大来解决

5.net.core.somaxconn的作用

net.core.somaxconn是Linux中的一个kernel参数,表示socket监听(listen)的backlog上限。什么是backlog呢?backlog就是socket的监听队列,当一个请求(request)尚未被处理或建立时,它会进入backlog。而socket server可以一次性处理backlog中的所有请求,处理后的请求不再位于监听队列中。当server处理请求较慢,以至于监听队列被填满后,新来的请求会被拒绝。

    在Hadoop 1.0中,参数ipc.server.listen.queue.size控制了服务端socket的监听队列长度,即backlog长度,默认值是128。而Linux的参数net.core.somaxconn默认值同样为128。当服务端繁忙时,如NameNode或JobTracker,128是远远不够的。这样就需要增大backlog,例如我们的3000台集群就将ipc.server.listen.queue.size设成了32768,为了使得整个参数达到预期效果,同样需要将kernel参数net.core.somaxconn设成一个大于等于32768的值。

如何修改net.core.somaxconn

Linux中可以使用工具sysctl来动态调整所有的kernel参数。所谓动态调整就是kernel参数值修改后即时生效。但是这个生效仅限于os层面,对于Hadoop来说,必须重启应用才能生效。

命令

sysctl -a

会显示所有的kernel参数及值。

修改参数值的语法

sysctl -w net.core.somaxconn=32768

以上命令将kernel参数net.core.somaxconn的值改成了32768。这样的改动虽然可以立即生效,但是重启机器后会恢复默认值。为了永久保留改动,需要用vi在/etc/sysctl.conf中增加一行

net.core.somaxconn = 32768

然后执行命令

sysctl -p

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
kettle-sap-plugin-core是一个针对Kettle(现在被称为Pentaho Data Integration)的插件核心库。Kettle是一种用于数据集成和转换的开源工具,拥有强大的ETL(Extract, Transform, Load)功能。而kettle-sap-plugin-core则是Kettle插件的核心库之一,专门用于与SAP系统进行集成。 SAP(Systems, Applications and Products in Data Processing)是一家全球领先的企业级软件公司,其产品广泛应用于各种业务领域,包括财务会计、供应链管理、人力资源管理等。kettle-sap-plugin-core提供了一系列用于连接和与SAP系统交互的功能组件,方便用户在Kettle中进行SAP数据的抽取、转换和加载。 这个插件核心库支持与SAP系统的各种模块进行集成,如SAP ERP(Enterprise Resource Planning)、SAP BW(Business Warehouse)、SAP HANA等。用户可以通过kettle-sap-plugin-core,使用Kettle的图形化界面来配置和管理与SAP系统之间的数据传输、转换和同步任务。 kettle-sap-plugin-core具有以下特点: 1. 支持SAP系统的多种连接方式,包括JCo(Java Connector)、BAPI(Business Application Programming Interface)等。 2. 提供了丰富的连接器,用于与SAP系统的不同模块进行交互,如SAP输入、SAP数据输出、SAP函数调用等。 3. 支持对SAP数据的抽取、转换和加载,提供了多种数据转换和处理操作,如数据映射、过滤、排序、聚合等。 4. 具有高度可扩展性,用户可以根据自己的需求进行插件的定制和扩展。 总之,kettle-sap-plugin-core是一个在Kettle实现与SAP系统集成的重要插件核心库,方便用户进行ETL任务的开发和管理,实现SAP数据的快速、高效地处理。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值