org.apache.hadoop.streaming
Class StreamJob

java.lang.Object
  extended by org.apache.hadoop.streaming.StreamJob
All Implemented Interfaces:
Configurable, Tool

public class StreamJob
extends java.lang.Object
implements Tool

All the client-side work happens here: jar packaging, MapReduce job submission, and monitoring.


Field Summary
protected  java.lang.String additionalConfSpec_
           
protected  java.lang.String addTaskEnvironment_
           
protected  java.net.URI[] archiveURIs
           
protected  java.lang.String[] argv_
           
protected  java.lang.String cacheArchives
           
protected  java.lang.String cacheFiles
           
protected  java.lang.String comCmd_
           
protected  Configuration config_
           
protected  int debug_
           
protected  boolean detailedUsage_
           
protected  Environment env_
           
protected  java.net.URI[] fileURIs
           
protected  boolean hasSimpleInputSpecs_
           
protected  java.lang.String inputFormatSpec_
           
protected  java.util.ArrayList inputSpecs_
           
protected  java.lang.String inReaderSpec_
           
protected  java.lang.String jar_
           
protected  JobClient jc_
           
protected  JobConf jobConf_
           
protected  JobID jobId_
           
protected static java.lang.String LINK_URI
           
protected  boolean localHadoop_
           
protected static org.apache.commons.logging.Log LOG
           
protected  java.lang.String mapCmd_
           
protected  java.lang.String mapDebugSpec_
           
protected  long minRecWrittenToEnableSkip_
           
protected  java.lang.String numReduceTasksSpec_
           
protected  java.lang.String output_
           
protected  java.lang.String outputFormatSpec_
           
protected  boolean outputSingleNode_
           
protected  java.util.ArrayList packageFiles_
           
protected  java.lang.String partitionerSpec_
           
protected  java.lang.String redCmd_
           
protected  java.lang.String reduceDebugSpec_
           
protected  RunningJob running_
           
protected  java.util.TreeSet seenPrimary_
           
protected  java.util.ArrayList shippedCanonFiles_
           
protected  boolean verbose_
           
 
Constructor Summary
StreamJob()
           
StreamJob(java.lang.String[] argv, boolean mayExit)
          Deprecated. Use StreamJob() with ToolRunner, or set the Configuration using setConf(Configuration) and run with run(String[]).
 
Method Summary
static JobConf createJob(java.lang.String[] argv)
          This method creates a streaming job from the given argument list.
 void exitUsage(boolean detailed)
           
 void fail(java.lang.String message)
           
protected  java.lang.String getClusterNick()
          Deprecated. 
 Configuration getConf()
          Return the configuration used by this object.
protected  java.lang.String getHadoopClientHome()
           
protected  java.lang.String getJobTrackerHostPort()
           
protected  void getURIs(java.lang.String lcacheArchives, java.lang.String lcacheFiles)
          Gets the URIs of all the files/caches.
 int go()
          Deprecated. Use run(String[]) instead.
protected  void init()
           
protected  boolean isLocalHadoop()
           
protected  void jobInfo()
           
protected  void listJobConfProperties()
          Prints out the jobconf properties on stdout when verbose is specified.
protected  void msg(java.lang.String msg)
           
protected  java.lang.String packageJobJar()
           
 int run(java.lang.String[] args)
          Execute the command with the given arguments.
 void setConf(Configuration conf)
          Set the configuration to be used by this object.
protected  void setJobConf()
           
 int submitAndMonitorJob()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

protected static final org.apache.commons.logging.Log LOG

argv_

protected java.lang.String[] argv_

verbose_

protected boolean verbose_

detailedUsage_

protected boolean detailedUsage_

debug_

protected int debug_

env_

protected Environment env_

jar_

protected java.lang.String jar_

localHadoop_

protected boolean localHadoop_

config_

protected Configuration config_

jobConf_

protected JobConf jobConf_

jc_

protected JobClient jc_

inputSpecs_

protected java.util.ArrayList inputSpecs_

seenPrimary_

protected java.util.TreeSet seenPrimary_

hasSimpleInputSpecs_

protected boolean hasSimpleInputSpecs_

packageFiles_

protected java.util.ArrayList packageFiles_

shippedCanonFiles_

protected java.util.ArrayList shippedCanonFiles_

output_

protected java.lang.String output_

mapCmd_

protected java.lang.String mapCmd_

comCmd_

protected java.lang.String comCmd_

redCmd_

protected java.lang.String redCmd_

cacheFiles

protected java.lang.String cacheFiles

cacheArchives

protected java.lang.String cacheArchives

fileURIs

protected java.net.URI[] fileURIs

archiveURIs

protected java.net.URI[] archiveURIs

inReaderSpec_

protected java.lang.String inReaderSpec_

inputFormatSpec_

protected java.lang.String inputFormatSpec_

outputFormatSpec_

protected java.lang.String outputFormatSpec_

partitionerSpec_

protected java.lang.String partitionerSpec_

numReduceTasksSpec_

protected java.lang.String numReduceTasksSpec_

additionalConfSpec_

protected java.lang.String additionalConfSpec_

mapDebugSpec_

protected java.lang.String mapDebugSpec_

reduceDebugSpec_

protected java.lang.String reduceDebugSpec_

addTaskEnvironment_

protected java.lang.String addTaskEnvironment_

outputSingleNode_

protected boolean outputSingleNode_

minRecWrittenToEnableSkip_

protected long minRecWrittenToEnableSkip_

running_

protected RunningJob running_

jobId_

protected JobID jobId_

LINK_URI

protected static final java.lang.String LINK_URI
See Also:
Constant Field Values
Constructor Detail

StreamJob

@Deprecated
public StreamJob(java.lang.String[] argv,
                            boolean mayExit)
Deprecated. Use StreamJob() with ToolRunner, or set the Configuration using setConf(Configuration) and run with run(String[]).


StreamJob

public StreamJob()
Method Detail

getConf

public Configuration getConf()
Description copied from interface: Configurable
Return the configuration used by this object.

Specified by:
getConf in interface Configurable

setConf

public void setConf(Configuration conf)
Description copied from interface: Configurable
Set the configuration to be used by this object.

Specified by:
setConf in interface Configurable

run

public int run(java.lang.String[] args)
        throws java.lang.Exception
Description copied from interface: Tool
Execute the command with the given arguments.

Specified by:
run in interface Tool
Parameters:
args - command specific arguments.
Returns:
exit code.
Throws:
java.lang.Exception

createJob

public static JobConf createJob(java.lang.String[] argv)
                         throws java.io.IOException
This method creates a streaming job from the given argument list. The created object can be used and/or submitted to a jobtracker for execution by a job agent such as JobControl.

Parameters:
argv - the list of arguments for creating a streaming job
Returns:
the created JobConf object
Throws:
java.io.IOException

go

@Deprecated
public int go()
       throws java.io.IOException
Deprecated. Use run(String[]) instead.

This is the method that actually initializes the job conf and submits the job to the jobtracker.

Throws:
java.io.IOException

init

protected void init()

msg

protected void msg(java.lang.String msg)

exitUsage

public void exitUsage(boolean detailed)

fail

public void fail(java.lang.String message)

getHadoopClientHome

protected java.lang.String getHadoopClientHome()

isLocalHadoop

protected boolean isLocalHadoop()

getClusterNick

@Deprecated
protected java.lang.String getClusterNick()
Deprecated. 


packageJobJar

protected java.lang.String packageJobJar()
                                  throws java.io.IOException
Returns:
path to the created Jar file or null if no files are necessary.
Throws:
java.io.IOException

getURIs

protected void getURIs(java.lang.String lcacheArchives,
                       java.lang.String lcacheFiles)
Gets the URIs of all the files/caches.


setJobConf

protected void setJobConf()
                   throws java.io.IOException
Throws:
java.io.IOException

listJobConfProperties

protected void listJobConfProperties()
Prints out the jobconf properties on stdout when verbose is specified.


getJobTrackerHostPort

protected java.lang.String getJobTrackerHostPort()

jobInfo

protected void jobInfo()

submitAndMonitorJob

public int submitAndMonitorJob()
                        throws java.io.IOException
Throws:
java.io.IOException


Copyright © 2009 The Apache Software Foundation