Skip to content

Instantly share code, notes, and snippets.

@dilip
Created December 5, 2011 04:28
Show Gist options
  • Select an option

  • Save dilip/1432301 to your computer and use it in GitHub Desktop.

Select an option

Save dilip/1432301 to your computer and use it in GitHub Desktop.
Patches to Hadoop 0.20.2-cdh3u2 for hierarchical Hive partitions
--- ./src/mapred/org/apache/hadoop/mapred/FileInputFormat.java 2011-12-02 16:10:39.000000000 -0800
+++ /Users/dilipjoseph/Downloads/hadoop-0.20.2-cdh3u2/src/mapred/org/apache/hadoop/mapred/FileInputFormat.java 2011-10-14 01:39:58.000000000 -0700
@@ -139,31 +139,6 @@ public abstract class FileInputFormat<K,
ReflectionUtils.newInstance(filterClass, conf) : null;
}
- /**
- * Add files in the input path recursively into the results.
- * @param result
- * The List to store all files.
- * @param fs
- * The FileSystem.
- * @param path
- * The input path.
- * @param inputFilter
- * The input filter that can be used to filter files/dirs.
- * @throws IOException
- */
- protected void addInputPathRecursively(List<FileStatus> result,
- FileSystem fs, Path path, PathFilter inputFilter)
- throws IOException {
- for(FileStatus stat: fs.listStatus(path, inputFilter)) {
- if (stat.isDir()) {
- addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
- } else {
- result.add(stat);
- }
- }
- }
-
-
/** List input directories.
* Subclasses may override to, e.g., select only files matching a regular
* expression.
@@ -180,9 +155,6 @@ public abstract class FileInputFormat<K,
// get tokens for all the required FileSystems..
TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);
-
- // Whether we need to recursive look into the directory structure
- boolean recursive = job.getBoolean("mapred.input.dir.recursive", false);
List<FileStatus> result = new ArrayList<FileStatus>();
List<IOException> errors = new ArrayList<IOException>();
@@ -209,11 +181,7 @@ public abstract class FileInputFormat<K,
if (globStat.isDir()) {
for(FileStatus stat: fs.listStatus(globStat.getPath(),
inputFilter)) {
- if (recursive && stat.isDir()) {
- addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
- } else {
- result.add(stat);
- }
+ result.add(stat);
}
} else {
result.add(globStat);
--- ./src/mapred/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java 2011-12-02 16:13:41.000000000 -0800
+++ /Users/dilipjoseph/Downloads/hadoop-0.20.2-cdh3u2/src/mapred/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java 2011-10-14 01:39:58.000000000 -0700
@@ -173,30 +173,6 @@ public abstract class FileInputFormat<K,
(PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
}
- /**
- * Add files in the input path recursively into the results.
- * @param result
- * The List to store all files.
- * @param fs
- * The FileSystem.
- * @param path
- * The input path.
- * @param inputFilter
- * The input filter that can be used to filter files/dirs.
- * @throws IOException
- */
- protected void addInputPathRecursively(List<FileStatus> result,
- FileSystem fs, Path path, PathFilter inputFilter)
- throws IOException {
- for(FileStatus stat: fs.listStatus(path, inputFilter)) {
- if (stat.isDir()) {
- addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
- } else {
- result.add(stat);
- }
- }
- }
-
/** List input directories.
* Subclasses may override to, e.g., select only files matching a regular
* expression.
@@ -217,9 +193,6 @@ public abstract class FileInputFormat<K,
TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
job.getConfiguration());
- // Whether we need to recursive look into the directory structure
- boolean recursive = job.getConfiguration().getBoolean("mapred.input.dir.recursive", false);
-
List<IOException> errors = new ArrayList<IOException>();
// creates a MultiPathFilter with the hiddenFileFilter and the
@@ -245,11 +218,7 @@ public abstract class FileInputFormat<K,
if (globStat.isDir()) {
for(FileStatus stat: fs.listStatus(globStat.getPath(),
inputFilter)) {
- if (recursive && stat.isDir()) {
- addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
- } else {
- result.add(stat);
- }
+ result.add(stat);
}
} else {
result.add(globStat);
~
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment