Point 1: pagerank.txt — 计算样本 (sample input; each line is a page followed by the pages it links to)
A B D
B C
C A B
D B C
Point 2:Node.class
// Current PageRank score of this node; every node starts at 1.0.
private double pageRank = 1.0;
// Names of the pages this node links to; stays null until set (toString and
// containsAdjacentNodes both check for null).
private String[] adjacentNodeNames;
// Delimiter between the rank and each adjacent page name in the text form
// produced by toString() and parsed by fromMR().
public static final char fieldSeparator = '\t';
/**
 * Returns the PageRank score currently stored on this node.
 *
 * @return the stored rank value
 */
public double getPageRank() {
    return this.pageRank;
}
/**
 * Stores a new PageRank score on this node.
 *
 * @param pageRank the rank value to store
 * @return this node, so that calls can be chained
 */
public Node setPageRank(double pageRank) {
    this.pageRank = pageRank;

    return this;
}
/**
 * Returns the names of the pages this node links to.
 *
 * @return the stored array itself (not a copy), or {@code null} if none was set
 */
public String[] getAdjacentNodeNames() {
    return this.adjacentNodeNames;
}
/**
 * Stores the names of the pages this node links to. The array is kept by
 * reference, not copied.
 *
 * @param adjacentNodeNames the outgoing link targets
 * @return this node, so that calls can be chained
 */
public Node setAdjacentNodeNames(String[] adjacentNodeNames) {
    this.adjacentNodeNames = adjacentNodeNames;

    return this;
}
/**
 * Tells whether this node carries a non-empty list of adjacent page names.
 *
 * @return {@code true} if the adjacency array is set and has at least one entry
 */
public boolean containsAdjacentNodes() {
    if (adjacentNodeNames == null) {
        return false;
    }
    return adjacentNodeNames.length != 0;
}
/**
 * Renders this node in the text form that {@code fromMR} parses: the rank,
 * followed (when an adjacency array is set) by a separator and the adjacent
 * page names joined with the same separator.
 *
 * @return the serialized node
 */
public String toString() {
    String rankText = String.valueOf(pageRank);
    String[] neighbours = getAdjacentNodeNames();
    if (neighbours == null) {
        // No adjacency array at all: the rank is the whole record.
        return rankText;
    }
    return rankText + fieldSeparator + StringUtils.join(neighbours, fieldSeparator);
}
/**
 * Parses a node from its text form: a rank, optionally followed by
 * separator-delimited adjacent page names. Empty tokens are preserved, so a
 * trailing separator yields an empty adjacent name.
 *
 * @param value the serialized node, e.g. {@code "1.0\tB\tD"}
 * @return the parsed node
 * @throws IOException if {@code value} is null, has no parts, or its first
 *         part is not a valid number
 */
public static Node fromMR(String value) throws IOException {
    if (value == null) {
        // splitPreserveAllTokens returns null for null input; fail with a
        // descriptive error instead of a NullPointerException below.
        throw new IOException("Expected 1 or more parts but received null input");
    }
    String[] parts = StringUtils.splitPreserveAllTokens(value,
            fieldSeparator);
    if (parts.length < 1) {
        throw new IOException("Expected 1 or more parts but received "
                + parts.length);
    }
    double rank;
    try {
        rank = Double.parseDouble(parts[0]);
    } catch (NumberFormatException e) {
        // Report malformed records the same way as other bad input, keeping the cause.
        throw new IOException("Invalid page rank '" + parts[0]
                + "' in record: " + value, e);
    }
    Node node = new Node().setPageRank(rank);
    if (parts.length > 1) {
        node.setAdjacentNodeNames(Arrays.copyOfRange(parts, 1, parts.length));
    }
    return node;
}
Point 3:Mapper
static class PageRankMapper extends Mapper<Text, Text, Text, Text> {
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
int runCount = context.getConfiguration().getInt("runCount", 1);
String page = key.toString();
Node node = null;
if (runCount == 1) {
node = Node.fromMR("1.0" + "\t" + value.toString());
} else {
node = Node.fromMR(value.toString());
}
// A 1.0 B D
context.write(new Text(page), new Text(node.toString()));
if (node.containsAdjacentNodes()) {
double outValue = node.getPageRank() / node.getAdjacentNodeNames().length;
for (int i = 0; i < node.getAdjacentNodeNames().length; i++) {
String outPage = node.getAdjacentNodeNames()[i];
// B 0.5
// D 0.5
context.write(new Text(outPage), new Text(outValue + ""));
}
}
}
}
Point 4:Reducer
static class PageRankReducer extends Reducer<Text, Text, Text, Text> {
protected void reduce(Text key, Iterable<Text> iterable, Context context)
throws IOException, InterruptedException {
double sum = 0.0;
Node sourceNode = null;
for (Text i : iterable) {
Node node = Node.fromMR(i.toString());
if (node.containsAdjacentNodes()) {
sourceNode = node;
} else {
sum = sum + node.getPageRank();
}
}
// 4为页面总数
double newPR = (0.15 / 4.0) + (0.85 * sum);
System.out.println("*********** new pageRank value is " + newPR);
// 把新的pr值和计算之前的pr比较
double d = newPR - sourceNode.getPageRank();
int j = (int) (d * 1000.0);
j = Math.abs(j);
System.out.println(j + "___________");
context.getCounter(Mycounter.my).increment(j);
sourceNode.setPageRank(newPR);
context.write(key, new Text(sourceNode.toString()));
}
}
Point 5: MapReduce — main
public class RunJob {
/**
 * Job counter used to detect convergence. The reducer increments it by the
 * absolute rank change of each page (scaled by 1000 and truncated to an int),
 * and main reads the total after every job.
 */
public static enum Mycounter {
// The single counter holding the accumulated, scaled rank change.
my
}
/**
 * Runs PageRank as a chain of MapReduce jobs. Job {@code i} reads the output
 * of job {@code i - 1} (the first job reads /data/pagerank.txt) and writes to
 * /pagerank/pr{i}. Iteration stops once the average rank change per page
 * falls below the threshold, or as soon as a job fails.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Configuration config = new Configuration();
    // To run against a remote cluster, point the configuration at it, e.g.:
    // config.set("fs.defaultFS", "hdfs://node1:8020");
    // config.set("yarn.resourcemanager.hostname", "node3");
    final double threshold = 0.001;
    // The reducer adds |rank change| * 1000 per page to the counter; with 4
    // pages, dividing by 4 * 1000 gives the average change per page.
    final double counterScale = 4000.0;
    int i = 0;
    while (true) {
        i++;
        try {
            config.setInt("runCount", i);
            FileSystem fs = FileSystem.get(config);
            Job job = Job.getInstance(config);
            job.setJarByClass(RunJob.class);
            job.setJobName("pr" + i);
            job.setMapperClass(PageRankMapper.class);
            job.setReducerClass(PageRankReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(KeyValueTextInputFormat.class);
            Path inputPath = new Path("/data/pagerank.txt");
            if (i > 1) {
                // Later iterations consume the previous iteration's output.
                inputPath = new Path("/pagerank/pr" + (i - 1));
            }
            FileInputFormat.addInputPath(job, inputPath);
            Path outpath = new Path("/pagerank/pr" + i);
            if (fs.exists(outpath)) {
                // Remove stale output from an earlier run of this iteration.
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            if (!job.waitForCompletion(true)) {
                // A failed job leaves no usable input for the next iteration,
                // so continuing would loop forever on a missing path.
                System.err.println("Job pr" + i + " failed; stopping.");
                break;
            }
            System.out.println("success.");
            long sum = job.getCounters().findCounter(Mycounter.my).getValue();
            System.out.println(sum);
            double avgd = sum / counterScale;
            if (avgd < threshold) {
                break;
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the interrupt flag for any enclosing code.
                Thread.currentThread().interrupt();
            }
            e.printStackTrace();
            // Do not start further iterations after an error; they would fail
            // the same way and never terminate.
            break;
        }
    }
}