Skip to content

Commit f8ca383

Browse files
author
jiayuhan-it
committed
Merge branch 'work/history-rank' into 'master'
Add role and rank to the job history page. See merge request xt_hadoop/hbox!36
2 parents 14e44fe + 8f5796e commit f8ca383

File tree

10 files changed

+361
-317
lines changed

10 files changed

+361
-317
lines changed

common/src/main/java/net/qihoo/hbox/api/HboxConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ public interface HboxConstants {
2727

2828
String SCHEDULER = "scheduler";
2929

30+
String SERVER = "server";
31+
3032
String STREAM_INPUT_DIR = "mapreduce.input.fileinputformat.inputdir";
3133

3234
String STREAM_OUTPUT_DIR = "mapreduce.output.fileoutputformat.outputdir";

common/src/main/java/net/qihoo/hbox/common/AMParams.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ public interface AMParams {
2929

3030
public static final String CONTAINER_ROLE = "container.role";
3131

32+
public static final String CONTAINER_RANK = "container.rank";
33+
3234
public static final String CONTAINER_GPU_DEVICE = "container.gpu.device";
3335

3436
public static final String CONTAINER_LOG_ADDRESS = "container.log.address";

core/libexec/hbox-common-env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ HBOX_CLIENT_OPTS=("-Xmx1024m")
5757
__find_hbox_jar() {
5858
local jars=() pattern="${1:?usage __find_hbox_jar <find-name-pattern>}" full_hbox_home
5959
full_hbox_home=$(cd -- "$HBOX_HOME" && pwd) || return 66
60-
readarray -t jars < <(cd / && find "$full_hbox_home/" -maxdepth 1 -name "hbox-core-*.jar")
60+
readarray -t jars < <(cd / && find "$full_hbox_home/" -maxdepth 1 -name "$pattern")
6161
if ((${#jars[@]} == 0)); then
6262
echo "[ERROR] Failed to find $pattern in $HBOX_HOME/lib." >&2
6363
return 66

core/src/main/java/net/qihoo/hbox/AM/ApplicationMaster.java

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,10 +594,29 @@ public void run() {
594594
Map<HboxContainerId, String> containersAppFinishTime =
595595
applicationContext.getContainersAppFinishTime();
596596

597+
// container info schema (since 1.3a b30b967d7c283e8ac542b66413fcc2bc5c5fb95c):
598+
// 0: node url
599+
// 1: gpu device id
600+
// 2: role
601+
// 3: status
602+
// 4: cpu metrics
603+
// 5: gpu mem metrics
604+
// 6: gpu util metrics
605+
// 7: start time
606+
// 8: finish time
607+
// 9: percent progress
608+
// 10: log url
609+
// 11: stats - cpu
610+
// 12: stats - gpu mem
611+
// 13: stats - gpu utils
612+
// 14: stats - mem usage warn (if cpuStatistics.size > 0 || version >= 1.9.2)
613+
// 15: rank (since 1.9.2)
614+
int workerIdx = 0;
597615
for (Container container : workerContainers) {
598616
List<String> containerMessage = new ArrayList<>();
599617
containerMessage.add(container.getNodeHttpAddress());
600618
HboxContainerId currentContainerID = new HboxContainerId(container.getId());
619+
String rank = "-";
601620
if (applicationContext.getContainerGPUDevice(currentContainerID) != null) {
602621
if (applicationContext
603622
.getContainerGPUDevice(currentContainerID)
@@ -617,6 +636,7 @@ public void run() {
617636
containerMessage.add(HboxConstants.CHIEF);
618637
} else {
619638
containerMessage.add(HboxConstants.WORKER);
639+
rank = "" + workerIdx++;
620640
}
621641

622642
HboxContainerStatus status = applicationContext.getContainerStatus(currentContainerID);
@@ -677,6 +697,8 @@ public void run() {
677697
} else {
678698
usageStatistics.add("false");
679699
}
700+
} else {
701+
usageStatistics.add("-"); // container info schema idx=14
680702
}
681703

682704
if (containersAppStartTime.get(currentContainerID) != null
@@ -729,13 +751,16 @@ public void run() {
729751
container.getId().toString(),
730752
userName));
731753
containerMessage.addAll(usageStatistics);
754+
containerMessage.add(rank); // container info schema idx=15
732755
logMessage.put(container.getId().toString(), containerMessage);
733756
}
734757

758+
int psIdx = 0;
735759
for (Container container : psContainers) {
736760
List<String> containerMessage = new ArrayList<>();
737761
containerMessage.add(container.getNodeHttpAddress());
738762
HboxContainerId currentContainerID = new HboxContainerId(container.getId());
763+
String rank = "-";
739764
if (applicationContext.getContainerGPUDevice(currentContainerID) != null) {
740765
if (applicationContext
741766
.getContainerGPUDevice(currentContainerID)
@@ -750,17 +775,22 @@ public void run() {
750775
containerMessage.add("-");
751776
}
752777
if (hboxAppType.equals("TENSORFLOW") || "TENSOR2TENSOR".equals(hboxAppType)) {
753-
containerMessage.add("ps");
778+
containerMessage.add(HboxConstants.PS);
779+
rank = "" + psIdx++;
754780
} else if (hboxAppType.equals("MXNET")
755781
|| hboxAppType.equals("DISTLIGHTLDA")
756782
|| hboxAppType.equals("XFLOW")) {
757-
containerMessage.add("server");
783+
containerMessage.add(HboxConstants.SERVER);
758784
} else if (hboxAppType.equals("XDL")) {
759785
if (currentContainerID.toString().equals(schedulerContainerId)) {
760786
containerMessage.add(HboxConstants.SCHEDULER);
761787
} else {
762-
containerMessage.add("ps");
788+
containerMessage.add(HboxConstants.PS);
789+
rank = "" + psIdx++;
763790
}
791+
} else {
792+
containerMessage.add(HboxConstants.PS);
793+
rank = "" + psIdx++;
764794
}
765795
HboxContainerStatus status = applicationContext.getContainerStatus(currentContainerID);
766796
if (status != null) {
@@ -814,6 +844,8 @@ public void run() {
814844
} else {
815845
usageStatistics.add("false");
816846
}
847+
} else {
848+
usageStatistics.add("-"); // container info schema idx=14
817849
}
818850

819851
if (containersAppStartTime.get(currentContainerID) != null
@@ -841,6 +873,7 @@ public void run() {
841873
container.getId().toString(),
842874
userName));
843875
containerMessage.addAll(usageStatistics);
876+
containerMessage.add(rank); // container info schema idx=15
844877
logMessage.put(container.getId().toString(), containerMessage);
845878
}
846879

0 commit comments

Comments
 (0)