mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
Worker <> GPU Mapping Information in Dashboard (#9163)
This commit is contained in:
parent
4ba4110dec
commit
ca54da66b6
5 changed files with 182 additions and 81 deletions
|
@ -0,0 +1,5 @@
|
|||
import { styled, Typography } from "@material-ui/core";
|
||||
|
||||
/**
 * A Material-UI `Typography` variant that adds one theme spacing unit of
 * padding on its right-hand side.
 */
export const RightPaddedTypography = styled(Typography)(({ theme }) => {
  const paddingRight = theme.spacing(1);
  return { paddingRight };
});
|
|
@ -1,4 +1,4 @@
|
|||
import { createStyles, Theme, withStyles, WithStyles } from "@material-ui/core";
|
||||
import { createStyles, makeStyles, Theme, Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
|
||||
const blend = (
|
||||
|
@ -11,58 +11,58 @@ const blend = (
|
|||
b1 * (1 - ratio) + b2 * ratio,
|
||||
];
|
||||
|
||||
const styles = (theme: Theme) =>
|
||||
const useUsageBarStyles = makeStyles((theme: Theme) =>
|
||||
createStyles({
|
||||
root: {
|
||||
borderColor: theme.palette.divider,
|
||||
borderStyle: "solid",
|
||||
borderWidth: 1,
|
||||
display: "flex",
|
||||
flexGrow: 1,
|
||||
},
|
||||
inner: {
|
||||
paddingLeft: theme.spacing(1),
|
||||
paddingRight: theme.spacing(1),
|
||||
},
|
||||
});
|
||||
}),
|
||||
);
|
||||
|
||||
type Props = {
|
||||
type UsageBarProps = {
|
||||
percent: number;
|
||||
text: string;
|
||||
};
|
||||
|
||||
class UsageBar extends React.Component<Props & WithStyles<typeof styles>> {
|
||||
render() {
|
||||
const { classes, text } = this.props;
|
||||
const UsageBar: React.FC<UsageBarProps> = ({ percent, text }) => {
|
||||
const classes = useUsageBarStyles();
|
||||
const safePercent = Math.max(Math.min(percent, 100), 0);
|
||||
const minColor = [0, 255, 0];
|
||||
const maxColor = [255, 0, 0];
|
||||
|
||||
let { percent } = this.props;
|
||||
percent = Math.max(percent, 0);
|
||||
percent = Math.min(percent, 100);
|
||||
const leftColor = minColor;
|
||||
const rightColor = blend(minColor, maxColor, safePercent / 100);
|
||||
const alpha = 0.2;
|
||||
|
||||
const minColor = [0, 255, 0];
|
||||
const maxColor = [255, 0, 0];
|
||||
const gradient = `
|
||||
linear-gradient(
|
||||
to right,
|
||||
rgba(${leftColor.join(",")}, ${alpha}) 0%,
|
||||
rgba(${rightColor.join(",")}, ${alpha}) ${safePercent}%,
|
||||
transparent ${safePercent}%
|
||||
)
|
||||
`;
|
||||
|
||||
const leftColor = minColor;
|
||||
const rightColor = blend(minColor, maxColor, percent / 100);
|
||||
const alpha = 0.2;
|
||||
// Use a nested `span` here because the right border is affected by the
|
||||
// gradient background otherwise.
|
||||
return (
|
||||
<span className={classes.root}>
|
||||
<span
|
||||
className={classes.inner}
|
||||
style={{ background: gradient, flexGrow: 1 }}
|
||||
>
|
||||
<Typography align="center">{text}</Typography>
|
||||
</span>
|
||||
</span>
|
||||
);
|
||||
};
|
||||
|
||||
const gradient = `
|
||||
linear-gradient(
|
||||
to right,
|
||||
rgba(${leftColor.join(",")}, ${alpha}) 0%,
|
||||
rgba(${rightColor.join(",")}, ${alpha}) ${percent}%,
|
||||
transparent ${percent}%
|
||||
)
|
||||
`;
|
||||
|
||||
// Use a nested `div` here because the right border is affected by the
|
||||
// gradient background otherwise.
|
||||
return (
|
||||
<div className={classes.root}>
|
||||
<div className={classes.inner} style={{ background: gradient }}>
|
||||
{text}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export default withStyles(styles)(UsageBar);
|
||||
export default UsageBar;
|
||||
|
|
|
@ -21,6 +21,9 @@ export const formatUsage = (
|
|||
/**
 * Formats a used/total pair of MiB values together with the usage percentage,
 * e.g. `"512 MiB / 1024 MiB (50.0%)"`.
 *
 * When the percentage cannot be computed (for example `total` is 0, which
 * would otherwise render as `NaN%` or `Infinity%`), the percentage is shown
 * as `N/A` instead.
 *
 * @param used - Amount in use, in MiB.
 * @param total - Total amount available, in MiB.
 * @returns The formatted ratio string.
 */
export const MiBRatio = (used: number, total: number) => {
  const percent = 100 * (used / total);
  // Division by zero (or NaN inputs) yields a non-finite value; never print it raw.
  const percentText = Number.isFinite(percent)
    ? `${percent.toFixed(1)}%`
    : "N/A";
  return `${used} MiB / ${total} MiB (${percentText})`;
};
|
||||
|
||||
/**
 * Formats a used/total pair of MiB values as `"<used> MiB / <total> MiB"`,
 * without any percentage suffix.
 */
export const MiBRatioNoPercent = (used: number, total: number) => {
  const parts = [used, total].map((amount) => `${amount} MiB`);
  return parts.join(" / ");
};
|
||||
|
||||
export const formatDuration = (durationInSeconds: number) => {
|
||||
const durationSeconds = Math.floor(durationInSeconds) % 60;
|
||||
const durationMinutes = Math.floor(durationInSeconds / 60) % 60;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import { Typography } from "@material-ui/core";
|
||||
import { Box, Tooltip, Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { GPUStats, ResourceSlot } from "../../../../api";
|
||||
import { RightPaddedTypography } from "../../../../common/CustomTypography";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { getWeightedAverage, sum } from "../../../../common/util";
|
||||
import {
|
||||
|
@ -9,9 +11,14 @@ import {
|
|||
WorkerFeatureComponent,
|
||||
} from "./types";
|
||||
|
||||
const GPU_COL_WIDTH = 120;
|
||||
|
||||
const clusterUtilization = (nodes: Array<Node>): number => {
|
||||
const utils = nodes
|
||||
.map((node) => ({ weight: node.gpus.length, value: nodeUtilization(node) }))
|
||||
.map((node) => ({
|
||||
weight: node.gpus.length,
|
||||
value: nodeAverageUtilization(node),
|
||||
}))
|
||||
.filter((util) => !isNaN(util.value));
|
||||
if (utils.length === 0) {
|
||||
return NaN;
|
||||
|
@ -19,7 +26,7 @@ const clusterUtilization = (nodes: Array<Node>): number => {
|
|||
return getWeightedAverage(utils);
|
||||
};
|
||||
|
||||
const nodeUtilization = (node: Node): number => {
|
||||
const nodeAverageUtilization = (node: Node): number => {
|
||||
if (!node.gpus || node.gpus.length === 0) {
|
||||
return NaN;
|
||||
}
|
||||
|
@ -31,7 +38,7 @@ const nodeUtilization = (node: Node): number => {
|
|||
export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
|
||||
const clusterAverageUtilization = clusterUtilization(nodes);
|
||||
return (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
<div style={{ minWidth: GPU_COL_WIDTH }}>
|
||||
{isNaN(clusterAverageUtilization) ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
|
@ -47,20 +54,63 @@ export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
|
|||
};
|
||||
|
||||
export const NodeGPU: NodeFeatureComponent = ({ node }) => {
|
||||
const nodeUtil = nodeUtilization(node);
|
||||
const hasGPU = node.gpus !== undefined && node.gpus.length !== 0;
|
||||
return (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
{isNaN(nodeUtil) ? (
|
||||
<div style={{ minWidth: GPU_COL_WIDTH }}>
|
||||
{hasGPU ? (
|
||||
node.gpus.map((gpu, i) => <NodeGPUEntry gpu={gpu} slot={i} />)
|
||||
) : (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
) : (
|
||||
<UsageBar percent={nodeUtil} text={`${nodeUtil.toFixed(1)}%`} />
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
/** Props accepted by `NodeGPUEntry`. */
type NodeGPUEntryProps = {
  /** Stats of the GPU to display; its `name` and `utilization_gpu` are read. */
  gpu: GPUStats;
  /** Number rendered inside the `[n]:` label in front of the usage bar. */
  slot: number;
};
|
||||
|
||||
/**
 * Renders one GPU row: a `[slot]:` label, with the GPU name as its tooltip,
 * followed by a usage bar showing the GPU utilization to one decimal place.
 */
const NodeGPUEntry: React.FC<NodeGPUEntryProps> = (props) => {
  const { name, utilization_gpu: utilization } = props.gpu;
  const label = `${utilization.toFixed(1)}%`;
  return (
    <Box display="flex" style={{ minWidth: GPU_COL_WIDTH }}>
      <Tooltip title={name}>
        <RightPaddedTypography variant="body1">
          [{props.slot}]:
        </RightPaddedTypography>
      </Tooltip>
      <UsageBar percent={utilization} text={label} />
    </Box>
  );
};
|
||||
|
||||
/**
 * Props accepted by `WorkerGPUEntry`: the single resource slot whose `slot`
 * and `allocation` values are displayed.
 */
type WorkerGPUEntryProps = { resourceSlot: ResourceSlot };
|
||||
|
||||
/**
 * Renders one GPU resource slot held by a worker as `[slot]: allocation`.
 */
const WorkerGPUEntry: React.FC<WorkerGPUEntryProps> = ({ resourceSlot }) => {
  const { allocation, slot } = resourceSlot;

  // Workaround: an assigned GPU slot 0 appears to arrive as `undefined` in the
  // API response (root cause not identified). An undefined slot with an
  // allocation of at least 1 is therefore displayed as slot 0. Any other
  // undefined slot (e.g. a partial allocation) is genuinely unknown and is
  // displayed as "?".
  let slotMsg: string;
  if (slot !== undefined) {
    slotMsg = slot.toString();
  } else if (allocation >= 1) {
    slotMsg = "0";
  } else {
    slotMsg = "?";
  }

  return (
    <Typography variant="body1">
      [{slotMsg}]: {allocation}
    </Typography>
  );
};
|
||||
|
||||
export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
|
||||
const workerRes = rayletWorker?.coreWorkerStats.usedResources;
|
||||
const workerUsedGPUResources = workerRes?.["GPU"];
|
||||
|
@ -72,13 +122,19 @@ export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
|
|||
</Typography>
|
||||
);
|
||||
} else {
|
||||
const aggregateAllocation = sum(
|
||||
workerUsedGPUResources.resourceSlots.map(
|
||||
(resourceSlot) => resourceSlot.allocation,
|
||||
),
|
||||
);
|
||||
const plural = aggregateAllocation === 1 ? "" : "s";
|
||||
message = <b>{`${aggregateAllocation} GPU${plural} in use`}</b>;
|
||||
message = workerUsedGPUResources.resourceSlots
|
||||
.sort((slot1, slot2) => {
|
||||
if (slot1.slot === undefined && slot2.slot === undefined) {
|
||||
return 0;
|
||||
} else if (slot1.slot === undefined) {
|
||||
return 1;
|
||||
} else if (slot2.slot === undefined) {
|
||||
return -1;
|
||||
} else {
|
||||
return slot1.slot - slot2.slot;
|
||||
}
|
||||
})
|
||||
.map((resourceSlot) => <WorkerGPUEntry resourceSlot={resourceSlot} />);
|
||||
}
|
||||
return <div style={{ minWidth: 60 }}>{message}</div>;
|
||||
};
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import { Typography } from "@material-ui/core";
|
||||
import { Box, Tooltip, Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { GPUStats } from "../../../../api";
|
||||
import { MiBRatio } from "../../../../common/formatUtils";
|
||||
import { RightPaddedTypography } from "../../../../common/CustomTypography";
|
||||
import { MiBRatioNoPercent } from "../../../../common/formatUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { getWeightedAverage, sum } from "../../../../common/util";
|
||||
import {
|
||||
|
@ -11,6 +12,8 @@ import {
|
|||
WorkerFeatureComponent,
|
||||
} from "./types";
|
||||
|
||||
const GRAM_COL_WIDTH = 120;
|
||||
|
||||
const nodeGRAMUtilization = (node: Node) => {
|
||||
const utilization = (gpu: GPUStats) => gpu.memory_used / gpu.memory_total;
|
||||
if (node.gpus.length === 0) {
|
||||
|
@ -54,43 +57,77 @@ export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => {
|
|||
};
|
||||
|
||||
export const NodeGRAM: NodeFeatureComponent = ({ node }) => {
|
||||
const gramUtil = nodeGRAMUtilization(node);
|
||||
const nodeGRAMEntries = node.gpus.map((gpu, i) => {
|
||||
const props = {
|
||||
gpuName: gpu.name,
|
||||
utilization: gpu.memory_used,
|
||||
total: gpu.memory_total,
|
||||
slot: i,
|
||||
};
|
||||
return <GRAMEntry {...props} />;
|
||||
});
|
||||
return (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
{isNaN(gramUtil) ? (
|
||||
{nodeGRAMEntries.length === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
) : (
|
||||
<UsageBar percent={gramUtil} text={`${gramUtil.toFixed(1)}%`} />
|
||||
<div style={{ minWidth: GRAM_COL_WIDTH }}>{nodeGRAMEntries}</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
|
||||
const workerProcessPerGPU = node.gpus
|
||||
.map((gpu) => gpu.processes)
|
||||
.map((processes) =>
|
||||
processes.find((process) => process.pid === worker.pid),
|
||||
);
|
||||
const workerUtilPerGPU = workerProcessPerGPU.map(
|
||||
(proc) => proc?.gpu_memory_usage || 0,
|
||||
);
|
||||
const totalNodeGRAM = sum(node.gpus.map((gpu) => gpu.memory_total));
|
||||
const usedGRAM = sum(workerUtilPerGPU);
|
||||
/** Props accepted by `GRAMEntry`. */
type GRAMEntryProps = {
  /** Number rendered inside the `[n]:` label. */
  slot: number;
  /** GPU name, shown as the tooltip of the entry. */
  gpuName: string;
  /** Used amount; formatted as the "used" side of the MiB ratio. */
  utilization: number;
  /** Total amount; formatted as the "total" side of the MiB ratio. */
  total: number;
};
|
||||
|
||||
const GRAMEntry: React.FC<GRAMEntryProps> = ({
|
||||
gpuName,
|
||||
slot,
|
||||
utilization,
|
||||
total,
|
||||
}) => {
|
||||
const ratioStr = MiBRatioNoPercent(utilization, total);
|
||||
return (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
{node.gpus.length === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
) : (
|
||||
<UsageBar
|
||||
percent={100 * (usedGRAM / totalNodeGRAM)}
|
||||
text={MiBRatio(usedGRAM, totalNodeGRAM)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
<Box display="flex" style={{ minWidth: GRAM_COL_WIDTH }}>
|
||||
<Tooltip title={gpuName}>
|
||||
<RightPaddedTypography variant="body1">
|
||||
[{slot}]: {ratioStr}
|
||||
</RightPaddedTypography>
|
||||
</Tooltip>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Shows, for a single worker, the GPU memory it uses on each GPU of its node.
 *
 * For every GPU on the node, the GPU's process list is searched for a process
 * whose pid matches the worker's pid. Each match yields one `GRAMEntry`
 * (labelled with the GPU's index in `node.gpus`); GPUs without a matching
 * process are omitted. If no GPU has a matching process, "N/A" is rendered.
 */
export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
  const workerGRAMEntries = node.gpus
    .map((gpu, i) => {
      const process = gpu.processes.find(
        (process) => process.pid === worker.pid,
      );
      if (!process) {
        return undefined;
      }
      // The GPU index is used as the React key so that sibling entries in the
      // rendered list are uniquely identified.
      return (
        <GRAMEntry
          key={i}
          gpuName={gpu.name}
          total={gpu.memory_total}
          utilization={process.gpu_memory_usage}
          slot={i}
        />
      );
    })
    .filter((entry) => entry !== undefined);

  return workerGRAMEntries.length === 0 ? (
    <Typography color="textSecondary" component="span" variant="inherit">
      N/A
    </Typography>
  ) : (
    <div style={{ minWidth: GRAM_COL_WIDTH }}>{workerGRAMEntries}</div>
  );
};
|
||||
|
|
Loading…
Add table
Reference in a new issue