Skip to content

Commit

Permalink
Fetch and look up GPU SKUs
Browse files Browse the repository at this point in the history
  • Loading branch information
sam-schu committed Dec 19, 2024
1 parent 02437d2 commit e80cef3
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.google.cloud.billing.v1._
import com.typesafe.config.Config
import com.typesafe.scalalogging.LazyLogging
import common.util.StringUtil.EnhancedToStringable
import common.validation.ErrorOr
import common.validation.ErrorOr._
import common.validation.ErrorOr.ErrorOr
import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage
Expand All @@ -18,9 +19,9 @@ import scala.jdk.CollectionConverters.IterableHasAsScala
import java.time.temporal.ChronoUnit.SECONDS
import scala.util.Using

case class CostCatalogKey(machineType: MachineType,
case class CostCatalogKey(resourceInfo: ResourceInfo,
usageType: UsageType,
machineCustomization: MachineCustomization,
machineCustomization: Option[MachineCustomization],
resourceType: ResourceType,
region: String
)
Expand All @@ -38,28 +39,47 @@ object CostCatalogKey {
final val expectedSku =
(".*?N1 Predefined Instance (Core|Ram) .*|" +
".*?N2 Custom Instance (Core|Ram) .*|" +
".*?N2D AMD Custom Instance (Core|Ram) .*").r
".*?N2D AMD Custom Instance (Core|Ram) .*|" +
"Nvidia Tesla V100 GPU .*|" +
"Nvidia Tesla P100 GPU .*|" +
"Nvidia Tesla P4 GPU .*|" +
"Nvidia Tesla T4 GPU .*").r
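// TODO: because `findFirstIn` performs an unanchored search, the GPU alternatives will still match
// descriptions that have extra text in front of them - the match just won't include those preceding
// characters. For the same reason, the leading `.*?` on the machine-type alternatives looks redundant:
// what is the point of it? Confirm, or remove it.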

def apply(sku: Sku): List[CostCatalogKey] =
for {
_ <- expectedSku.findFirstIn(sku.getDescription).toList
machineType <- MachineType.fromSku(sku).toList
resourceInfo <- ResourceInfo.fromSku(sku).toList
resourceType <- ResourceType.fromSku(sku).toList
usageType <- UsageType.fromSku(sku).toList
machineCustomization <- MachineCustomization.fromSku(sku).toList
region <- sku.getServiceRegionsList.asScala.toList
} yield CostCatalogKey(machineType, usageType, machineCustomization, resourceType, region)
machineCustomization = if (resourceType == Gpu) None else Some(MachineCustomization.fromCpuOrRamSku(sku))
} yield CostCatalogKey(resourceInfo, usageType, machineCustomization, resourceType, region)

def apply(instantiatedVmInfo: InstantiatedVmInfo, resourceType: ResourceType): ErrorOr[CostCatalogKey] =
MachineType.fromGoogleMachineTypeString(instantiatedVmInfo.machineType).map { mType =>
CostCatalogKey(
mType,
if (resourceType == Gpu)
for {
gpuInfo <- ErrorOr(instantiatedVmInfo.gpuInfo.get) // TODO: improve error message (default: "None.get")
gpuType <- GpuType.fromGpuInfo(gpuInfo)
} yield CostCatalogKey(
gpuType,
UsageType.fromBoolean(instantiatedVmInfo.preemptible),
MachineCustomization.fromMachineTypeString(instantiatedVmInfo.machineType),
resourceType,
None,
Gpu,
instantiatedVmInfo.region
)
}
else
MachineType.fromGoogleMachineTypeString(instantiatedVmInfo.machineType).map { mType =>
CostCatalogKey(
mType,
UsageType.fromBoolean(instantiatedVmInfo.preemptible),
Some(MachineCustomization.fromMachineTypeString(instantiatedVmInfo.machineType)),
resourceType,
instantiatedVmInfo.region
)
}
}

case class GcpCostLookupRequest(vmInfo: InstantiatedVmInfo, replyTo: ActorRef) extends ServiceRegistryMessage {
Expand Down Expand Up @@ -116,6 +136,9 @@ object GcpCostCatalogService {
s"Expected usage units of RAM to be 'GiBy.h'. Got ${usageUnit}".invalidNel
}
}

/**
  * Stub for GPU pricing.
  *
  * TODO: implement this. For now neither `gpuSku` nor `gpuCount` is consulted and the result is always a
  * valid price of 1.
  */
def calculateGpuPricePerHour(gpuSku: Sku, gpuCount: Long): ErrorOr[BigDecimal] = {
  val placeholderPricePerHour = BigDecimal(1)
  placeholderPricePerHour.validNel
}
}

/**
Expand Down Expand Up @@ -200,8 +223,8 @@ class GcpCostCatalogService(serviceConfig: Config, globalConfig: Config, service
// As of Sept 2024 the cost catalog does not contain entries for custom N1 machines. If we're using N1, attempt
// to fall back to predefined.
lazy val n1PredefinedKey =
(key.machineType, key.machineCustomization) match {
case (N1, Custom) => Option(key.copy(machineCustomization = Predefined))
(key.resourceInfo, key.machineCustomization) match {
case (N1, Some(Custom)) => Option(key.copy(machineCustomization = Some(Predefined)))
case _ => None
}
val sku = getSku(key).orElse(n1PredefinedKey.flatMap(getSku)).map(_.catalogObject)
Expand All @@ -212,23 +235,47 @@ class GcpCostCatalogService(serviceConfig: Config, globalConfig: Config, service
}

// TODO consider caching this, answers won't change until we reload the SKUs
def calculateVmCostPerHour(instantiatedVmInfo: InstantiatedVmInfo): ErrorOr[BigDecimal] =
for {
def calculateVmCostPerHour(instantiatedVmInfo: InstantiatedVmInfo): ErrorOr[BigDecimal] = {
val cpuPricingInfoErrorOr = for {
cpuSku <- lookUpSku(instantiatedVmInfo, Cpu)
coreCount <- MachineType.extractCoreCountFromMachineTypeString(instantiatedVmInfo.machineType)
cpuPricePerHour <- GcpCostCatalogService.calculateCpuPricePerHour(cpuSku, coreCount)
} yield (cpuSku, coreCount, cpuPricePerHour)

val ramPricingInfoErrorOr = for {
ramSku <- lookUpSku(instantiatedVmInfo, Ram)
ramMbCount <- MachineType.extractRamMbFromMachineTypeString(instantiatedVmInfo.machineType)
ramGbCount = ramMbCount / 1024d // need sub-integer resolution
ramPricePerHour <- GcpCostCatalogService.calculateRamPricePerHour(ramSku, ramGbCount)
totalCost = cpuPricePerHour + ramPricePerHour
} yield (ramSku, ramGbCount, ramPricePerHour)

val gpuPricingInfoErrorOr = instantiatedVmInfo.gpuInfo match {
case None => (None, 0, BigDecimal(0)).validNel
case Some(gpuInfo) =>
for {
gpuSku <- lookUpSku(instantiatedVmInfo, Gpu)
gpuCount = gpuInfo.count
gpuPricePerHour <- GcpCostCatalogService.calculateGpuPricePerHour(gpuSku, gpuCount)
} yield (Some(gpuSku), gpuCount, gpuPricePerHour)
}

for {
cpuPricingInfo <- cpuPricingInfoErrorOr
(cpuSku, coreCount, cpuPricePerHour) = cpuPricingInfo
ramPricingInfo <- ramPricingInfoErrorOr
(ramSku, ramGbCount, ramPricePerHour) = ramPricingInfo
gpuPricingInfo <- gpuPricingInfoErrorOr
(gpuSku, gpuCount, gpuPricePerHour) = gpuPricingInfo
totalCost = cpuPricePerHour + ramPricePerHour + gpuPricePerHour
_ = logger.info(
s"Calculated vmCostPerHour of ${totalCost} " +
s"(CPU ${cpuPricePerHour} for ${coreCount} cores [${cpuSku.getDescription}], " +
s"RAM ${ramPricePerHour} for ${ramGbCount} Gb [${ramSku.getDescription}]) " +
s"RAM ${ramPricePerHour} for ${ramGbCount} Gb [${ramSku.getDescription}], " +
s"GPU ${gpuPricePerHour} for ${gpuCount} GPUs [${gpuSku.map(_.getDescription)}]) " +
s"for ${instantiatedVmInfo}"
)
} yield totalCost
}

def serviceRegistryActor: ActorRef = serviceRegistry
override def receive: Receive = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,28 @@ case class InstantiatedVmInfo(region: String, machineType: String, gpuInfo: Opti
* These types reflect hardcoded strings found in a google cost catalog.
*/

sealed trait MachineType { def machineTypeName: String }
case object N1 extends MachineType { override val machineTypeName = "n1" }
case object N2 extends MachineType { override val machineTypeName = "n2" }
case object N2d extends MachineType { override val machineTypeName = "n2d" }
/** Marker trait for the kind of resource a cost catalog key refers to; extended in this file by `MachineType` and `GpuType`. */
sealed trait ResourceInfo

object MachineType {
def fromSku(sku: Sku): Option[MachineType] = {
object ResourceInfo {
def fromSku(sku: Sku): Option[ResourceInfo] = {
val tokenizedDescription = sku.getDescription.toLowerCase.split(" ")
if (tokenizedDescription.contains(N1.machineTypeName)) Some(N1)
else if (tokenizedDescription.contains(N2.machineTypeName)) Some(N2)
else if (tokenizedDescription.contains(N2d.machineTypeName)) Some(N2d)
else if (tokenizedDescription.contains(NvidiaTeslaV100.gpuTypeName)) Some(NvidiaTeslaV100)
else if (tokenizedDescription.contains(NvidiaTeslaP100.gpuTypeName)) Some(NvidiaTeslaP100)
else if (tokenizedDescription.contains(NvidiaTeslaP4.gpuTypeName)) Some(NvidiaTeslaP4)
else if (tokenizedDescription.contains(NvidiaTeslaT4.gpuTypeName)) Some(NvidiaTeslaT4)
else Option.empty
}
}

/**
  * Machine families that can appear in a cost catalog key.
  * `machineTypeName` is the lowercase token looked for in a tokenized SKU description.
  */
sealed trait MachineType extends ResourceInfo {
  def machineTypeName: String
}

case object N1 extends MachineType {
  override val machineTypeName: String = "n1"
}

case object N2 extends MachineType {
  override val machineTypeName: String = "n2"
}

case object N2d extends MachineType {
  override val machineTypeName: String = "n2d"
}

object MachineType {
// expects a string that looks something like "n1-standard-1" or "custom-1-4096"
def fromGoogleMachineTypeString(machineTypeString: String): ErrorOr[MachineType] = {
val mType = machineTypeString.toLowerCase
Expand Down Expand Up @@ -63,6 +71,24 @@ object MachineType {
}
}

/**
  * GPU models that can appear in a cost catalog key.
  * `gpuTypeName` is the lowercase token looked for in a tokenized SKU description.
  */
sealed trait GpuType extends ResourceInfo {
  def gpuTypeName: String
}

case object NvidiaTeslaV100 extends GpuType {
  override val gpuTypeName: String = "v100"
}

case object NvidiaTeslaP100 extends GpuType {
  override val gpuTypeName: String = "p100"
}

case object NvidiaTeslaP4 extends GpuType {
  override val gpuTypeName: String = "p4"
}

case object NvidiaTeslaT4 extends GpuType {
  override val gpuTypeName: String = "t4"
}

object GpuType {

  /**
    * Resolves the GPU model from the `gpuType` string carried by a `GpuInfo`.
    *
    * The string is lowercased and matched by suffix, so it is expected to look something like
    * "nvidia-tesla-v100". Suffixes are tried in the order listed below; a string that matches none
    * of them yields an invalid result that names the lowercased value.
    */
  def fromGpuInfo(gpuInfo: GpuInfo): ErrorOr[GpuType] = {
    val normalizedName = gpuInfo.gpuType.toLowerCase

    // Order matters only in that the first matching suffix wins, mirroring a top-to-bottom check.
    val knownSuffixes: List[(String, GpuType)] = List(
      "-v100" -> NvidiaTeslaV100,
      "-p100" -> NvidiaTeslaP100,
      "-p4" -> NvidiaTeslaP4,
      "-t4" -> NvidiaTeslaT4
    )

    val recognizedModel = knownSuffixes.collectFirst {
      case (suffix, model) if normalizedName.endsWith(suffix) => model
    }

    recognizedModel match {
      case Some(model) => model.validNel
      case None => s"Unrecognized GPU type: $normalizedName".invalidNel
    }
  }
}

/** Billing mode of a VM; `typeName` is the lowercase label associated with each mode. */
sealed trait UsageType {
  def typeName: String
}

case object OnDemand extends UsageType {
  override val typeName: String = "ondemand"
}

case object Preemptible extends UsageType {
  override val typeName: String = "preemptible"
}
Expand All @@ -78,7 +104,6 @@ object UsageType {
case true => Preemptible
case false => OnDemand
}

}

sealed trait MachineCustomization { def customizationName: String }
Expand All @@ -96,28 +121,30 @@ object MachineCustomization {
- For non-N1 machines, both custom and predefined SKUs are included. Custom ones include "Custom" in their description
strings, while predefined SKUs are only identifiable by the absence of "Custom".
*/
def fromSku(sku: Sku): Option[MachineCustomization] = {
def fromCpuOrRamSku(sku: Sku): MachineCustomization = {
val tokenizedDescription = sku.getDescription.toLowerCase.split(" ")

// ex. "N1 Predefined Instance Core running in Montreal"
if (tokenizedDescription.contains(Predefined.customizationName)) Some(Predefined)
if (tokenizedDescription.contains(Predefined.customizationName)) Predefined
// ex. "N2 Custom Instance Core running in Paris"
else if (tokenizedDescription.contains(Custom.customizationName)) Some(Custom)
else if (tokenizedDescription.contains(Custom.customizationName)) Custom
// ex. "N2 Instance Core running in Paris"
else Some(Predefined)
else Predefined
}
}

/**
  * Kind of billable resource a SKU prices.
  * `groupName` is the lowercase resource-group string each kind is matched against.
  */
sealed trait ResourceType {
  def groupName: String
}

// These stay `val`s so they remain usable as stable-identifier patterns in a `match`.
case object Cpu extends ResourceType {
  override val groupName: String = "cpu"
}

case object Ram extends ResourceType {
  override val groupName: String = "ram"
}

case object Gpu extends ResourceType {
  override val groupName: String = "gpu"
}

object ResourceType {
def fromSku(sku: Sku): Option[ResourceType] = {
val tokenizedDescription = sku.getDescription.toLowerCase.split(" ")
sku.getCategory.getResourceGroup.toLowerCase match {
case Cpu.groupName => Some(Cpu)
case Ram.groupName => Some(Ram)
case Gpu.groupName => Some(Gpu)
case "n1standard" if tokenizedDescription.contains("ram") => Some(Ram)
case "n1standard" if tokenizedDescription.contains("core") => Some(Cpu)
case _ => Option.empty
Expand Down

0 comments on commit e80cef3

Please sign in to comment.