diff --git a/Makefile b/Makefile index 4faa6eb..2372872 100644 --- a/Makefile +++ b/Makefile @@ -350,6 +350,14 @@ tensorflow-serving-predict: ## Send prediction REST and webservice requests @curl -d '{"instances": [1.0, 2.0, 5.0, 10.0]}' -X POST http://tensorflow-serving.local/api/v1/prediction/grpc/predict @echo "" +tensorflow-serving-predict-public: ## Send prediction REST and webservice requests to public endpoint + @echo "Predicting via Webservice API accessing REST endpoint of TFS ..." + @curl --user tensorflow-serving:secret -d '{"instances": [1.0, 2.0, 5.0, 10.0]}' -X POST https://tensorflow-serving.polarize.ai/api/v1/prediction/predict + @echo "" + @echo "Predicting via Webservice API accessing gRPC endpoint of TFS ..." + @curl --user tensorflow-serving:secret -d '{"instances": [1.0, 2.0, 5.0, 10.0]}' -X POST https://tensorflow-serving.polarize.ai/api/v1/prediction/grpc/predict + @echo "" + tensorflow-serving-log-show: ## Show log of pod workflow/deploy/tools/log-show tensorflow-serving diff --git a/workflow/deploy/tensorflow-serving/src/Dockerfile b/workflow/deploy/tensorflow-serving/src/Dockerfile index baabeef..d51e5e0 100644 --- a/workflow/deploy/tensorflow-serving/src/Dockerfile +++ b/workflow/deploy/tensorflow-serving/src/Dockerfile @@ -34,7 +34,7 @@ EXPOSE 8500 EXPOSE 8501 # Inc on updates of base image -ENV BUMP=3 +ENV BUMP=5 RUN mkdir -p /meta && \ printf "Built on: %s\n" "$(hostname)" > /meta/tensorflow-serving.build && \ diff --git a/workflow/deploy/tensorflow-serving/src/webservice/main.py b/workflow/deploy/tensorflow-serving/src/webservice/main.py index 30048cc..9f7fc3a 100644 --- a/workflow/deploy/tensorflow-serving/src/webservice/main.py +++ b/workflow/deploy/tensorflow-serving/src/webservice/main.py @@ -1,6 +1,6 @@ from fastapi import FastAPI from .health import healthz -from .prediction import rest, grpc +from webservice.prediction import rest, grpc app = FastAPI( title = 'TensorFlow Serving webservice ', diff --git 
a/workflow/deploy/tensorflow-serving/src/webservice/prediction/grpc.py b/workflow/deploy/tensorflow-serving/src/webservice/prediction/grpc.py index f0d2186..6e181df 100644 --- a/workflow/deploy/tensorflow-serving/src/webservice/prediction/grpc.py +++ b/workflow/deploy/tensorflow-serving/src/webservice/prediction/grpc.py @@ -1,35 +1,31 @@ -import os, grpc +import os, grpc, datetime from fastapi import APIRouter -from typing import List -from pydantic import BaseModel from tensorflow.core.framework import types_pb2 from tensorflow.contrib.util import make_tensor_proto from tensorflow_serving.apis import predict_pb2 from tensorflow_serving.apis import prediction_service_pb2_grpc +from webservice.prediction import model +from webservice.util import util router = APIRouter() model_name = os.environ['MODEL_NAME'] +jetson_model = os.environ['JETSON_MODEL'] SERVING_HOST = 'localhost' SERVING_GRPC_PORT = int(8500) PREDICT_TIMEOUT = 5.0 -class Request(BaseModel): - instances: List[float] = [] - -class Response(BaseModel): - predictions: List[float] = [] - @router.post( '/predict', - response_model = Response, + response_model = model.Response, operation_id = 'gRPCPredict', tags = [ 'prediction' ], summary = 'Predict via gRPC', description = 'Predict given trained TensorFlow model. Accesses gRPC endpoint of TensorFlow Serving.' 
) -async def gRPCPredict(request: Request): +async def gRPCPredict(request: model.Request): + start = datetime.datetime.now() stub = prediction_service_pb2_grpc.PredictionServiceStub( grpc.insecure_channel(f"{SERVING_HOST}:{SERVING_GRPC_PORT}") ) @@ -43,5 +39,11 @@ async def gRPCPredict(request: Request): ) predictResult = stub.Predict(predictRequest, PREDICT_TIMEOUT) return { - 'predictions': list(predictResult.outputs['y'].float_val) + 'predictions': list(predictResult.outputs['y'].float_val), + 'meta': { + 'model_name': model_name, + 'duration': util.millis_interval(start,datetime.datetime.now()), + 'timestamp': datetime.datetime.now().timestamp(), + 'jetson_model': jetson_model + } } diff --git a/workflow/deploy/tensorflow-serving/src/webservice/prediction/model.py b/workflow/deploy/tensorflow-serving/src/webservice/prediction/model.py new file mode 100644 index 0000000..3388101 --- /dev/null +++ b/workflow/deploy/tensorflow-serving/src/webservice/prediction/model.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel +from typing import List + +class Request(BaseModel): + instances: List[float] = [] + +class ResponseMeta(BaseModel): + model_name: str + jetson_model: str + duration: int # milliseconds + timestamp: float + +class Response(BaseModel): + predictions: List[float] = [] + meta: ResponseMeta diff --git a/workflow/deploy/tensorflow-serving/src/webservice/prediction/rest.py b/workflow/deploy/tensorflow-serving/src/webservice/prediction/rest.py index 3c4cf7a..f613f8d 100644 --- a/workflow/deploy/tensorflow-serving/src/webservice/prediction/rest.py +++ b/workflow/deploy/tensorflow-serving/src/webservice/prediction/rest.py @@ -1,31 +1,27 @@ -import os, requests, json, encodings +import os, requests, datetime, json, encodings from fastapi import APIRouter -from typing import List -from pydantic import BaseModel +from webservice.prediction import model +from webservice.util import util router = APIRouter() model_name = os.environ['MODEL_NAME'] +jetson_model =
os.environ['JETSON_MODEL'] SERVING_HOST = 'localhost' SERVING_REST_PORT = int(8501) PREDICT_TIMEOUT = 5.0 -class Request(BaseModel): - instances: List[float] = [] - -class Response(BaseModel): - predictions: List[float] = [] - @router.post( '/predict', - response_model = Response, + response_model = model.Response, operation_id = 'restPredict', - tags = [ 'prediction' ], + tags = [ 'prediction' ], summary = 'Predict via REST', description = 'Predict given trained TensorFlow model. Accesses REST endpoint of TensorFlow Serving.' ) -async def restPredict(request: Request): +async def restPredict(request: model.Request): + start = datetime.datetime.now() return { 'predictions': json.loads( requests @@ -36,5 +32,11 @@ async def restPredict(request: Request): ) .content .decode(encodings.utf_8.getregentry().name) - )['predictions'] + )['predictions'], + 'meta': { + 'model_name': model_name, + 'duration': util.millis_interval(start,datetime.datetime.now()), + 'timestamp': datetime.datetime.now().timestamp(), + 'jetson_model': jetson_model + } } diff --git a/workflow/deploy/tensorflow-serving/src/webservice/util/__init__.py b/workflow/deploy/tensorflow-serving/src/webservice/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/workflow/deploy/tensorflow-serving/src/webservice/util/util.py b/workflow/deploy/tensorflow-serving/src/webservice/util/util.py new file mode 100644 index 0000000..796290b --- /dev/null +++ b/workflow/deploy/tensorflow-serving/src/webservice/util/util.py @@ -0,0 +1,6 @@ +def millis_interval(start, end): + diff = end - start + millis = diff.days * 24 * 60 * 60 * 1000 + millis += diff.seconds * 1000 + millis += diff.microseconds / 1000 + return int(round(millis))