// Copyright (c) 2021 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Load and Execute Paddle Model in C++
In this tutorial, we will show you how to load and execute a Paddle model in CINN using C++, taking the ResNet50 model as an example.
#include <gtest/gtest.h>

#include "cinn/cinn.h"
#ifdef CINN_WITH_CUDA
#include <cuda_runtime.h>  // provides cudaMemcpy, used in the CUDA branches below
#endif

using namespace cinn;  // NOLINT
Prepare to Load Model
Declare the parameters and prepare to load and execute the Paddle model.
input_name is the name of the input tensor in the model.
target_name is the name of the output tensor we want.
x_shape is the shape of the model's input tensor.
std::string input_name = "inputs";                           // name of the model's input tensor
std::string target_name = "save_infer_model/scale_0.tmp_1";  // name of the output tensor we want
std::vector<int> x_shape = {1, 3, 224, 224};                 // NCHW layout: {batch, channels, height, width}
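The input shape is fixed when the model is compiled, so running a different batch size means compiling with a different shape. A minimal sketch, where batch_size is a hypothetical variable:
int batch_size = 4;  // hypothetical: pick the batch size you need before compiling
std::vector<int> x_shape_batched = {batch_size, 3, 224, 224};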
Set the Target Backend
Currently, CINN supports only two backends: X86 and CUDA.
To choose the X86 backend, use:
auto target = common::DefaultHostTarget();
To choose the CUDA backend, use:
auto target = common::DefaultNVGPUTarget();
In this tutorial, we use the X86 backend:
auto target = common::DefaultHostTarget();
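When CINN is built with CUDA support, you may prefer to pick the backend at compile time rather than hard-coding one. A minimal sketch, assuming the CINN_WITH_CUDA macro that guards CINN's CUDA code paths:
#ifdef CINN_WITH_CUDA
auto target = common::DefaultNVGPUTarget();  // CUDA build: run on the GPU
#else
auto target = common::DefaultHostTarget();   // otherwise fall back to X86
#endif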
Load Model to CINN
Load the Paddle model and compile it into CINN IR.
target is the backend on which to execute the model.
model_dir is the path where the Paddle model is stored.
params_combined indicates whether the model's parameters are stored in a single file.
std::string model_dir = "./ResNet50";
bool params_combined = true;
auto computation =
    frontend::CinnComputation::CompilePaddleModel(target, model_dir, {input_name}, {x_shape}, params_combined);
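If your Paddle model stores each parameter in a separate file rather than one combined file, pass false for params_combined. A sketch using the same API, with a hypothetical model directory:
std::string separate_model_dir = "./ResNet50_separate";  // hypothetical path: model saved with one file per parameter
auto computation_separate = frontend::CinnComputation::CompilePaddleModel(
    target, separate_model_dir, {input_name}, {x_shape}, /*params_combined=*/false);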
Get Input Tensor and Set Input Data
Here we use all-zero data as input. In a real application, replace it with your actual input data.
auto input_tensor = computation.GetTensor(input_name);
std::vector<float> fake_input(input_tensor->shape().numel(), 0.f);
auto *input_data = input_tensor->mutable_data<float>(target);
if (target.arch == Target::Arch::X86) {
  // On X86 the tensor lives in host memory, so a plain copy suffices.
  std::copy(fake_input.begin(), fake_input.end(), input_data);
} else if (target.arch == Target::Arch::NVGPU) {
  // On CUDA the tensor lives in device memory, so copy host-to-device.
  CUDA_CALL(cudaMemcpy(
      input_data, fake_input.data(), input_tensor->shape().numel() * sizeof(float), cudaMemcpyHostToDevice));
}
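To sanity-check the pipeline with something other than zeros, you could fill fake_input with random values before performing the copy shown above. A minimal sketch using only the C++ standard library (add #include <random>):
std::mt19937 rng(42);  // fixed seed so the run is reproducible
std::uniform_real_distribution<float> dist(0.f, 1.f);
for (auto &v : fake_input) v = dist(rng);
// ...then copy fake_input into input_data exactly as shown above.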
Execute Model
Execute the model and retrieve the output tensor's data.
computation.Execute();
auto target_tensor = computation.GetTensor(target_name);
std::vector<float> output_data(target_tensor->shape().numel(), 0.f);
if (target.arch == Target::Arch::X86) {
  // Host memory: read the output directly.
  std::copy(target_tensor->data<float>(),
            target_tensor->data<float>() + target_tensor->shape().numel(),
            output_data.data());
} else if (target.arch == Target::Arch::NVGPU) {
  // Device memory: copy the output back to the host.
  CUDA_CALL(cudaMemcpy(output_data.data(),
                       reinterpret_cast<void *>(target_tensor->mutable_data<float>(target)),
                       target_tensor->shape().numel() * sizeof(float),
                       cudaMemcpyDeviceToHost));
}
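ResNet50 is an image classifier, so the output tensor typically holds one score per class. As a quick way to inspect the result, you can take the argmax of output_data; a minimal sketch (assuming the model ends in a classification layer; add #include <algorithm> and #include <iostream>):
// The index of the highest score is the predicted class id.
auto max_it = std::max_element(output_data.begin(), output_data.end());
int predicted_class = static_cast<int>(std::distance(output_data.begin(), max_it));
std::cout << "predicted class: " << predicted_class << ", score: " << *max_it << std::endl;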