目录
说明
官网地址:https://github.com/KwaiVGI/LivePortrait
代码地址:https://github.com/hpc203/liveportrait-onnxrun
模型下载:onnx文件在百度云盘,链接: https://pan.baidu.com/s/13wjBFRHIIyCyBsgnBOKqsw 提取码: si95
效果
LivePortrait实现快速、高质量的人像驱动视频生成
代码
main.cpp
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <string>
#include <math.h>
#include "liveportrait.h"
using namespace cv;
using namespace std;
int main()
{
LivePortraitPipeline mynet;
string imgpath = "/home/wangbo/liveportrait-onnxrun/0.jpg";
string videopath = "/home/wangbo/liveportrait-onnxrun/d0.mp4";
mynet.execute(imgpath, videopath);
return 0;
}
liveportrait.cpp
#include "liveportrait.h"
#include <opencv2/highgui.hpp>
#include <numeric>
using namespace cv;
using namespace std;
using namespace Ort;
LivePortraitPipeline::LivePortraitPipeline()
{
/// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
string model_path = "/home/wangbo/liveportrait-onnxrun/weights/appearance_feature_extractor.onnx";
appearance_feature_extractor_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/motion_extractor.onnx";
motion_extractor_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/warping_spade.onnx";
warping_spade_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/stitching.onnx";
stitching_module_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/landmark.onnx";
/// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); windows写法
/// landmark_runner_ort_session = new Session(env, widestr.c_str(), sessionOptions); windows写法
landmark_runner_ort_session = new Session(env, model_path.c_str(), sessionOptions); linux写法
/// 输出和输出节点名称在头文件里写死,在这里就不调用函数获取了
this->face_analysis = std::make_shared<FaceAnalysis>("/home/wangbo/liveportrait-onnxrun/weights/retinaface_det_static.onnx", "/home/wangbo/liveportrait-onnxrun/weights/face_2dpose_106_static.onnx");
this->mask_crop = imread("/home/wangbo/liveportrait-onnxrun/mask_template.png");
cvtColor(this->mask_crop, this->mask_crop, COLOR_BGRA2BGR);
}
Mat LivePortraitPipeline::landmark_runner(const Mat &img, const float *lmk)
{
std::map<string, Mat> crop_dct;
crop_image(img, lmk, 106, crop_dct, 224, 1.5, -0.1);
std::vector<int64_t> input_img_shape = {1, 3, crop_dct["img_crop"].rows, crop_dct["img_crop"].cols};
vector<cv::Mat> bgrChannels(3);
split(crop_dct["img_crop"], bgrChannels);
for (int c = 0; c < 3; c++)
{
bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
}
const int image_area = input_img_shape[2] * input_img_shape[3];
this->landmark_runner_input_tensor.clear();
this->landmark_runner_input_tensor.resize(input_img_shape[0] * input_img_shape[1] * image_area);
size_t single_chn_size = image_area * sizeof(float);
memcpy(this->landmark_runner_input_tensor.data(), (float *)bgrChannels[0].data, single_chn_size);
memcpy(this->landmark_runner_input_tensor.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
memcpy(this->landmark_runner_input_tensor.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, this->landmark_runner_input_tensor.data(), this->landmark_runner_input_tensor.size(), input_img_shape.data(), input_img_shape.size());
vector<Value> ort_outputs = this->landmark_runner_ort_session->Run(runOptions, this->landmark_runner_input_names.data(), &input_tensor, 1, this->landmark_runner_output_names.data(), this->landmark_runner_output_names.size());
float *out_pts = ort_outputs[2].GetTensorMutableData<float>();
const int num_pts = ort_outputs[2].GetTensorTypeAndShapeInfo().GetShape()[1] / 2;
Mat lmk_mat(num_pts, 2, CV_32FC1);
for (int i = 0; i < num_pts; i++)
{ btachsize=1,不考虑batchsize维度
lmk_mat.at<float>(i, 0) = out_pts[i * 2] * 224 * crop_dct["M_c2o"].at<float>(0, 0) + out_pts[i * 2 + 1] * 224 * crop_dct["M_c2o"].at<float>(0, 1) + crop_dct["M_c2o"].at<float>(0, 2);
lmk_mat.at<float>(i, 1) = out_pts[i * 2] * 224 * crop_dct["M_c2o"].at<float>(1, 0) + out_pts[i * 2 + 1] * 224 * crop_dct["M_c2o"].at<float>(1, 1) + crop_dct["M_c2o"].at<float>(1, 2);
}
return lmk_mat;
}
void LivePortraitPipeline::crop_src_image(const Mat &srcimg, std::map<string, Mat> &crop_info)
{
vector<Bbox> boxes = this->face_analysis->detect(srcimg);
if (boxes.size() == 0)
{
cout << "No face detected in the source image." << endl;
return;
}
else if (boxes.size() > 1)
{
cout << "More than one face detected in the image, only pick one face." << endl;
return;
}
Bbox src_face = boxes[0];
float *lmk = src_face.landmark_2d_106;
crop_image(srcimg, lmk, 106, crop_info, 512, 2.3, -0.125);
Mat lmk_crop = this->landmark_runner(srcimg, lmk);
crop_info["lmk_crop"] = lmk_crop;
Mat img_crop_256x256;
resize(crop_info["img_crop"], img_crop_256x256, Size(256, 256), INTER_AREA);
crop_info["img_crop_256x256"] = img_crop_256x256;
crop_info["lmk_crop_256x256"] = crop_info["lmk_crop"] * 256 / 512;
}
void LivePortraitPipeline::get_kp_info(vector<float> x, vector<int64_t> shape, std::map<string, Mat> &kp_info)
{
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, x.data(), x.size(), shape.data(), shape.size());
vector<Value> ort_outputs = this->motion_extractor_ort_session->Run(runOptions, this->motion_extractor_input_names.data(), &input_tensor, 1, this->motion_extractor_output_names.data(), this->motion_extractor_output_names.size());
pitch, yaw, roll, t, exp, scale, kp
float *pitch = ort_outputs[0].GetTensorMutableData<float>();
float *yaw = ort_outputs[1].GetTensorMutableData<float>();
float *roll = ort_outputs[2].GetTensorMutableData<float>();
float *t = ort_outputs[3].GetTensorMutableData<float>();
float *exp = ort_outputs[4].GetTensorMutableData<float>();
float *scale = ort_outputs[5].GetTensorMutableData<float>();
float *kp = ort_outputs[6].GetTensorMutableData<float>();
vector<float> pred;
softmax(pitch, this->motion_extractor_output_shape[0], pred);
const int bs = this->motion_extractor_output_shape[0][0]; batchsize=1,不考虑多图片输入
const int len = 66; 66
float sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
float degree = sum * 3 - 97.5;
kp_info["pitch"] = (Mat_<float>(1, 1) << degree);
softmax(yaw, this->motion_extractor_output_shape[1], pred);
sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
degree = sum * 3 - 97.5;
kp_info["yaw"] = (Mat_<float>(1, 1) << degree);
softmax(roll, this->motion_extractor_output_shape[2], pred);
sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
degree = sum * 3 - 97.5;
kp_info["roll"] = (Mat_<float>(1, 1) << degree);
kp_info["t"] = Mat(1, 3, CV_32FC1, t);
vector<int> sizes = {1, 21, 3}; 由于在c++的opencv里不支持3维Mat的矩阵乘法,此处不考虑batchsize维度
vector<int> sizes = {21, 3};
kp_info["exp"] = Mat(sizes, CV_32FC1, exp);
kp_info["scale"] = Mat(1, 1, CV_32FC1, scale);
kp_info["kp"] = Mat(sizes, CV_32FC1, kp);
}
void LivePortraitPipeline::extract_feature_3d(vector<float> x, vector<int64_t> shape, vector<float> &f_s)
{
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, x.data(), x.size(), shape.data(), shape.size());
vector<Value> ort_outputs = this->appearance_feature_extractor_ort_session->Run(runOptions, this->appearance_feature_extractor_input_names.data(), &input_tensor, 1, this->appearance_feature_extractor_output_names.data(), this->appearance_feature_extractor_output_names.size());
int numel = ort_outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
float *out = ort_outputs[0].GetTensorMutableData<float>();
f_s.clear();
f_s.resize(numel);
memcpy(f_s.data(), out, numel * sizeof(float));
}
void LivePortraitPipeline::stitching(const Mat &kp_source, Mat &kp_driving_new)
{
不考虑batchsize维度
const int num_kp = kp_source.size[0];
const int numel = kp_source.total();
const int len = std::accumulate(this->stitching_module_input_shape.begin(), this->stitching_module_input_shape.end(), 1, std::multiplies<int64_t>());
vector<float> feat(len);
memcpy(feat.data(), (float *)kp_source.data, numel * sizeof(float));
memcpy(feat.data() + numel, (float *)kp_driving_new.data, (len - numel) * sizeof(float));
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, feat.data(), feat.size(), this->stitching_module_input_shape.data(), this->stitching_module_input_shape.size());
vector<Value> ort_outputs = this->stitching_module_ort_session->Run(runOptions, this->stitching_module_input_names.data(), &input_tensor, 1, this->stitching_module_output_names.data(), this->stitching_module_output_names.size());
float *delta = ort_outputs[0].GetTensorMutableData<float>();
const float delta_tx_ty[2] = {delta[num_kp * 3], delta[num_kp * 3 + 1]};
for (int i = 0; i < num_kp; i++)
{
kp_driving_new.at<float>(i, 0) += delta[i * 3];
kp_driving_new.at<float>(i, 1) += delta[i * 3 + 1];
kp_driving_new.at<float>(i, 2) += delta[i * 3 + 2];
kp_driving_new.at<float>(i, 0) += delta_tx_ty[0];
kp_driving_new.at<float>(i, 1) += delta_tx_ty[1];
}
}
Mat LivePortraitPipeline::warping_spade(vector<float> feature_3d, const Mat &kp_source, const Mat &kp_driving)
{
vector<Ort::Value> inputTensors;
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, feature_3d.data(), feature_3d.size(), this->warping_spade_input_shape[0].data(), this->warping_spade_input_shape[0].size()));
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, (float *)kp_driving.data, kp_driving.total(), this->warping_spade_input_shape[1].data(), this->warping_spade_input_shape[1].size()));
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, (float *)kp_source.data, kp_source.total(), this->warping_spade_input_shape[2].data(), this->warping_spade_input_shape[2].size()));
vector<Value> ort_outputs = this->warping_spade_ort_session->Run(runOptions, this->warping_spade_input_names.data(), inputTensors.data(), inputTensors.size(), this->warping_spade_output_names.data(), this->warping_spade_output_names.size());
float *out = ort_outputs[0].GetTensorMutableData<float>();
Mat out_mat = Mat(this->warping_spade_output_shape, CV_32FC1, out);
return out_mat;
}
Mat LivePortraitPipeline::predict(const int frame_id, std::map<string, Mat> x_s_info, const Mat &R_s, vector<float> f_s, const Mat &x_s, const Mat &frame)
{
Mat lmk;
if (frame_id > 0)
{
lmk = this->landmark_runner(frame, (float*)this->pred_info_lmk.data);
}
else
{
vector<Bbox> boxes = this->face_analysis->detect(frame);
if (boxes.size() == 0)
{
cout << "No face detected in the frame." << endl;
exit(-1);
}
else if (boxes.size() > 1)
{
cout << "More than one face detected in the driving frame, only pick one face." << endl;
exit(-1);
}
Bbox src_face = boxes[0];
lmk = this->landmark_runner(frame, src_face.landmark_2d_106);
}
lmk.copyTo(this->pred_info_lmk);
float c_d_eyes[2] = {calculate_distance_ratio(lmk, 6, 18, 0, 12), calculate_distance_ratio(lmk, 30, 42, 24, 36)};
float c_d_lip = calculate_distance_ratio(lmk, 90, 102, 48, 66);
Mat img;
resize(frame, img, Size(256, 256));
vector<float> I_d;
vector<int64_t> I_d_shape;
preprocess(img, I_d, I_d_shape);
std::map<string, Mat> x_d_info;
this->get_kp_info(I_d, I_d_shape, x_d_info);
Mat R_d = get_rotation_matrix(x_d_info["pitch"], x_d_info["yaw"], x_d_info["roll"]);
x_d_info["R_d"] = R_d;
x_d_info.erase("pitch");
x_d_info.erase("yaw");
x_d_info.erase("roll");
x_d_info.erase("kp");
if (frame_id == 0)
{
this->pred_info_x_d_0_info["scale"] = x_d_info["scale"].clone(); ///也可以定义结构体的方式打包参数
this->pred_info_x_d_0_info["R_d"] = R_d.clone();
this->pred_info_x_d_0_info["exp"] = x_d_info["exp"].clone();
this->pred_info_x_d_0_info["t"] = x_d_info["t"].clone();
}
Mat R_new = (R_d * this->pred_info_x_d_0_info["R_d"].t()) * R_s;
Mat delta_new = x_s_info["exp"] + (x_d_info["exp"] - this->pred_info_x_d_0_info["exp"]);
Mat scale_new = x_s_info["scale"].mul(x_d_info["scale"] / this->pred_info_x_d_0_info["scale"]); /// scale是1x1矩阵,也就是单个数值
Mat t_new = x_s_info["t"] + (x_d_info["t"] - this->pred_info_x_d_0_info["t"]);
t_new.at<float>(0, 2) = 0;
Mat temp = repeat(t_new, 21, 1);
Mat x_d_new = scale_new.at<float>(0, 0) * (x_s_info["kp"] * R_new + delta_new) + temp;
this->stitching(x_s, x_d_new);
Mat out = this->warping_spade(f_s, x_s, x_d_new); 形状是[1,3,512,512]
const int image_erea = out.size[2] * out.size[3];
float *pdata = (float *)out.data;
Mat rmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata);
Mat gmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata + image_erea);
Mat bmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata + image_erea * 2);
rmat.setTo(0, rmat < 0);
rmat.setTo(1, rmat > 1);
gmat.setTo(0, gmat < 0);
gmat.setTo(1, gmat > 1);
bmat.setTo(0, bmat < 0);
bmat.setTo(1, bmat > 1);
vector<Mat> channel_mats(3);
channel_mats[0] = rmat;
channel_mats[1] = gmat;
channel_mats[2] = bmat;
Mat I_p;
merge(channel_mats, I_p);
I_p *= 255;
I_p.convertTo(I_p, CV_8UC3);
return I_p;
}
int LivePortraitPipeline::execute(string imgpath, string videopath)
{
Mat srcimg = imread(imgpath);
if (srcimg.empty())
{
cout << "opencv读取图片为空, 请检查输入图片的路径" << endl;
return -1;
}
Mat img;
cvtColor(srcimg, img, COLOR_BGRA2BGR);
cvtColor(img, img, COLOR_BGR2RGB);
Mat src_img = src_preprocess(img);
std::map<string, Mat> crop_info;
crop_src_image(src_img, crop_info);
Mat img_crop_256x256 = crop_info["img_crop_256x256"];
vector<float> I_s;
vector<int64_t> I_s_shape;
preprocess(img_crop_256x256, I_s, I_s_shape);
std::map<string, Mat> x_s_info;
this->get_kp_info(I_s, I_s_shape, x_s_info);
Mat R_s = get_rotation_matrix(x_s_info["pitch"], x_s_info["yaw"], x_s_info["roll"]); 返回结果已验证通过
vector<float> f_s;
this->extract_feature_3d(I_s, I_s_shape, f_s);
Mat x_s = transform_keypoint(x_s_info);
cv::VideoCapture capture(videopath);
if (!capture.isOpened())
{
cout << "VideoCapture,open video file failed, " << videopath << endl;
return -1;
}
const int fps = capture.get(cv::CAP_PROP_FPS);
const int video_length = capture.get(cv::CAP_PROP_FRAME_COUNT);
cout<<"video total have "<<video_length<<" frames"<<endl;
int f_h = src_img.rows;
int f_w = src_img.cols;
if (this->flg_composite)
{
f_h = 512;
f_w = 512 * 3;
}
prepare for pasteback
Mat mask_ori = prepare_paste_back(this->mask_crop, crop_info["M_c2o"], Size(src_img.cols, src_img.rows));
VideoWriter video_writer;
video_writer.open("output.mp4", cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, Size(f_w, f_h));
Mat frame;
int frame_id = 0;
while (capture.read(frame))
{
if (frame.empty())
{
break;
}
Mat img_rgb;
cvtColor(frame, img_rgb, COLOR_BGR2RGB);
auto a = std::chrono::high_resolution_clock::now();
Mat I_p = this->predict(frame_id, x_s_info, R_s, f_s, x_s, img_rgb);
auto b = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> c = b - a;
cout<<"frame_id="<<frame_id<<", predict waste time="<<to_string(c.count())<<" s"<<endl;
frame_id += 1;
Mat driving_img;
if (this->flg_composite)
{
concat_frame(img_rgb, img_crop_256x256, I_p, driving_img);
}
else
{
paste_back(I_p, crop_info["M_c2o"], src_img, mask_ori, driving_img);
}
cvtColor(driving_img, driving_img, COLOR_RGB2BGR);
video_writer.write(driving_img);
}
video_writer.release();
capture.release();
///destroyAllWindows();
return 0;
}
#include "liveportrait.h"
#include <opencv2/highgui.hpp>
#include <numeric>
using namespace cv;
using namespace std;
using namespace Ort;
LivePortraitPipeline::LivePortraitPipeline()
{
/// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
string model_path = "/home/wangbo/liveportrait-onnxrun/weights/appearance_feature_extractor.onnx";
appearance_feature_extractor_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/motion_extractor.onnx";
motion_extractor_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/warping_spade.onnx";
warping_spade_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/stitching.onnx";
stitching_module_ort_session = new Session(env, model_path.c_str(), sessionOptions);
model_path = "/home/wangbo/liveportrait-onnxrun/weights/landmark.onnx";
/// std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); windows写法
/// landmark_runner_ort_session = new Session(env, widestr.c_str(), sessionOptions); windows写法
landmark_runner_ort_session = new Session(env, model_path.c_str(), sessionOptions); linux写法
/// 输出和输出节点名称在头文件里写死,在这里就不调用函数获取了
this->face_analysis = std::make_shared<FaceAnalysis>("/home/wangbo/liveportrait-onnxrun/weights/retinaface_det_static.onnx", "/home/wangbo/liveportrait-onnxrun/weights/face_2dpose_106_static.onnx");
this->mask_crop = imread("/home/wangbo/liveportrait-onnxrun/mask_template.png");
cvtColor(this->mask_crop, this->mask_crop, COLOR_BGRA2BGR);
}
Mat LivePortraitPipeline::landmark_runner(const Mat &img, const float *lmk)
{
std::map<string, Mat> crop_dct;
crop_image(img, lmk, 106, crop_dct, 224, 1.5, -0.1);
std::vector<int64_t> input_img_shape = {1, 3, crop_dct["img_crop"].rows, crop_dct["img_crop"].cols};
vector<cv::Mat> bgrChannels(3);
split(crop_dct["img_crop"], bgrChannels);
for (int c = 0; c < 3; c++)
{
bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
}
const int image_area = input_img_shape[2] * input_img_shape[3];
this->landmark_runner_input_tensor.clear();
this->landmark_runner_input_tensor.resize(input_img_shape[0] * input_img_shape[1] * image_area);
size_t single_chn_size = image_area * sizeof(float);
memcpy(this->landmark_runner_input_tensor.data(), (float *)bgrChannels[0].data, single_chn_size);
memcpy(this->landmark_runner_input_tensor.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
memcpy(this->landmark_runner_input_tensor.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, this->landmark_runner_input_tensor.data(), this->landmark_runner_input_tensor.size(), input_img_shape.data(), input_img_shape.size());
vector<Value> ort_outputs = this->landmark_runner_ort_session->Run(runOptions, this->landmark_runner_input_names.data(), &input_tensor, 1, this->landmark_runner_output_names.data(), this->landmark_runner_output_names.size());
float *out_pts = ort_outputs[2].GetTensorMutableData<float>();
const int num_pts = ort_outputs[2].GetTensorTypeAndShapeInfo().GetShape()[1] / 2;
Mat lmk_mat(num_pts, 2, CV_32FC1);
for (int i = 0; i < num_pts; i++)
{ btachsize=1,不考虑batchsize维度
lmk_mat.at<float>(i, 0) = out_pts[i * 2] * 224 * crop_dct["M_c2o"].at<float>(0, 0) + out_pts[i * 2 + 1] * 224 * crop_dct["M_c2o"].at<float>(0, 1) + crop_dct["M_c2o"].at<float>(0, 2);
lmk_mat.at<float>(i, 1) = out_pts[i * 2] * 224 * crop_dct["M_c2o"].at<float>(1, 0) + out_pts[i * 2 + 1] * 224 * crop_dct["M_c2o"].at<float>(1, 1) + crop_dct["M_c2o"].at<float>(1, 2);
}
return lmk_mat;
}
void LivePortraitPipeline::crop_src_image(const Mat &srcimg, std::map<string, Mat> &crop_info)
{
vector<Bbox> boxes = this->face_analysis->detect(srcimg);
if (boxes.size() == 0)
{
cout << "No face detected in the source image." << endl;
return;
}
else if (boxes.size() > 1)
{
cout << "More than one face detected in the image, only pick one face." << endl;
return;
}
Bbox src_face = boxes[0];
float *lmk = src_face.landmark_2d_106;
crop_image(srcimg, lmk, 106, crop_info, 512, 2.3, -0.125);
Mat lmk_crop = this->landmark_runner(srcimg, lmk);
crop_info["lmk_crop"] = lmk_crop;
Mat img_crop_256x256;
resize(crop_info["img_crop"], img_crop_256x256, Size(256, 256), INTER_AREA);
crop_info["img_crop_256x256"] = img_crop_256x256;
crop_info["lmk_crop_256x256"] = crop_info["lmk_crop"] * 256 / 512;
}
void LivePortraitPipeline::get_kp_info(vector<float> x, vector<int64_t> shape, std::map<string, Mat> &kp_info)
{
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, x.data(), x.size(), shape.data(), shape.size());
vector<Value> ort_outputs = this->motion_extractor_ort_session->Run(runOptions, this->motion_extractor_input_names.data(), &input_tensor, 1, this->motion_extractor_output_names.data(), this->motion_extractor_output_names.size());
pitch, yaw, roll, t, exp, scale, kp
float *pitch = ort_outputs[0].GetTensorMutableData<float>();
float *yaw = ort_outputs[1].GetTensorMutableData<float>();
float *roll = ort_outputs[2].GetTensorMutableData<float>();
float *t = ort_outputs[3].GetTensorMutableData<float>();
float *exp = ort_outputs[4].GetTensorMutableData<float>();
float *scale = ort_outputs[5].GetTensorMutableData<float>();
float *kp = ort_outputs[6].GetTensorMutableData<float>();
vector<float> pred;
softmax(pitch, this->motion_extractor_output_shape[0], pred);
const int bs = this->motion_extractor_output_shape[0][0]; batchsize=1,不考虑多图片输入
const int len = 66; 66
float sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
float degree = sum * 3 - 97.5;
kp_info["pitch"] = (Mat_<float>(1, 1) << degree);
softmax(yaw, this->motion_extractor_output_shape[1], pred);
sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
degree = sum * 3 - 97.5;
kp_info["yaw"] = (Mat_<float>(1, 1) << degree);
softmax(roll, this->motion_extractor_output_shape[2], pred);
sum = 0;
for (int i = 0; i < len; i++)
{
sum += (i * pred[i]);
}
degree = sum * 3 - 97.5;
kp_info["roll"] = (Mat_<float>(1, 1) << degree);
kp_info["t"] = Mat(1, 3, CV_32FC1, t);
vector<int> sizes = {1, 21, 3}; 由于在c++的opencv里不支持3维Mat的矩阵乘法,此处不考虑batchsize维度
vector<int> sizes = {21, 3};
kp_info["exp"] = Mat(sizes, CV_32FC1, exp);
kp_info["scale"] = Mat(1, 1, CV_32FC1, scale);
kp_info["kp"] = Mat(sizes, CV_32FC1, kp);
}
void LivePortraitPipeline::extract_feature_3d(vector<float> x, vector<int64_t> shape, vector<float> &f_s)
{
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, x.data(), x.size(), shape.data(), shape.size());
vector<Value> ort_outputs = this->appearance_feature_extractor_ort_session->Run(runOptions, this->appearance_feature_extractor_input_names.data(), &input_tensor, 1, this->appearance_feature_extractor_output_names.data(), this->appearance_feature_extractor_output_names.size());
int numel = ort_outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
float *out = ort_outputs[0].GetTensorMutableData<float>();
f_s.clear();
f_s.resize(numel);
memcpy(f_s.data(), out, numel * sizeof(float));
}
void LivePortraitPipeline::stitching(const Mat &kp_source, Mat &kp_driving_new)
{
不考虑batchsize维度
const int num_kp = kp_source.size[0];
const int numel = kp_source.total();
const int len = std::accumulate(this->stitching_module_input_shape.begin(), this->stitching_module_input_shape.end(), 1, std::multiplies<int64_t>());
vector<float> feat(len);
memcpy(feat.data(), (float *)kp_source.data, numel * sizeof(float));
memcpy(feat.data() + numel, (float *)kp_driving_new.data, (len - numel) * sizeof(float));
Value input_tensor = Value::CreateTensor<float>(memory_info_handler, feat.data(), feat.size(), this->stitching_module_input_shape.data(), this->stitching_module_input_shape.size());
vector<Value> ort_outputs = this->stitching_module_ort_session->Run(runOptions, this->stitching_module_input_names.data(), &input_tensor, 1, this->stitching_module_output_names.data(), this->stitching_module_output_names.size());
float *delta = ort_outputs[0].GetTensorMutableData<float>();
const float delta_tx_ty[2] = {delta[num_kp * 3], delta[num_kp * 3 + 1]};
for (int i = 0; i < num_kp; i++)
{
kp_driving_new.at<float>(i, 0) += delta[i * 3];
kp_driving_new.at<float>(i, 1) += delta[i * 3 + 1];
kp_driving_new.at<float>(i, 2) += delta[i * 3 + 2];
kp_driving_new.at<float>(i, 0) += delta_tx_ty[0];
kp_driving_new.at<float>(i, 1) += delta_tx_ty[1];
}
}
Mat LivePortraitPipeline::warping_spade(vector<float> feature_3d, const Mat &kp_source, const Mat &kp_driving)
{
vector<Ort::Value> inputTensors;
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, feature_3d.data(), feature_3d.size(), this->warping_spade_input_shape[0].data(), this->warping_spade_input_shape[0].size()));
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, (float *)kp_driving.data, kp_driving.total(), this->warping_spade_input_shape[1].data(), this->warping_spade_input_shape[1].size()));
inputTensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info_handler, (float *)kp_source.data, kp_source.total(), this->warping_spade_input_shape[2].data(), this->warping_spade_input_shape[2].size()));
vector<Value> ort_outputs = this->warping_spade_ort_session->Run(runOptions, this->warping_spade_input_names.data(), inputTensors.data(), inputTensors.size(), this->warping_spade_output_names.data(), this->warping_spade_output_names.size());
float *out = ort_outputs[0].GetTensorMutableData<float>();
Mat out_mat = Mat(this->warping_spade_output_shape, CV_32FC1, out);
return out_mat;
}
Mat LivePortraitPipeline::predict(const int frame_id, std::map<string, Mat> x_s_info, const Mat &R_s, vector<float> f_s, const Mat &x_s, const Mat &frame)
{
Mat lmk;
if (frame_id > 0)
{
lmk = this->landmark_runner(frame, (float*)this->pred_info_lmk.data);
}
else
{
vector<Bbox> boxes = this->face_analysis->detect(frame);
if (boxes.size() == 0)
{
cout << "No face detected in the frame." << endl;
exit(-1);
}
else if (boxes.size() > 1)
{
cout << "More than one face detected in the driving frame, only pick one face." << endl;
exit(-1);
}
Bbox src_face = boxes[0];
lmk = this->landmark_runner(frame, src_face.landmark_2d_106);
}
lmk.copyTo(this->pred_info_lmk);
float c_d_eyes[2] = {calculate_distance_ratio(lmk, 6, 18, 0, 12), calculate_distance_ratio(lmk, 30, 42, 24, 36)};
float c_d_lip = calculate_distance_ratio(lmk, 90, 102, 48, 66);
Mat img;
resize(frame, img, Size(256, 256));
vector<float> I_d;
vector<int64_t> I_d_shape;
preprocess(img, I_d, I_d_shape);
std::map<string, Mat> x_d_info;
this->get_kp_info(I_d, I_d_shape, x_d_info);
Mat R_d = get_rotation_matrix(x_d_info["pitch"], x_d_info["yaw"], x_d_info["roll"]);
x_d_info["R_d"] = R_d;
x_d_info.erase("pitch");
x_d_info.erase("yaw");
x_d_info.erase("roll");
x_d_info.erase("kp");
if (frame_id == 0)
{
this->pred_info_x_d_0_info["scale"] = x_d_info["scale"].clone(); ///也可以定义结构体的方式打包参数
this->pred_info_x_d_0_info["R_d"] = R_d.clone();
this->pred_info_x_d_0_info["exp"] = x_d_info["exp"].clone();
this->pred_info_x_d_0_info["t"] = x_d_info["t"].clone();
}
Mat R_new = (R_d * this->pred_info_x_d_0_info["R_d"].t()) * R_s;
Mat delta_new = x_s_info["exp"] + (x_d_info["exp"] - this->pred_info_x_d_0_info["exp"]);
Mat scale_new = x_s_info["scale"].mul(x_d_info["scale"] / this->pred_info_x_d_0_info["scale"]); /// scale是1x1矩阵,也就是单个数值
Mat t_new = x_s_info["t"] + (x_d_info["t"] - this->pred_info_x_d_0_info["t"]);
t_new.at<float>(0, 2) = 0;
Mat temp = repeat(t_new, 21, 1);
Mat x_d_new = scale_new.at<float>(0, 0) * (x_s_info["kp"] * R_new + delta_new) + temp;
this->stitching(x_s, x_d_new);
Mat out = this->warping_spade(f_s, x_s, x_d_new); 形状是[1,3,512,512]
const int image_erea = out.size[2] * out.size[3];
float *pdata = (float *)out.data;
Mat rmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata);
Mat gmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata + image_erea);
Mat bmat = Mat(out.size[2], out.size[3], CV_32FC1, pdata + image_erea * 2);
rmat.setTo(0, rmat < 0);
rmat.setTo(1, rmat > 1);
gmat.setTo(0, gmat < 0);
gmat.setTo(1, gmat > 1);
bmat.setTo(0, bmat < 0);
bmat.setTo(1, bmat > 1);
vector<Mat> channel_mats(3);
channel_mats[0] = rmat;
channel_mats[1] = gmat;
channel_mats[2] = bmat;
Mat I_p;
merge(channel_mats, I_p);
I_p *= 255;
I_p.convertTo(I_p, CV_8UC3);
return I_p;
}
int LivePortraitPipeline::execute(string imgpath, string videopath)
{
Mat srcimg = imread(imgpath);
if (srcimg.empty())
{
cout << "opencv读取图片为空, 请检查输入图片的路径" << endl;
return -1;
}
Mat img;
cvtColor(srcimg, img, COLOR_BGRA2BGR);
cvtColor(img, img, COLOR_BGR2RGB);
Mat src_img = src_preprocess(img);
std::map<string, Mat> crop_info;
crop_src_image(src_img, crop_info);
Mat img_crop_256x256 = crop_info["img_crop_256x256"];
vector<float> I_s;
vector<int64_t> I_s_shape;
preprocess(img_crop_256x256, I_s, I_s_shape);
std::map<string, Mat> x_s_info;
this->get_kp_info(I_s, I_s_shape, x_s_info);
Mat R_s = get_rotation_matrix(x_s_info["pitch"], x_s_info["yaw"], x_s_info["roll"]); 返回结果已验证通过
vector<float> f_s;
this->extract_feature_3d(I_s, I_s_shape, f_s);
Mat x_s = transform_keypoint(x_s_info);
cv::VideoCapture capture(videopath);
if (!capture.isOpened())
{
cout << "VideoCapture,open video file failed, " << videopath << endl;
return -1;
}
const int fps = capture.get(cv::CAP_PROP_FPS);
const int video_length = capture.get(cv::CAP_PROP_FRAME_COUNT);
cout<<"video total have "<<video_length<<" frames"<<endl;
int f_h = src_img.rows;
int f_w = src_img.cols;
if (this->flg_composite)
{
f_h = 512;
f_w = 512 * 3;
}
prepare for pasteback
Mat mask_ori = prepare_paste_back(this->mask_crop, crop_info["M_c2o"], Size(src_img.cols, src_img.rows));
VideoWriter video_writer;
video_writer.open("output.mp4", cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, Size(f_w, f_h));
Mat frame;
int frame_id = 0;
while (capture.read(frame))
{
if (frame.empty())
{
break;
}
Mat img_rgb;
cvtColor(frame, img_rgb, COLOR_BGR2RGB);
auto a = std::chrono::high_resolution_clock::now();
Mat I_p = this->predict(frame_id, x_s_info, R_s, f_s, x_s, img_rgb);
auto b = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> c = b - a;
cout<<"frame_id="<<frame_id<<", predict waste time="<<to_string(c.count())<<" s"<<endl;
frame_id += 1;
Mat driving_img;
if (this->flg_composite)
{
concat_frame(img_rgb, img_crop_256x256, I_p, driving_img);
}
else
{
paste_back(I_p, crop_info["M_c2o"], src_img, mask_ori, driving_img);
}
cvtColor(driving_img, driving_img, COLOR_RGB2BGR);
video_writer.write(driving_img);
}
video_writer.release();
capture.release();
///destroyAllWindows();
return 0;
}
faceanalysis.cpp
#include "faceanalysis.h"
using namespace cv;
using namespace std;
using namespace Ort;
FaceAnalysis::FaceAnalysis(string model_patha, string model_pathb)
{
/// OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///如果使用cuda加速,需要取消注释
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
/// std::wstring widestr = std::wstring(model_patha.begin(), model_patha.end()); windows写法
/// ort_session = new Session(env, widestr.c_str(), sessionOptions); windows写法
det_face_ort_session = new Session(env, model_patha.c_str(), sessionOptions); linux写法
size_t numInputNodes = det_face_ort_session->GetInputCount();
size_t numOutputNodes = det_face_ort_session->GetOutputCount();
AllocatorWithDefaultOptions allocator;
for (int i = 0; i < numOutputNodes; i++)
{
Ort::TypeInfo output_type_info = det_face_ort_session->GetOutputTypeInfo(i);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
det_face_output_node_dims.push_back(output_dims);
}
/// std::wstring widestr = std::wstring(model_pathb.begin(), model_pathb.end()); windows写法
/// ort_session = new Session(env, widestr.c_str(), sessionOptions); windows写法
landmark_ort_session = new Session(env, model_pathb.c_str(), sessionOptions); linux写法
numInputNodes = landmark_ort_session->GetInputCount();
numOutputNodes = landmark_ort_session->GetOutputCount();
Ort::TypeInfo input_type_info = landmark_ort_session->GetInputTypeInfo(0);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
this->landmark_input_height = input_dims[2];
this->landmark_input_width = input_dims[3];
this->landmark_input_tensor_shape = { 1, 3, this->landmark_input_height, this->landmark_input_width };
Ort::TypeInfo output_type_info = landmark_ort_session->GetOutputTypeInfo(0);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
landmark_output_node_dims.push_back(output_dims);
}
void FaceAnalysis::preprocess(Mat srcimg)
{
const float im_ratio = float(srcimg.rows) / (float)srcimg.cols;
int new_width = this->input_size;
int new_height = int(new_width * im_ratio);
if(im_ratio>1)
{
new_height = this->input_size;
new_width = int(new_height / im_ratio);
}
this->det_scale = float(new_height) / (float)srcimg.rows;
Mat resized_img;
resize(srcimg, resized_img, Size(new_width, new_height));
Mat det_img;
copyMakeBorder(resized_img, det_img, 0, this->input_size - new_height, 0, this->input_size - new_width, BORDER_CONSTANT, 0);
vector<cv::Mat> bgrChannels(3);
split(det_img, bgrChannels);
for (int c = 0; c < 3; c++)
{
bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 128.0, -127.5 / 128.0);
}
const int image_area = this->input_size * this->input_size;
this->input_image.resize(3 * image_area);
size_t single_chn_size = image_area * sizeof(float);
memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}
void FaceAnalysis::generate_proposal(const float* p_box, const float* p_scores, const float* p_kps, const int stride, vector<Bbox>& boxes)
{
const int feat_h = this->input_size / stride;
const int feat_w = this->input_size / stride;
const int num_anchors = 2;
for (int i = 0; i < feat_h; i++)
{
for (int j = 0; j < feat_w; j++)
{
for(int n=0; n<num_anchors; n++)
{
const int index = i * feat_w*num_anchors + j*num_anchors+n;
if(p_scores[index] >= this->det_thresh)
{
Bbox box;
box.xmin = (j - p_box[index * 4]) * stride;
box.ymin = (i - p_box[index * 4 + 1]) * stride;
box.xmax = (j + p_box[index * 4 + 2]) * stride;
box.ymax = (i + p_box[index * 4 + 3]) * stride;
box.xmin /= this->det_scale;
box.ymin /= this->det_scale;
box.xmax /= this->det_scale;
box.ymax /= this->det_scale;
for(int k=0;k<5;k++)
{
float px = (j + p_kps[index * 10 + k * 2]) * stride;
float py = (i + p_kps[index * 10 + k * 2 + 1]) * stride;
px /= this->det_scale;
py /= this->det_scale;
box.kps[k * 2] = px;
box.kps[k * 2 + 1] = py;
}
box.score = p_scores[index];
boxes.emplace_back(box);
}
}
}
}
}
bool cmp(Bbox a, Bbox b)
{
float area_a = (a.xmax - a.xmin) * (a.ymax - a.ymin);
float area_b = (b.xmax - b.xmin) * (b.ymax - b.ymin);
return area_a > area_b;
}
vector<Bbox> FaceAnalysis::detect(const Mat& srcimg)
{
this->preprocess(srcimg);
std::vector<int64_t> input_img_shape = {1, 3, this->input_size, this->input_size}; ///也可以写在构造函数里, det_face的输入是动态的
Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());
vector<Value> det_face_ort_outputs = this->det_face_ort_session->Run(runOptions, this->det_face_input_names.data(), &input_tensor_, 1, this->det_face_output_names.data(), this->det_face_output_names.size());
vector<Bbox> boxes;
for(int i=0;i<3;i++)
{
float *p_scores = det_face_ort_outputs[i].GetTensorMutableData<float>();
float *p_bbox = det_face_ort_outputs[i + this->fmc].GetTensorMutableData<float>();
float *p_kps = det_face_ort_outputs[i + this->fmc*2].GetTensorMutableData<float>();
this->generate_proposal(p_bbox, p_scores, p_kps, this->feat_stride_fpn[i], boxes);
}
nms_boxes(boxes, this->nms_thresh);
for(int i=0;i<boxes.size();i++)
{
get_landmark
float w = boxes[i].xmax - boxes[i].xmin;
float h = boxes[i].ymax - boxes[i].ymin;
float center[2] = {(boxes[i].xmin + boxes[i].xmax) * 0.5f, (boxes[i].ymin + boxes[i].ymax) * 0.5f};
float rot = 0.f*PI/180.f;
float scale_ratio = this->landmark_input_size / (max(w, h) * 1.5);
face_align
Mat M = (Mat_<float>(2, 3) << scale_ratio*cos(rot), -scale_ratio*sin(rot), this->landmark_input_size*0.5-center[0]*scale_ratio, scale_ratio*sin(rot), scale_ratio*cos(rot), this->landmark_input_size*0.5-center[1]*scale_ratio);
Mat cropped;
warpAffine(srcimg, cropped, M, cv::Size(this->landmark_input_size, this->landmark_input_size));
face_align
vector<cv::Mat> bgrChannels(3);
split(cropped, bgrChannels);
for (int c = 0; c < 3; c++)
{
bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1);
}
const int image_area = this->landmark_input_size * this->landmark_input_size;
this->aimg.resize(3 * image_area);
size_t single_chn_size = image_area * sizeof(float);
memcpy(this->aimg.data(), (float *)bgrChannels[0].data, single_chn_size);
memcpy(this->aimg.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
memcpy(this->aimg.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
Value input_tensor2 = Value::CreateTensor<float>(memory_info_handler, this->aimg.data(), this->aimg.size(), this->landmark_input_tensor_shape.data(), this->landmark_input_tensor_shape.size());
vector<Value> landmark_ort_outputs = this->landmark_ort_session->Run(runOptions, this->landmark_input_names.data(), &input_tensor2, 1, this->landmark_output_names.data(), this->landmark_output_names.size());
float *p_landmark = landmark_ort_outputs[0].GetTensorMutableData<float>();
Mat IM;
invertAffineTransform(M, IM);
for(int k=0;k<106;k++)
{
float px = (p_landmark[k * 2] + 1) * this->landmark_input_size*0.5;
float py = (p_landmark[k * 2 + 1] + 1) * this->landmark_input_size*0.5;
boxes[i].landmark_2d_106[k * 2] = IM.at<float>(0, 0) * px + IM.at<float>(0, 1) * py + IM.at<float>(0, 2);
boxes[i].landmark_2d_106[k * 2 + 1] = IM.at<float>(1, 0) * px + IM.at<float>(1, 1) * py + IM.at<float>(1, 2);
}
get_landmark
}
sort(boxes.begin(), boxes.end(), cmp);
return boxes;
}