Getting a very weird image

#2
by cs1976 - opened

I tried with lama_fp32.onnx. It generated a very weird image (it looks like some kind of mask image; similar code works with libtorch and big-lama.pt). Would you please share the code you use to test the ONNX model?

Mat get_image(string filename, bool toGray, int pad, cv::Size new_size) {
    cv::Mat matImage;
    cv::Mat img_float32;
    if (toGray) {
        // load the mask as grayscale and binarize it to {0, 1}
        matImage = cv::imread(filename, cv::IMREAD_GRAYSCALE);
        cv::Mat mask;
        if (new_size.width != 0 && new_size.height != 0)
            cv::resize(matImage, matImage, new_size);

        matImage.convertTo(mask, CV_32FC1, 1.0 / 255.0);
        cv::Mat mask_float;
        cv::threshold(mask, mask_float, 0, 1, cv::THRESH_BINARY);
        mask_float.convertTo(img_float32, CV_32FC1);
    }
    else {
        // load the image as 3-channel BGR scaled to [0, 1]
        matImage = cv::imread(filename, cv::IMREAD_COLOR);
        matImage.convertTo(img_float32, CV_32FC3, 1.0 / 255.0);
    }
    int width = img_float32.cols;
    int height = img_float32.rows;

    if (pad > 0)
    {
        // pad bottom/right so both dimensions are multiples of pad
        int out_height = ceil_modulo(height, pad);
        int out_width = ceil_modulo(width, pad);

        if (out_height != height || out_width != width) {
            cv::Mat img_padded;
            cv::copyMakeBorder(
                img_float32,
                img_padded,
                0,
                out_height - height,
                0,
                out_width - width,
                cv::BORDER_CONSTANT
            );

            return img_padded;
        }
    }

    return img_float32;
}

void Biglama::execute(string input_img, string mask_img, string output_img)
{
    // load image and mask
    const int LAMA_INPUT_SIZE = 512;
    cv::Mat matImage = get_image(input_img, false, 8, cv::Size(0, 0));
    cv::Mat matMask = get_image(mask_img, true, 8, cv::Size(LAMA_INPUT_SIZE, LAMA_INPUT_SIZE));

    int Orgheight = matImage.rows;
    int Orgwidth = matImage.cols;

    cv::resize(matImage, matImage, cv::Size(LAMA_INPUT_SIZE, LAMA_INPUT_SIZE));
    //cv::resize(matMask, matMask, cv::Size(LAMA_INPUT_SIZE, LAMA_INPUT_SIZE));

    int channels = matImage.channels();
    int stride = matImage.step[0];
    cout << channels << stride;

    // pack the image planes into NCHW, RGB order
    vector<cv::Mat> bgrChannels(3);
    split(matImage, bgrChannels);
    int image_area = LAMA_INPUT_SIZE * LAMA_INPUT_SIZE;
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(this->input_image.data(), (float*)bgrChannels[2].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float*)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float*)bgrChannels[0].data, single_chn_size);

    std::vector<Ort::Value> inputs_tensor;
    std::vector<int64_t> input_img_shape = { 1, 3, 512, 512 };
    inputs_tensor.emplace_back(Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size()));

    std::vector<float> mask_image;
    image_area = LAMA_INPUT_SIZE * LAMA_INPUT_SIZE;
    mask_image.resize(image_area);
    single_chn_size = image_area * sizeof(float);
    memcpy(mask_image.data(), (float*)matMask.data, single_chn_size);
    std::fill(mask_image.begin(), mask_image.end(), 0.0f);

    std::vector<int64_t> mask_img_shape = { 1, 1, LAMA_INPUT_SIZE, LAMA_INPUT_SIZE };
    inputs_tensor.emplace_back(Value::CreateTensor<float>(memory_info_handler, mask_image.data(), mask_image.size(), mask_img_shape.data(), mask_img_shape.size()));

    Ort::RunOptions runOptions;
    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), inputs_tensor.data(), inputs_tensor.size(), this->output_names.data(), output_names.size());

    // read the NCHW float output back into per-channel Mats
    float* pdata = ort_outputs[0].GetTensorMutableData<float>();
    std::vector<int64_t> outs_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = outs_shape[2];
    const int out_w = outs_shape[3];
    const int channel_step = out_h * out_w;
    Mat rmat(out_h, out_w, CV_32FC1, pdata);
    Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step);
    Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step);
    //Mat rmat = bgrChannels[2].clone();
    //Mat gmat = bgrChannels[1].clone();
    //Mat bmat = bgrChannels[0].clone();

    rmat *= 255.f;
    gmat *= 255.f;
    bmat *= 255.f;
    rmat.setTo(0, rmat < 0);
    rmat.setTo(255, rmat > 255);
    gmat.setTo(0, gmat < 0);
    gmat.setTo(255, gmat > 255);
    bmat.setTo(0, bmat < 0);
    bmat.setTo(255, bmat > 255);

    vector<Mat> channel_mats(3);
    channel_mats[0] = bmat;
    channel_mats[1] = gmat;
    channel_mats[2] = rmat;
    Mat matOutput;
    merge(channel_mats, matOutput);
    cv::imwrite(output_img, matOutput);
    matOutput.convertTo(matOutput, CV_8UC3);

    cv::resize(matOutput, matOutput, cv::Size(Orgwidth, Orgheight));
    cv::imwrite(output_img, matOutput);
}

Carve org

We use this Jupyter notebook for testing the model, and this HF Space also works with it:
https://huggingface.co/spaces/Carve/LaMa-Demo-ONNX
https://colab.research.google.com/github/Carve-Photos/lama/blob/main/export_LaMa_to_onnx.ipynb

Carve org

This model works perfectly for us. Please check your code.

OK, thank you very much for your reply. I will try to reproduce the result with the Python code first, and will report back if there is any progress.

best regards

I found the problem. Unlike the PyTorch model, the ONNX model directly outputs image * 255, so there is no need to multiply the output by 255 (the PyTorch model outputs the image as floats in the 0-1 range). I fixed this and can now output a correct inpainted image.
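As a rough sketch, the corrected post-processing is just the code above without the scaling step (reusing the same variable names from my execute(): pdata, out_h, out_w, channel_step, Orgwidth, Orgheight, output_img):

    Mat rmat(out_h, out_w, CV_32FC1, pdata);
    Mat gmat(out_h, out_w, CV_32FC1, pdata + channel_step);
    Mat bmat(out_h, out_w, CV_32FC1, pdata + 2 * channel_step);

    // no "*= 255.f" here: the ONNX model already outputs values in [0, 255]
    rmat.setTo(0, rmat < 0);
    rmat.setTo(255, rmat > 255);
    gmat.setTo(0, gmat < 0);
    gmat.setTo(255, gmat > 255);
    bmat.setTo(0, bmat < 0);
    bmat.setTo(255, bmat > 255);

    vector<Mat> channel_mats = { bmat, gmat, rmat };  // BGR order for imwrite
    Mat matOutput;
    merge(channel_mats, matOutput);
    matOutput.convertTo(matOutput, CV_8UC3);
    cv::resize(matOutput, matOutput, cv::Size(Orgwidth, Orgheight));
    cv::imwrite(output_img, matOutput);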

best regards

anodev changed discussion status to closed
