Face-to-Anime-Face Translation and Control

Posted by HWCloudAI on 2022/12/01 14:28:43


A stable, controllable, and diverse image-to-image translation method (it works on video, too!)

How to use: in the menu at the top, choose Run - Run All Cells.

Step 1: Set up the code environment

import os

# Download and unpack the GANsNRoses code and pretrained model
! wget https://obs-aigallery-zc.obs.cn-north-4.myhuaweicloud.com/clf/code/GANsNRoses.zip
os.system('unzip GANsNRoses.zip')
os.chdir('GANsNRoses')
# Install the pinned torch/torchvision stack and the remaining dependencies
! pip uninstall -y torch torchvision
! pip install torchvision==0.9
! pip install tqdm kornia scipy opencv-python dlib lpips aubio ffmpeg
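
Before continuing, it is worth confirming that the pinned versions actually installed and that a GPU is visible; a minimal, optional check:

import torch, torchvision

# Optional sanity check: the rest of the notebook assumes torchvision 0.9
# and an available CUDA device.
print('torch:', torch.__version__)
print('torchvision:', torchvision.__version__)
print('CUDA available:', torch.cuda.is_available())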
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import data
from torchvision import transforms, utils
from tqdm import tqdm
torch.backends.cudnn.benchmark = True
import copy
import math                      # used below to map beat times to frame indices
import matplotlib.pyplot as plt  # used below for figure settings
from util import *
from PIL import Image

from model import *
import scipy
import scipy.ndimage             # gaussian_filter is used to smooth style latents for video
import cv2
import dlib
import kornia.augmentation as K
from aubio import tempo, source

from IPython.display import HTML
from base64 import b64encode

Step 2: Set the parameters

device = 'cuda'
latent_dim = 8
n_mlp = 5
num_down = 3

# Build the A-to-B (face-to-anime) generator and load the pretrained weights
G_A2B = Generator(256, 4, latent_dim, n_mlp, channel_multiplier=1, lr_mlp=.01, n_res=1).to(device).eval()

ensure_checkpoint_exists('../model/GNR_checkpoint.pt')
ckpt = torch.load('../model/GNR_checkpoint.pt', map_location=device)

G_A2B.load_state_dict(ckpt['G_A2B_ema'])

# Mean style latent (usable for the truncation trick; truncation = 1 leaves styles unchanged)
truncation = 1
with torch.no_grad():
    mean_style = G_A2B.mapping(torch.randn([1000, latent_dim]).to(device)).mean(0, keepdim=True)


# Preprocessing: resize to 256x256 and normalize pixel values to [-1, 1]
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), inplace=True)
])
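
The Normalize step maps pixel values from [0, 1] to [-1, 1], which is the range the generator works in. Going back the other way is just the inverse affine map; a minimal sketch (the helper name to_pil is ours, not part of the repo):

def to_pil(tensor):
    # Invert Normalize(mean=0.5, std=0.5): [-1, 1] -> [0, 1] -> uint8
    array = (tensor.clamp(-1, 1) * 0.5 + 0.5).mul(255).byte()
    return Image.fromarray(array.permute(1, 2, 0).cpu().numpy())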

Step 3: Translate a face into an anime face

%matplotlib inline
plt.rcParams['figure.dpi'] = 200

torch.manual_seed(84986)

# Sample five random style codes
num_styles = 5
style = torch.randn([num_styles, latent_dim]).to(device)


real_A = Image.open('./samples/margot_robbie.jpg')
real_A = test_transform(real_A).unsqueeze(0).to(device)

# Encode the content of the input face once, then decode it with each style
with torch.no_grad():
    A2B_content, _ = G_A2B.encode(real_A)
    fake_A2B = G_A2B.decode(A2B_content.repeat(num_styles, 1, 1, 1), style)
    A2B = torch.cat([real_A, fake_A2B], 0)

display_image(utils.make_grid(A2B.cpu(), normalize=True, range=(-1, 1), nrow=10))  # show the input alongside the translations

[Figure: the input face followed by five anime translations in different styles]
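
If you would rather save each stylized result as a separate file instead of one grid, torchvision's save_image handles the same rescaling; an optional sketch (the filenames here are our own choice):

for i in range(num_styles):
    # save_image rescales from [-1, 1] to displayable pixel values, like make_grid above
    utils.save_image(fake_A2B[i], f'style_{i}.png', normalize=True, range=(-1, 1))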

Step 4: Style interpolation

After translating a face into an anime face, we can also interpolate smoothly between different styles.

torch.manual_seed(13421)

real_A = Image.open('./samples/female_12427.jpg')
real_A = test_transform(real_A).unsqueeze(0).to(device)

style1 = G_A2B.mapping(torch.randn([1, latent_dim]).to(device))  # style 1
style2 = G_A2B.mapping(torch.randn([1, latent_dim]).to(device))  # style 2

# Blend the two mapped style codes linearly and decode each mixture
with torch.no_grad():
    A2B = []
    A2B_content, _ = G_A2B.encode(real_A)
    for i in np.linspace(0, 1, 5):
        new_style = i * style1 + (1 - i) * style2
        fake_A2B = G_A2B.decode(A2B_content, new_style, use_mapping=False)
        A2B.append(fake_A2B)
    A2B = torch.cat([real_A] + A2B, 0)

display_image(utils.make_grid(A2B.cpu(), normalize=True, range=(-1, 1), nrow=10))  # show the interpolated styles

[Figure: the input face followed by five interpolations between two styles]

Step 5: SeFa-based anime style editing

After translating a face into an anime face, we can also use SeFa (https://arxiv.org/pdf/2007.06600.pdf) to find latent directions usable for style editing. SeFa works in closed form: it collects the generator's style-modulation weight matrices and takes the singular vectors of their concatenation as editing directions.

# Collect the style-modulation weight matrices from the checkpoint
modulate = {
    k: v
    for k, v in ckpt["G_A2B_ema"].items()
    if "modulation" in k and "to_rgbs" not in k and "weight" in k
}

weight_mat = []
for k, v in modulate.items():
    weight_mat.append(v)

# Stack them and take the right singular vectors as candidate editing directions
W = torch.cat(weight_mat, 0)
eigvec = torch.svd(W).V.to("cpu")
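
Which column of eigvec gives an interesting edit is an empirical choice; directions with larger singular values generally produce stronger changes. An optional way to inspect them (torch.svd also returns the singular values as S):

S = torch.svd(W).S
print(S[:8])  # magnitudes of the leading directions; larger usually means a stronger edit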

plt.rcParams['figure.dpi'] = 200

real_A = Image.open('./samples/female_11025.jpg')
real_A = test_transform(real_A).unsqueeze(0).to(device)


eig_idx = 2    # index of the eigenvector to edit along
eig_scale = 4  # how far to move along that eigenvector

style = G_A2B.mapping(torch.randn([1, latent_dim]).to(device))
direction = eig_scale * eigvec[:, eig_idx].unsqueeze(0).to(device)


# Decode once with the base style and once with the shifted style
with torch.no_grad():
    A2B_content, _ = G_A2B.encode(real_A)
    fake_A2B = G_A2B.decode(A2B_content, style, use_mapping=False)
    fake_A2B2 = G_A2B.decode(A2B_content, style + direction, use_mapping=False)

display_image(utils.make_grid(torch.cat([real_A, fake_A2B, fake_A2B2], 0).cpu(), normalize=True, range=(-1, 1)))

[Figure: the input face, the base translation, and the translation shifted along the chosen eigenvector]

Step 6: Face-to-anime-face video translation

Given an input video of a face, convert it into a grid of anime-face videos in different styles.

There are four options for video generation:

  • normal: each cell of the grid has a different, static style.
  • blend: each cell has a style that is interpolated over time.
  • beat: each cell's style changes on the beats of the soundtrack.
  • eig: each cell's style is shifted along an eigenvector on the beats of the soundtrack.
# Input and output video paths
inpath = './samples/tiktok.mp4'
outpath = './samples/output.mp4'


mode = 'beat'
assert mode in ('normal', 'blend', 'beat', 'eig')


# Output video settings
start_frame = 0
end_frame = None
frame_num = 0
mp4_fps = 30
faces = None
smoothing_sec = .7
eig_dir_idx = 1

frames = []
reader = cv2.VideoCapture(inpath)
num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

# Audio analysis settings
win_s = 512          # window size
hop_s = win_s // 2   # hop size

s = source(inpath, 0, hop_s)
samplerate = s.samplerate
o = tempo("default", win_s, hop_s, samplerate)
delay = 4. * hop_s
# list of beats, in samples
beats = []

# Scan the audio track hop by hop and record beat times (in seconds)
total_frames = 0
while True:
    samples, read = s()
    is_beat = o(samples)
    if is_beat:
        this_beat = int(total_frames - delay + is_beat[0] * hop_s)
        beats.append(this_beat / float(samplerate))
    total_frames += read
    if read < hop_s: break
# Convert beat times to video frame indices
beats = [math.ceil(i * mp4_fps) for i in beats]
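
Before rendering, a quick check that the beat tracker actually found something can save a long wait:

print(len(beats), beats[:5])  # beat count and the first few beat frame indices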


if mode == 'blend':
    shape = [num_frames, 8, latent_dim]  # [frame, grid cell, latent component]
    all_latents = np.random.randn(*shape).astype(np.float32)
    # Smooth the latents along the time axis so styles drift rather than jump
    all_latents = scipy.ndimage.gaussian_filter(all_latents, [smoothing_sec * mp4_fps, 0, 0], mode='wrap')
    all_latents /= np.sqrt(np.mean(np.square(all_latents)))
    all_latents = torch.from_numpy(all_latents).cuda()
else:
    all_latents = torch.randn([8, latent_dim]).cuda()

if mode == 'eig':
    all_latents = G_A2B.mapping(all_latents)

in_latent = all_latents

# Face detector
face_detector = dlib.get_frontal_face_detector()

assert start_frame < num_frames - 1
end_frame = end_frame if end_frame else num_frames

while reader.isOpened():
    _, image = reader.read()
    if image is None:
        break

    if frame_num < start_frame:
        frame_num += 1  # advance the counter; otherwise skipping frames would loop forever
        continue
    # Image size
    height, width = image.shape[:2]

    # Detect the face with dlib (only once; the crop location is reused afterwards)
    if faces is None:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_detector(gray, 1)
    if len(faces):
        # For now only take the first detected face
        face = faces[0]

    # --- Prediction ---------------------------------------------------
    x, y, size = get_boundingbox(face, width, height)
    cropped_face = image[y:y+size, x:x+size]
    cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB)
    cropped_face = Image.fromarray(cropped_face)
    frame = test_transform(cropped_face).unsqueeze(0).cuda()

    with torch.no_grad():
        A2B_content, A2B_style = G_A2B.encode(frame)
        # Pick the style latents for this frame according to the chosen mode
        if mode == 'blend':
            in_latent = all_latents[frame_num]
        elif mode == 'normal':
            in_latent = all_latents
        elif mode == 'beat':
            if frame_num in beats:
                in_latent = torch.randn([8, latent_dim]).cuda()

        if mode == 'eig':
            if frame_num in beats:
                direction = 3 * eigvec[:, eig_dir_idx].unsqueeze(0).expand_as(all_latents).cuda()
                in_latent = all_latents + direction
                eig_dir_idx += 1

            fake_A2B = G_A2B.decode(A2B_content.repeat(8, 1, 1, 1), in_latent, use_mapping=False)
        else:
            fake_A2B = G_A2B.decode(A2B_content.repeat(8, 1, 1, 1), in_latent)


        # Place the original crop in the middle of a 3x3 grid of stylized outputs
        fake_A2B = torch.cat([fake_A2B[:4], frame, fake_A2B[4:]], 0)

        fake_A2B = utils.make_grid(fake_A2B.cpu(), normalize=True, range=(-1, 1), nrow=3)

    fake_A2B = fake_A2B.permute(1, 2, 0).cpu().numpy()
    frames.append(fake_A2B * 255)

    frame_num += 1

    os.makedirs('../results', exist_ok=True)

    # Save this grid as a numbered PNG; ffmpeg assembles the video from them below
    img = np.uint8(fake_A2B * 255)
    Image.fromarray(img).save(f"../results/{frame_num:04d}.png")



# Build the video from the PNG frames, then mux in the original soundtrack
!ffmpeg -y -r {mp4_fps} -i ../results/%04d.png -c:v libx264 -vf fps={mp4_fps} -pix_fmt yuv420p ./temp.mp4
!ffmpeg -i ./temp.mp4 -i $inpath -c copy -map 0:v:0 -map 1:a:0 $outpath -y
!rm ./temp.mp4
# Display the result inline as a base64-encoded HTML5 video
mp4 = open(outpath, 'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)