Audio2Face简介
在元宇宙的热潮下,为了让AI数字人渗透到更多的领域中,FACEGOOD已经将语音驱动口型的算法技术开源,开源地址:
https://github.com/FACEGOOD/FACEGOOD-Audio2Face
该技术可以实时将音频数据转换为驱动数字人面部BlendShape的权重数据,不同于ARKit中的52个BlendShape,它的数量多达116个,我们可以通过对应关系得到相应的数值,对应关系如下:
ARKit |
Voice2Face |
eyeBlinkLeft |
eye_blink2_l |
eyeLookDownLeft |
eye_lookDown2_l |
eyeLookInLeft |
eye_lookRight_l |
eyeLookOutLeft |
eye_lookLeft_l |
eyeLookUpLeft |
eye_lookUp_l |
eyeSquintLeft |
eye_shutTight_l |
eyeWideLeft |
max(eye_downLidRaise_l,eye_upLidRaise_l) |
eyeBlinkRight |
eye_blink2_r |
eyeLookDownRight |
eye_lookDown2_r |
eyeLookInRight |
eye_lookRight_r |
eyeLookOutRight |
eye_lookLeft_r |
eyeLookUpRight |
eye_lookUp_r |
eyeSquintRight |
eye_shutTight_r |
eyeWideRight |
max(eye_downLidRaise_r,eye_upLidRaise_r) |
jawForward |
jaw_thrust_c |
jawLeft |
jaw_sideways_l |
jawRight |
jaw_sideways_r |
jawOpen |
mouth_stretch_c |
mouthClose |
mouth_chew_c |
mouthFunnel |
max(mouth_funnel_dl,mouth_funnel_dr,mouth_funnel_ul,mouth_funnel_ur) |
mouthPucker |
max(mouth_pucker_l,mouth_pucker_r) |
mouthLeft |
mouth_sideways_l |
mouthRight |
mouth_sideways_r |
mouthSmileLeft |
mouth_lipCornerPull_l |
mouthSmileRight |
mouth_lipCornerPull_r |
mouthFrownLeft |
max(mouth_lipCornerDepress_l,mouth_lipCornerDepressFix_l) |
mouthFrownRight |
max(mouth_lipCornerDepress_r,mouth_lipCornerDepressFix_r) |
mouthDimpleLeft |
mouth_dimple_l |
mouthDimpleRight |
mouth_dimple_r |
mouthStretchLeft |
mouth_lipStretch_l |
mouthStretchRight |
mouth_lipStretch_r |
mouthRollLower |
max(mouth_suck_dl,mouth_suck_dr) |
mouthRollUpper |
max(mouth_suck_ul,mouth_suck_ur) |
mouthShrugLower |
mouth_chinRaise_d |
mouthShrugUpper |
mouth_chinRaise_u |
mouthPressLeft |
mouth_press_l |
mouthPressRight |
mouth_press_r |
mouthLowerDownLeft |
mouth_lowerLipDepress_l |
mouthLowerDownRight |
mouth_lowerLipDepress_r |
mouthUpperUpLeft |
mouth_upperLipRaise_l |
mouthUpperUpRight |
mouth_upperLipRaise_r |
browDownLeft |
brow_lower_l |
browDownRight |
brow_lower_r |
browInnerUp |
brow_raise_c |
browOuterUpLeft |
brow_raise_l |
browOuterUpRight |
brow_raise_r |
cheekPuff |
max(cheek_puff_l,cheek_puff_r) |
cheekSquintLeft |
cheek_up |
cheekSquintRight |
cheek_up |
noseSneerLeft |
nose_out_l |
noseSneerRight |
nose_out_r |
tongueOut |
|
生成的数据结果如下图所示,可见是116个取值范围为-1~1的小数:
这116个数值依次对应下面116个BlendShape名称:
在Unity中应用
可以通过构建Python服务来实现:Unity客户端开启麦克风录制音频,将音频数据发送给Python服务端,服务端将其转换为驱动BlendShape的权重数据后,返回给Unity客户端进行驱动。需要注意的是,Unity中BlendShape的权重范围并不是[-1,1](默认为0~100),因此需要进行映射。
例如:
下面是一段测试音频产生的bs权重数据文件,每一行包含116个权重数值,我们拿来进行测试,将其放到StreamingAssets文件夹下。
测试模型:
测试代码:
using System.IO;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Plays back pre-computed Audio2Face blend-shape weights on the
/// <see cref="SkinnedMeshRenderer"/> attached to the same GameObject.
/// </summary>
/// <remarks>
/// The weights are read once, in <c>Start</c>, from <c>weight.txt</c> in the
/// StreamingAssets folder. Each non-blank line is one animation frame made of
/// whitespace-separated floats. Two IMGUI buttons ("Begin" / "Stop") control playback.
/// </remarks>
public class TEST : MonoBehaviour
{
    /// <summary>File name looked up under <c>Application.streamingAssetsPath</c>.</summary>
    private const string WeightFileName = "weight.txt";

    /// <summary>Factor applied by <see cref="Remap"/> to every source value.</summary>
    private const float WeightScale = 100f;

    /// <summary>Pause between two consecutive frames during playback, in seconds.</summary>
    private const float FrameInterval = .07f;

    /// <summary>Characters that separate two values inside one line of the weight file.</summary>
    private static readonly char[] Separators = { ' ', '\t' };

    /// <summary>
    /// Maps each target blend-shape index on the mesh (the position in this array) to the
    /// column(s) of a weight row that drive it. When several columns are listed, the largest
    /// remapped value wins. The trailing names are the Audio2Face blend shapes the columns
    /// are believed to hold, carried over from the original inline comments.
    /// </summary>
    private static readonly int[][] BlendShapeSources =
    {
        new[] { 49 },             //  0: brow_raise_l
        new[] { 60 },             //  1: brow_raise_r
        new[] { 25 },             //  2: eye_shutTight_l
        new[] { 26 },             //  3: eye_shutTight_r
        new[] { 87 },             //  4: eye_lookRight_l
        new[] { 86 },             //  5: eye_lookLeft_l
        new[] { 92 },             //  6: eye_lookRight_r
        new[] { 91 },             //  7: eye_lookLeft_r
        new[] { 88 },             //  8: eye_lookUp_l
        new[] { 94 },             //  9: eye_lookUp_r
        new[] { 85 },             // 10: eye_lookDown2_l
        new[] { 90 },             // 11: eye_lookDown2_r
        new[] { 71, 82 },         // 12: presumably cheek_puff_l / cheek_puff_r (was labelled cheek_pull_*) - TODO confirm
        new[] { 18 },             // 13: cheek_UP
        new[] { 18 },             // 14: cheek_UP (same source column as 13)
        new[] { 6 },              // 15: nose_out_l
        new[] { 7 },              // 16: nose_out_r
        new[] { 70 },             // 17: mouth_sideways_l
        new[] { 72 },             // 18: mouth_sideways_r
        new[] { 67, 68 },         // 19: mouth_pucker_l / presumably mouth_pucker_r (was labelled mouth_pucker_2) - TODO confirm
        new[] { 41, 42, 43, 44 }, // 20: mouth_funnel_dl / dr / ul / ur
        new[] { 52 },             // 21: mouth_lipCornerPull_l
        new[] { 53 },             // 22: mouth_lipCornerPull_r
        new[] { 47, 45 },         // 23: mouth_lipCornerDepress_l / mouth_lipCornerDepressFix_l
        new[] { 48, 46 },         // 24: mouth_lipCornerDepress_r / mouth_lipCornerDepressFix_r
        new[] { 39 },             // 25: mouth_dimple_l
        new[] { 40 },             // 26: mouth_dimple_r
        new[] { 65 },             // 27: mouth_press_l
        new[] { 66 },             // 28: mouth_press_r
        new[] { 36 },             // 29: mouth_chinRaise_d
        new[] { 37 },             // 30: mouth_chinRaise_u
        new[] { 56 },             // 31: mouth_lipStretch_l
        new[] { 57 },             // 32: mouth_lipStretch_r
        new[] { 78 },             // 33: mouth_upperLipRaise_l
        new[] { 79 },             // 34: mouth_upperLipRaise_r
        new[] { 58 },             // 35: mouth_lowerLipDepress_l
        new[] { 59 },             // 36: mouth_lowerLipDepress_r
        new[] { 76, 77 },         // 37: mouth_suck_ul / mouth_suck_ur
        new[] { 74, 75 },         // 38: mouth_suck_dl / mouth_suck_dr
        new[] { 35 },             // 39: mouth_chew_c
        new[] { 34 },             // 40: jaw_thrust_c
        new[] { 73 },             // 41: mouth_stretch_c
        new[] { 32 },             // 42: jaw_sideways_l
        new[] { 33 },             // 43: jaw_sideways_r
        new[] { 38 },             // 44: brow_raise_c
        new[] { 22 },             // 45: eye_blink2_r
        new[] { 21 },             // 46: eye_blink2_l
        new[] { 0 },              // 47: brow_lower_l
        new[] { 27 },             // 48: brow_lower_r
        new[] { 31, 29 },         // 49: eye_downLidRaise_r / eye_upLidRaise_r
        new[] { 30, 28 },         // 50: eye_downLidRaise_l / eye_upLidRaise_l
    };

    /// <summary>
    /// Minimum number of values a row must contain so that every column referenced by
    /// <see cref="BlendShapeSources"/> exists (highest referenced column + 1).
    /// </summary>
    private static readonly int RequiredValueCount = ComputeRequiredValueCount();

    /// <summary>Handle of the running playback coroutine, or <c>null</c> when idle.</summary>
    private Coroutine coroutine;

    /// <summary>Renderer whose blend-shape weights are driven.</summary>
    private SkinnedMeshRenderer smr;

    /// <summary>Parsed animation frames; every stored row has at least <see cref="RequiredValueCount"/> values.</summary>
    private readonly List<List<float>> valueList = new List<List<float>>();

    /// <summary>
    /// Caches the renderer and loads the whole weight file in one go, so the frame list is
    /// complete before the first "Begin" click and the file handle is released immediately.
    /// </summary>
    private void Start()
    {
        smr = GetComponent<SkinnedMeshRenderer>();
        if (smr == null)
        {
            Debug.LogError($"{nameof(TEST)}: no SkinnedMeshRenderer found on '{name}'; playback is disabled.");
        }

        string path = Path.Combine(Application.streamingAssetsPath, WeightFileName);

        // NOTE(review): on platforms where StreamingAssets is not a plain directory
        // (Unity's docs mention Android and WebGL) this path is not readable through
        // System.IO and the file would have to be fetched with UnityWebRequest - confirm
        // target platforms.
        if (!File.Exists(path))
        {
            Debug.LogError($"{nameof(TEST)}: weight file not found at '{path}'.");
            return;
        }

        using (StreamReader streamReader = new StreamReader(path))
        {
            int lineNumber = 0;
            string content;
            while ((content = streamReader.ReadLine()) != null)
            {
                lineNumber++;
                List<float> row = ParseRow(content);

                // Blank lines (e.g. a trailing newline at the end of the file) carry no frame.
                if (row.Count == 0)
                {
                    continue;
                }

                // A short row would make playback index past the end of the list, so drop it here.
                if (row.Count < RequiredValueCount)
                {
                    Debug.LogWarning($"{nameof(TEST)}: line {lineNumber} has {row.Count} values, expected at least {RequiredValueCount}; skipped.");
                    continue;
                }

                valueList.Add(row);
            }
        }
    }

    /// <summary>
    /// Applies the loaded frames one after another, pausing <see cref="FrameInterval"/> seconds
    /// after each, then clears <see cref="coroutine"/> to mark playback as finished.
    /// </summary>
    /// <returns>The iterator consumed by <c>StartCoroutine</c>.</returns>
    private IEnumerator ExecuteCoroutine()
    {
        // One instance is enough; the same yield instruction is reused for every frame.
        WaitForSeconds wait = new WaitForSeconds(FrameInterval);
        for (int i = 0; i < valueList.Count; i++)
        {
            ApplyRow(valueList[i]);
            yield return wait;
        }
        coroutine = null;
    }

    /// <summary>Writes one frame of weights to the renderer according to <see cref="BlendShapeSources"/>.</summary>
    /// <param name="row">A row holding at least <see cref="RequiredValueCount"/> values.</param>
    private void ApplyRow(List<float> row)
    {
        for (int shape = 0; shape < BlendShapeSources.Length; shape++)
        {
            int[] sources = BlendShapeSources[shape];
            float weight = Remap(row[sources[0]]);
            for (int s = 1; s < sources.Length; s++)
            {
                weight = Mathf.Max(weight, Remap(row[sources[s]]));
            }
            smr.SetBlendShapeWeight(shape, weight);
        }
    }

    /// <summary>Scales a source value by <see cref="WeightScale"/> (e.g. 0.5 becomes 50).</summary>
    /// <param name="v">Raw value read from the weight file.</param>
    /// <returns><paramref name="v"/> multiplied by 100; negative inputs stay negative.</returns>
    private float Remap(float v)
    {
        return v * WeightScale;
    }

    /// <summary>
    /// Draws the "Begin" and "Stop" buttons. "Begin" is only clickable while idle and when there
    /// is something to play; "Stop" is only clickable while playback is running.
    /// </summary>
    private void OnGUI()
    {
        bool previouslyEnabled = GUI.enabled;

        // Requiring at least one frame matters: with an empty list the coroutine would finish
        // inside StartCoroutine, and the handle assigned afterwards would never be cleared.
        GUI.enabled = coroutine == null && smr != null && valueList.Count > 0;
        if (GUILayout.Button("Begin", GUILayout.Width(200f), GUILayout.Height(50f)))
        {
            coroutine = StartCoroutine(ExecuteCoroutine());
        }

        GUI.enabled = coroutine != null;
        if (GUILayout.Button("Stop", GUILayout.Width(200f), GUILayout.Height(50f)) && coroutine != null)
        {
            StopCoroutine(coroutine);
            coroutine = null;
        }

        // Do not leak the disabled state to whatever is drawn after these buttons.
        GUI.enabled = previouslyEnabled;
    }

    /// <summary>Splits one line of the weight file into floats.</summary>
    /// <param name="line">Raw line as returned by <c>StreamReader.ReadLine</c>.</param>
    /// <returns>The parsed values in file order; empty when the line holds no tokens.</returns>
    private static List<float> ParseRow(string line)
    {
        // RemoveEmptyEntries keeps doubled or trailing separators from shifting the columns.
        string[] tokens = line.Trim().Split(Separators, System.StringSplitOptions.RemoveEmptyEntries);
        List<float> row = new List<float>(tokens.Length);
        foreach (string token in tokens)
        {
            // Invariant culture: the file always uses '.' as the decimal separator, whatever
            // the locale of the machine is. On failure TryParse leaves 0 in 'value', which is
            // still added so the remaining columns keep their positions.
            float.TryParse(
                token,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out float value);
            row.Add(value);
        }
        return row;
    }

    /// <summary>Returns the highest column referenced by <see cref="BlendShapeSources"/>, plus one.</summary>
    private static int ComputeRequiredValueCount()
    {
        int highest = -1;
        foreach (int[] sources in BlendShapeSources)
        {
            foreach (int column in sources)
            {
                if (column > highest)
                {
                    highest = column;
                }
            }
        }
        return highest + 1;
    }
}
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
评论(0)