Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ obj/
# 打包产物
*.tlx
/build/
/tools/CheckModel
8 changes: 4 additions & 4 deletions DiffSingerDeclarations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,11 @@ public static ObjectConfig BuildNoteConfig(VoicebankConfig config, INoteProperty
DefaultOption = PropertyValue.Create(defaultValue),
};

static List<ComboBoxOption> ToOptions(IReadOnlyList<string> values)
static List<ComboBoxOption> LanguageOptions(IReadOnlyList<string> languages)
{
var options = new List<ComboBoxOption>(values.Count);
foreach (var value in values)
options.Add(value); // 隐式转换:string → ComboBoxOption(值即显示文本)
var options = new List<ComboBoxOption> { new(PropertyValue.Create(string.Empty), "default") };
foreach (var lang in languages)
options.Add(lang);
return options;
}

Expand Down
4 changes: 4 additions & 0 deletions DiffSingerForTuneLab.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
{
"name": "TuneLab (参考:SDK/docs/范例)",
"path": "../TuneLab"
},
{
"name": "OpenUtau-lunai (参考:OpenUtau.Core)",
"path": "../OpenUtau-lunai"
}
],
"settings": {
Expand Down
3 changes: 3 additions & 0 deletions DiffSingerModels.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ public sealed class VoiceModels : IDisposable
readonly Dictionary<string, DiffSingerPredictor?> mPredictors = new(StringComparer.Ordinal);
readonly object mPredictorLock = new();

readonly object mAcousticLock = new();
readonly object mVocoderLock = new();

public InferenceSession Acoustic { get; }
public InferenceSession Vocoder { get; }

Expand Down
75 changes: 71 additions & 4 deletions DiffSingerPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,35 @@ public static List<PhonemeSpan> Phonemize(
string[] symbols = GetSymbols(dur, note, noteLang[i], out pinned[i]);
noteSymbolCount[i] = symbols.Length;

// 连音符:不产生新音素,只延展前音的时长(通过 group 自然吸收)
// 连音符:- 不产生边界(前组自然吸收),+ 拆前组末音素到独立组强制 dur 边界
if (symbols.Length == 0 && (lyric == "-" || lyric == "+"))
{
if (lyric == "+")
{
// 把前一个非空组的最后一个音素拆分到 + 组(仅当 >1 音素,否则退化为空组边界)
string moved = "AP";
bool splitted = false;
for (int gi = groups.Count - 1; gi >= 0; gi--)
{
if (groups[gi].Phonemes.Count > 1)
{
int lastIdx = groups[gi].Phonemes.Count - 1;
moved = groups[gi].Phonemes[lastIdx];
groups[gi].Phonemes.RemoveAt(lastIdx);
splitted = true;
break;
}
}
var g = new Group(note.StartTime, note.Pitch);
if (splitted) g.Phonemes.Add(moved);
groups.Add(g);
}
// - 则完全不做任何事,前组自然吸收时长,不影响 dur
notePhIndex.Add(notePhIndex[^1]);
continue;
}

var wordGroups = ProcessWord(dur, note, symbols);
groups[^1].Phonemes.AddRange(wordGroups[0].Phonemes); // 前置辅音并入前一组(侵入前一 note 尾)
groups.AddRange(wordGroups.Skip(1)); // 韵核组(起点=note 起点)
Expand Down Expand Up @@ -169,17 +198,56 @@ static List<Group> ProcessWord(DiffSingerPredictor dur, SynthesisNoteSnapshot no
return wordGroups;
}

// 取音素符号串:钉死=用 note.Phonemes 符号;否则 G2P。过滤到「类型已定义 且 dur 表可 tokenize」;空则 [SP]。
// 取音素符号串:钉死/编辑器→用已有 phonemes 或 _phonemes 属性;连音符→空(slur 延展前音素);否则 G2P。
// 空结果且非连音符→ [SP] 兜底。
static string[] GetSymbols(DiffSingerPredictor dur, SynthesisNoteSnapshot note, string lang, out bool pinned)
{
string lyric = note.Lyric ?? string.Empty;
if (lyric == "-" || lyric == "+")
{
pinned = false;
return Array.Empty<string>();
}

// 优先使用 _phonemes 属性(音素编辑器写入)
var phonemesProp = note.Properties.GetString("_phonemes", "");
if (!string.IsNullOrEmpty(phonemesProp) && phonemesProp != "[]")
{
pinned = true;
return ParsePhonemesProperty(phonemesProp).Where(s => !string.IsNullOrEmpty(s) && dur.TryPhoneme(s, out _)).ToArray();
}

// 其次使用钉死音素
pinned = note.Phonemes.Count > 0;
IEnumerable<string> raw = pinned
? note.Phonemes.Select(p => p.Symbol)
: dur.G2P(note.Lyric ?? string.Empty, lang);
: dur.G2P(lyric, lang);
var symbols = raw.Where(s => dur.IsKnownSymbol(s) && dur.TryPhoneme(s, out _)).ToArray();
return symbols.Length > 0 ? symbols : new[] { Pause };
}

// Extracts the phoneme symbols from a "_phonemes" property string shaped like
// [{"s":"ja/b","v":false},...].
// This is a lightweight scan for every "s":"<symbol>" pair, not a full JSON parse:
// escape sequences inside a symbol are not interpreted, and empty symbols are skipped.
// Returns the symbols in document order; null, empty or malformed input never throws and
// yields whatever symbols were completely read before the damage (possibly none).
static string[] ParsePhonemesProperty(string json)
{
    const string marker = "\"s\":\"";
    var result = new List<string>();
    if (string.IsNullOrEmpty(json) || json.Length < 2) return result.ToArray();

    int cursor = 0;
    while (cursor < json.Length)
    {
        // Ordinal search: the marker is a machine-readable token, not linguistic text.
        int markerAt = json.IndexOf(marker, cursor, StringComparison.Ordinal);
        if (markerAt < 0) break;

        int valueStart = markerAt + marker.Length;
        int valueEnd = json.IndexOf('"', valueStart);

        // Unterminated value (truncated input). Stop here: resuming from index 0 would
        // find the same marker again and never terminate.
        if (valueEnd < 0) break;

        if (valueEnd > valueStart)
            result.Add(json.Substring(valueStart, valueEnd - valueStart));
        cursor = valueEnd + 1;
    }
    return result.ToArray();
}

// OpenUtau stretch:source[from..from+count) 的帧时长按 ratio 缩放、终点对齐 endPos,返回各音素起点秒。
static IEnumerable<double> Stretch(IReadOnlyList<double> source, int from, int count, double ratio, double endPos)
{
Expand Down Expand Up @@ -235,8 +303,7 @@ static double[] RunDur(DiffSingerPredictor dur, long[] tokens, long[] langs,
var spk = new float[nTokens * hidden];
for (int i = 0; i < nTokens; i++) Array.Copy(emb, 0, spk, i * hidden, hidden);

var durModel = dur.Model("dur");
var durInputs = new List<NamedOnnxValue>
using var durOut = dur.RunModel("dur", new List<NamedOnnxValue>
{
NamedOnnxValue.CreateFromTensor("encoder_out", encDense),
NamedOnnxValue.CreateFromTensor("x_masks", maskDense),
Expand Down
1 change: 1 addition & 0 deletions DiffSingerPitch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ public static class DiffSingerPitch
// 延音符("-"/"+"):无自身音素,沿用前一个 note 的 rest 状态(前为发声 ⇒ 本帧也发声、携自身 MIDI 滑过去)。
if (DiffSingerPhonemizer.IsSlur(note.Lyric))
{
// slur 延音符继承前音 rest 状态,确保 pitch 模型为其生成正确过渡音高
restList.Add(restList[^1]);
}
else
Expand Down
121 changes: 115 additions & 6 deletions DiffSingerPredictor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public sealed class DiffSingerPredictor : IDisposable
readonly Dictionary<string, Dictionary<string, string[]>> mEntryCache = new(StringComparer.Ordinal);
readonly Dictionary<string, string> mSymbolTypes = new(StringComparer.Ordinal); // symbol → type(合并 dsdict)
readonly object mLock = new();
// 推理锁:DirectML EP 的 InferenceSession.Run() 非线程安全,串行化所有 Run 调用。
readonly object mRunLock = new();

public InferenceSession Linguistic { get; }
public int HiddenSize => mHidden;
Expand Down Expand Up @@ -86,13 +88,21 @@ public int PhonemeToken(string symbol)
: throw new InvalidOperationException($"音素 \"{symbol}\" 不在 {Path.GetFileName(mDir)} 的音素表中");
public long LangId(string lang) => mLanguages.TryGetValue(lang, out var id) ? id : 0;

// —— G2P:按语言查 dsdict-{lang}.yaml 词条(grapheme→带前缀音素),exact 后小写回退 ——
// —— G2P:优先查语言特定词典(dsdict-{lang}.yaml),避免默认底库(dsdict.yaml 以 zh 为主)污染;再试 replacements;最后才兜底查合并词典。 ——
public string[] G2P(string lyric, string lang)
{
var entries = GetEntries(lang);
var key = lyric.Trim();
if (entries.TryGetValue(key, out var phs)) return phs;
if (entries.TryGetValue(key.ToLowerInvariant(), out phs)) return phs;
// 1. 语言特定词典(不含默认底库)
var langEntries = GetLanguageSpecificEntries(lang);
if (langEntries.TryGetValue(key, out var phs)) return phs;
if (langEntries.TryGetValue(key.ToLowerInvariant(), out phs)) return phs;
// 2. 替换规则(en/ko 等无 entries 的语种)
var replaced = ApplyReplacements(lyric, lang);
if (replaced.Length > 0) return replaced;
// 3. 最后才查合并词典(含默认底库 dsdict.yaml,作为未知字素的最终兜底)
var allEntries = GetEntries(lang);
if (allEntries.TryGetValue(key, out phs)) return phs;
if (allEntries.TryGetValue(key.ToLowerInvariant(), out phs)) return phs;
return Array.Empty<string>();
}

Expand All @@ -119,7 +129,78 @@ public float[] GetEmbedding(string acousticSpeaker)
}
}

// —— 替换规则(用于 EN/KO 等无 entries 仅 replacements 的语种)——
readonly Dictionary<string, List<(string from, string to)>> mReplacements = new(StringComparer.Ordinal);

// Loads the "replacements" rules (from → to pairs) for `lang` into mReplacements, once per language.
// Rules are collected, in this order, from dsdict-{lang}.yaml, dsdict-zh-{lang}.yaml and dsdict.yaml
// under mDir; files that do not exist are skipped, and rules with an empty "from" or "to" are dropped.
// A file that cannot be read or parsed contributes no rules (best effort) instead of failing the call.
// The cache is guarded by mLock, the same lock GetEntries uses for its dictionary cache.
void LoadReplacements(string lang)
{
    lock (mLock)
    {
        if (mReplacements.ContainsKey(lang)) return;

        var rules = new List<(string from, string to)>();
        // One deserializer is enough for all candidate files.
        var yaml = new DeserializerBuilder().Build();
        foreach (var file in new[] { $"dsdict-{lang}.yaml", $"dsdict-zh-{lang}.yaml", "dsdict.yaml" })
        {
            var path = Path.Combine(mDir, file);
            if (!File.Exists(path)) continue;
            try
            {
                var doc = yaml.Deserialize<Dictionary<string, object?>>(File.ReadAllText(path));
                if (doc == null
                    || !doc.TryGetValue("replacements", out var reps)
                    || reps is not List<object?> repList)
                    continue;

                foreach (var item in repList)
                {
                    if (item is not Dictionary<object, object?> rule) continue;
                    string? source = rule.TryGetValue("from", out var fv) ? fv?.ToString() : null;
                    string? target = rule.TryGetValue("to", out var tv) ? tv?.ToString() : null;
                    if (!string.IsNullOrEmpty(source) && !string.IsNullOrEmpty(target))
                        rules.Add((source, target));
                }
            }
            catch (Exception ex)
            {
                // Keep the best-effort behaviour, but leave a trace instead of swallowing silently.
                System.Diagnostics.Debug.WriteLine($"LoadReplacements: skipped {path}: {ex.Message}");
            }
        }
        mReplacements[lang] = rules;
    }
}

// Converts a lyric into phoneme symbols using the replacement rules of `lang`.
// The lyric is lower-cased (invariant culture) and scanned left to right; at each position the
// longest rule whose "from" text matches ordinally wins and its "to" text is emitted. A character
// that no rule matches is emitted as a one-character symbol of its own.
// Returns an empty array when the language has no replacement rules.
public string[] ApplyReplacements(string lyric, string lang)
{
    List<(string from, string to)>? rules;
    // Load and read the shared cache under mLock so a concurrent load for another language
    // cannot mutate the dictionary while it is being read. The stored list is never modified
    // after insertion, so it can be used outside the lock.
    lock (mLock)
    {
        LoadReplacements(lang);
        mReplacements.TryGetValue(lang, out rules);
    }
    if (rules == null || rules.Count == 0)
        return Array.Empty<string>();

    // Stable sort: among rules of equal length the file order is preserved.
    var longestFirst = rules.OrderByDescending(r => r.from.Length).ToList();
    var phonemes = new List<string>();
    string text = lyric.ToLowerInvariant();
    int pos = 0;
    while (pos < text.Length)
    {
        string? emitted = null;
        int consumed = 1;
        foreach (var (from, to) in longestFirst)
        {
            // Ordinal in-place comparison; equivalent to comparing a substring, without allocating one.
            if (from.Length <= text.Length - pos
                && string.CompareOrdinal(text, pos, from, 0, from.Length) == 0)
            {
                emitted = to;
                consumed = from.Length;
                break;
            }
        }
        // No rule matched: the single character stands as its own symbol.
        phonemes.Add(emitted ?? text[pos].ToString());
        pos += consumed;
    }
    return phonemes.ToArray();
}

// —— 词典加载 ——
// 策略:先加载 dsdict.yaml 作为默认底库,再叠加载入语种特定文件(后面覆盖前面)。
// 若 entries 为空且 replacements 存在,留空返回(上层调用 ApplyReplacements)。
Dictionary<string, string[]> GetEntries(string lang)
{
lock (mLock)
Expand All @@ -128,21 +209,49 @@ Dictionary<string, string[]> GetEntries(string lang)
return cached;

var map = new Dictionary<string, string[]>(StringComparer.Ordinal);
foreach (var file in new[] { $"dsdict-{lang}.yaml", $"dsdict-zh-{lang}.yaml", "dsdict.yaml" })

// 1. 加载默认底库 dsdict.yaml(总是存在)
var defaultPath = Path.Combine(mDir, "dsdict.yaml");
if (File.Exists(defaultPath))
{
var root = DeserializeDsDict(defaultPath);
foreach (var e in root.entries)
if (!string.IsNullOrEmpty(e.grapheme))
map[e.grapheme] = e.phonemes.ToArray();
}

// 2. 叠加载入语种特定文件(若存在则覆盖/补充)
foreach (var file in new[] { $"dsdict-{lang}.yaml", $"dsdict-zh-{lang}.yaml" })
{
var path = Path.Combine(mDir, file);
if (!File.Exists(path)) continue;
var root = DeserializeDsDict(path);
foreach (var e in root.entries)
if (!string.IsNullOrEmpty(e.grapheme))
map[e.grapheme] = e.phonemes.ToArray();
break;
}

mEntryCache[lang] = map;
return map;
}
}

// Per-language cache for GetLanguageSpecificEntries, guarded by mLock.
readonly Dictionary<string, Dictionary<string, string[]>> mLanguageOnlyEntryCache = new(StringComparer.Ordinal);

// Returns the grapheme → phonemes map built only from the language-specific dictionaries
// (dsdict-{lang}.yaml, then dsdict-zh-{lang}.yaml; a later file overrides an earlier one for the
// same grapheme). The default dsdict.yaml is deliberately left out so G2P can consult this map
// first without entries from the default dictionary shadowing the language's own.
// The map is built once per language and then reused: G2P calls this for every lyric, and
// re-reading and re-parsing the YAML files each time is needless I/O. Edits made to the files
// after the first load are therefore not picked up. Callers must treat the result as read-only.
Dictionary<string, string[]> GetLanguageSpecificEntries(string lang)
{
    lock (mLock)
    {
        if (mLanguageOnlyEntryCache.TryGetValue(lang, out var cached))
            return cached;

        var map = new Dictionary<string, string[]>(StringComparer.Ordinal);
        foreach (var file in new[] { $"dsdict-{lang}.yaml", $"dsdict-zh-{lang}.yaml" })
        {
            var path = Path.Combine(mDir, file);
            if (!File.Exists(path)) continue;
            var root = DeserializeDsDict(path);
            foreach (var e in root.entries)
                if (!string.IsNullOrEmpty(e.grapheme))
                    map[e.grapheme] = e.phonemes.ToArray();
        }

        // Cached only after a fully successful load; a failing load propagates as before.
        mLanguageOnlyEntryCache[lang] = map;
        return map;
    }
}

void LoadSymbolTypes(string dsdictPath)
{
if (!File.Exists(dsdictPath)) return;
Expand Down
Loading