mirror of https://github.com/avilliai/Manyana
vits将作为内置功能加入更新
This commit is contained in:
parent
57839c37fe
commit
24a1d428f0
|
@ -11,8 +11,8 @@
|
|||
fuckingnumber: 13 #低于13人退群
|
||||
语音功能设置:
|
||||
#建议使用modelscopeTTS或outVits,FishTTS(目前不可用),其他模式需要额外部署。
|
||||
voicegenerate: modelscopeTTS #语音合成模式 modelscopeTTS/outVits/FishTTS(目前已不可用)/vits(需额外部署)/bert_vits2(需额外部署)/so-vits(需额外部署)
|
||||
speaker: "东雪莲" #根据你的合成模式设定角色,请查看 Manyana/config/语音合成可用角色.txt,vits则无需配置
|
||||
voicegenerate: modelscopeTTS #语音合成模式 modelscopeTTS/outVits/FishTTS(目前已不可用)/vits(本地合成)/bert_vits2(需额外部署)/so-vits(需额外部署)
|
||||
speaker: "东雪莲" #根据你的合成模式设定角色,最好启动后@bot 角色 以进行查看。请查看 Manyana/config/语音合成可用角色.txt,vits则无需配置
|
||||
voiceLangType: "<zh>" #默认语音合成语言类型,可选: <zh> <jp> <en> 目前仅在modelscopeTTS和firefly模式下生效,firefly模式下需要与speaker的语言类型匹配
|
||||
prefix: "" #语音合成指令前缀,默认为 xx说 ;如果为 "/" 则指令变为 /xx说 ;用以减少误触发。
|
||||
#下面的两个配置项,除非你自己额外搭了对应的服务,不然就别动。
|
||||
|
|
|
@ -80,32 +80,6 @@ with open('data/chatGLMData.yaml', 'r', encoding='utf-8') as f:
|
|||
chatGLMData = cha
|
||||
|
||||
|
||||
async def tstt(r):
    """Synthesize speech for the reply text ``r`` and return the audio path.

    Parenthesised asides are removed before synthesis so they are not read
    aloud.  When the module-level ``voicegg`` mode is ``"vits"`` the text is
    tagged with a language marker and handed to ``voiceGenerate`` using the
    default speaker/model stored in ``config/autoSettings.yaml``; every other
    mode is delegated to ``superVG``.

    Args:
        r: Reply text to speak.

    Returns:
        The path of the generated audio: the ``data/voices/*.wav`` file
        written by ``voiceGenerate`` in vits mode, otherwise whatever
        ``superVG`` returns.
    """
    # Strip "(...)" and full-width "（...）" asides.  The parentheses must be
    # escaped: the former pattern r"([^)]*)" was a capture group matching every
    # run of non-")" characters, which erased the entire reply.
    st8 = re.sub(r"\([^)]*\)|（[^）]*）", "", r)
    data1 = {'speaker': speaker, "text": st8}

    if voicegg != "vits":
        logger.info(f"调用{voicegg}语音合成")
        return await superVG(data1, voicegg, berturl, voiceLangType)

    logger.info("调用vits语音回复")
    path = 'data/voices/' + random_str() + '.wav'
    if voiceLangType == "<jp>":
        # NOTE(review): translate() presumably returns Japanese text, given the
        # [JA] tags wrapped around its result -- confirm against plugins.toolkits.
        texts = await translate(str(st8))
        tex = '[JA]' + texts + '[JA]'
    else:
        tex = "[ZH]" + st8 + "[ZH]"
    logger.info("启动文本转语音:text: " + tex + " path: " + path)

    # Re-read the settings on every call so the current default model stored
    # in the file is the one used.
    with open('config/autoSettings.yaml', 'r', encoding='utf-8') as f:
        resulte = yaml.load(f.read(), Loader=yaml.FullLoader)
    default_model = resulte.get("defaultModel")
    spe = default_model.get("speaker")
    modelSelect = default_model.get("modelSelect")
    await voiceGenerate({"text": tex, "out": path, "speaker": spe, "modelSelect": modelSelect})
    return path
|
||||
|
||||
|
||||
async def loop_run_in_executor(executor, func, *args):
|
||||
try:
|
||||
r = await executor.run_in_executor(None, func, *args)
|
||||
|
|
|
@ -1,46 +1,21 @@
|
|||
import datetime
|
||||
import os
|
||||
|
||||
from vits import utils
|
||||
import yaml
|
||||
|
||||
|
||||
def modelLoader():
    """Scan ``vits/voiceModel`` and describe every usable local VITS model.

    A model folder is a sub-directory holding a ``config.json`` (which must
    provide ``speakers`` and ``data.text_cleaners``) together with at least one
    ``.pth`` checkpoint.  Plain files, folders without a ``config.json`` and
    folders without a checkpoint are skipped.

    Returns:
        dict: one entry per model folder, keyed by ``str(speakers)`` (the
        speaker list rendered as text).  Each value is a dict with the keys
        ``"speakers"``, ``"modelPath"``, ``"configPath"`` and
        ``"text_cleaners"``.  If a folder holds several checkpoints, the last
        one in sorted order is kept.

    Raises:
        FileNotFoundError: if ``vits/voiceModel`` itself does not exist.
        KeyError: if a ``config.json`` lacks ``speakers`` or
            ``data.text_cleaners``.
    """
    # Local import keeps this block self-contained; config.json is plain JSON,
    # so the stdlib parser is sufficient and avoids a full YAML load.
    import json

    root = 'vits/voiceModel'
    models = {}
    # sorted() makes the scan order (and therefore the winning checkpoint)
    # deterministic across platforms.
    for folder in sorted(os.listdir(root)):
        folder_path = root + '/' + folder
        if not os.path.isdir(folder_path):
            continue

        configPath = folder_path + '/config.json'
        if not os.path.isfile(configPath):
            # One incomplete folder must not disable every other model.
            continue
        with open(configPath, 'r', encoding='utf-8') as config_file:
            data = json.load(config_file)
        speakers = data['speakers']
        text_cleaners = data["data"]['text_cleaners']

        for entry in sorted(os.listdir(folder_path)):
            if not entry.endswith('.pth'):
                continue
            models[str(speakers)] = {
                "speakers": speakers,
                'modelPath': folder_path + '/' + entry,
                'configPath': configPath,
                'text_cleaners': text_cleaners,
            }
    return models
|
|
@ -1,8 +1,12 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
import websockets
|
||||
|
@ -13,7 +17,7 @@ from plugins.toolkits import translate,random_str,random_session_hash
|
|||
try:
|
||||
from plugins.modelsLoader import modelLoader
|
||||
|
||||
models, default, characters = modelLoader() # 读取模型
|
||||
models = modelLoader() # 读取模型
|
||||
from vits import vG
|
||||
except:
|
||||
pass
|
||||
|
@ -81,22 +85,26 @@ async def superVG(data, mode, urls="", langmode="<zh>"):
|
|||
|
||||
text = data['text']
|
||||
out = data["out"]
|
||||
try:
|
||||
speaker = data['speaker']
|
||||
modelSelect = data['modelSelect']
|
||||
except Exception as e:
|
||||
speaker = 2
|
||||
modelSelect = ['vits/voiceModel/nene/1374_epochsm.pth', 'vits/voiceModel/nene/config.json']
|
||||
print(e)
|
||||
# with open('config/settings.yaml', 'r', encoding='utf-8') as f:
|
||||
# result = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
# speaker = result.get("vits").get("speaker")
|
||||
# modelSelect = result.get("vits").get("modelSelect")
|
||||
# 调用 voiceG() 函数
|
||||
if modelSelect[0].endswith("I.pth"):
|
||||
text = text.replace("[JA]", "").replace("[ZH]", "")
|
||||
# print("get")
|
||||
await vG(tex=text, out=out, speakerID=speaker, modelSelect=modelSelect)
|
||||
|
||||
speaker = data['speaker']
|
||||
modelSelect = ['vits/voiceModel/nene/1374_epochsm.pth', 'vits/voiceModel/nene/config.json']
|
||||
speakerId="0"
|
||||
for i in models:
|
||||
if speaker in i:
|
||||
modelSelect = [models[i]["modelPath"], models[i]["configPath"]]
|
||||
speakerId=models[i]['speakers'].index(speaker)
|
||||
if "mixture" in models[i]['text_cleaners'][0]:
|
||||
if langmode == "<zh>":
|
||||
text =f"[ZH]{text}[ZH]"
|
||||
elif langmode == "<en>":
|
||||
text = f"[EN]{text}[EN]"
|
||||
elif langmode == "<jp>":
|
||||
text = await translate(text)
|
||||
text=f"[JA]{text}[JA]"
|
||||
break
|
||||
loop = asyncio.get_event_loop()
|
||||
# 使用线程池在子线程中运行 sync_function
|
||||
await loop.run_in_executor(ThreadPoolExecutor(), vG, text, out, speakerId, modelSelect)
|
||||
print("语音生成完成")
|
||||
return out
|
||||
elif mode == "bert_vits2":
|
||||
|
@ -788,3 +796,5 @@ async def modelscopeTTS(data):
|
|||
with open(p, "wb") as f:
|
||||
f.write(r.content)
|
||||
return p
|
||||
|
||||
#asyncio.run(superVG({"text": "你好,欢迎使用语音合成服务。", "out": "output.wav", "speaker": "綾地寧々"},"vits", urls="", langmode="<zh>"))
|
|
@ -12,7 +12,9 @@ from mirai import FriendMessage, GroupMessage, At,Image
|
|||
from mirai import Voice, Startup
|
||||
from mirai.models import NudgeEvent
|
||||
|
||||
from plugins.aiReplyCore import modelReply, clearAllPrompts, tstt, clearsinglePrompt
|
||||
from plugins.aiReplyCore import modelReply, clearAllPrompts,clearsinglePrompt
|
||||
from plugins.toolkits import random_str
|
||||
from plugins.vitsGenerate import superVG
|
||||
from plugins.wReply.wontRep import wontrep
|
||||
|
||||
|
||||
|
@ -67,7 +69,9 @@ def main(bot, master, logger):
|
|||
maxTextLen = result.get("chatGLM").get("maxLen")
|
||||
voiceRate = result.get("chatGLM").get("voiceRate")
|
||||
withText = result.get("chatGLM").get("withText")
|
||||
|
||||
speaker = result.get("语音功能设置").get("speaker")
|
||||
voicegenerateMode = result.get("语音功能设置").get("voicegenerate")
|
||||
voiceLangType = result.get("语音功能设置").get("voiceLangType")
|
||||
with open('config.json', 'r', encoding='utf-8') as f:
|
||||
data = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
config = data
|
||||
|
@ -157,7 +161,11 @@ def main(bot, master, logger):
|
|||
r= await modelReply("指挥", event.from_id, text)
|
||||
if len(r) < maxTextLen and random.randint(0, 100) < voiceRate and "出错,请重试" not in r:
|
||||
try:
|
||||
voiceP = await tstt(r)
|
||||
path = 'data/voices/' + random_str() + '.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker)
|
||||
data = {"text": text, "out": path, 'speaker': speaker}
|
||||
voiceP = await superVG(data,mode=voicegenerateMode,urls="",langmode=voiceLangType )
|
||||
await bot.send_group_message(event.subject.id, Voice(path=voiceP))
|
||||
if withText:
|
||||
await bot.send_group_message(event.subject.id, r)
|
||||
|
@ -206,7 +214,11 @@ def main(bot, master, logger):
|
|||
await bot.send(event, "如对话异常请发送 /clear 以清理对话", True)
|
||||
if len(r) < maxTextLen and random.randint(0, 100) < voiceRate and "出错,请重试" not in r:
|
||||
try:
|
||||
voiceP = await tstt(r)
|
||||
path = 'data/voices/' + random_str() + '.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker)
|
||||
data = {"text": text, "out": path, 'speaker': speaker}
|
||||
voiceP = await superVG(data, mode=voicegenerateMode, urls="", langmode=voiceLangType)
|
||||
await bot.send(event, Voice(path=voiceP))
|
||||
if withText:
|
||||
await bot.send(event, r, True)
|
||||
|
@ -366,7 +378,11 @@ def main(bot, master, logger):
|
|||
chattingUser[user] = datetime.datetime.now()
|
||||
if len(r) < maxTextLen and random.randint(0, 100) < voiceRate and "出错,请重试" not in r:
|
||||
try:
|
||||
voiceP = await tstt(r)
|
||||
path = 'data/voices/' + random_str() + '.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker)
|
||||
data = {"text": text, "out": path, 'speaker': speaker}
|
||||
voiceP = await superVG(data, mode=voicegenerateMode, urls="", langmode=voiceLangType)
|
||||
await bot.send(event, Voice(path=voiceP))
|
||||
if withText:
|
||||
await bot.send(event, r, True)
|
||||
|
|
|
@ -18,17 +18,7 @@ def main(bot, master, logger, berturl, proxy):
|
|||
transLateData = yaml.load(file, Loader=yaml.FullLoader)
|
||||
prob = result.get("prob")
|
||||
logger.info("读取到apiKey列表")
|
||||
global models
|
||||
global characters
|
||||
try:
|
||||
from plugins.modelsLoader import modelLoader
|
||||
models, default, characters = modelLoader() # 读取模型
|
||||
except:
|
||||
logger.error("缺少本地vits模型,无法使用vits模式(非必要)")
|
||||
logger.warning(
|
||||
"如有需要,请从https://github.com/avilliai/Manyana/releases/download/Manyana/1374_epochsm.pth下载,或在群628763673内获取")
|
||||
logger.warning("下载后,将其放置在vits/voiceModel/nene文件夹下")
|
||||
logger.warning("然后执行更新脚本的 下载vits依赖 选项")
|
||||
|
||||
with open('config/settings.yaml', 'r', encoding='utf-8') as f:
|
||||
result0 = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
speaker92 = result0.get("语音功能设置").get("speaker")
|
||||
|
@ -36,39 +26,6 @@ def main(bot, master, logger, berturl, proxy):
|
|||
nudgeornot = result0.get("chatGLM").get("nudgeReply")
|
||||
logger.info("语音合成模式:" + voicegg + " 语音合成speaker:" + speaker92)
|
||||
|
||||
if voicegg == "vits":
|
||||
with open('config/autoSettings.yaml', 'r', encoding='utf-8') as f:
|
||||
result2 = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
global modelSelect
|
||||
global speaker
|
||||
speaker = result2.get("defaultModel").get("speaker")
|
||||
modelSelect = result2.get("defaultModel").get("modelSelect")
|
||||
|
||||
@bot.on(GroupMessage)
|
||||
async def setDefaultModel(event: GroupMessage):
|
||||
if event.sender.id == master and str(event.message_chain).startswith("设定角色#"):
|
||||
global speaker
|
||||
global modelSelect
|
||||
if str(event.message_chain).split("#")[1] in characters:
|
||||
speaker1 = str(event.message_chain).split("#")[1]
|
||||
logger.info("尝试设定角色:" + speaker1)
|
||||
speaker = int(characters.get(speaker1)[0])
|
||||
modelSelect = characters.get(speaker1)[1]
|
||||
logger.info("设置了语音生成_speaker" + str(speaker))
|
||||
logger.info("设置了语音生成_模型:" + str(modelSelect))
|
||||
with open('config/autoSettings.yaml', 'r', encoding='utf-8') as f:
|
||||
result = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
defaultModel = result.get("defaultModel")
|
||||
defaultModel["speaker"] = speaker
|
||||
defaultModel["modelSelect"] = modelSelect
|
||||
result["defaultModel"] = defaultModel
|
||||
with open('config/autoSettings.yaml', 'w', encoding="utf-8") as file:
|
||||
yaml.dump(result, file, allow_unicode=True)
|
||||
|
||||
await bot.send(event, "成功设置了vits语音生成默认角色为:" + speaker1)
|
||||
else:
|
||||
await bot.send(event, "不存在的vits角色")
|
||||
|
||||
@bot.on(NudgeEvent)
|
||||
async def NudgeReply(event: NudgeEvent):
|
||||
global transLateData
|
||||
|
|
|
@ -5,6 +5,8 @@ import yaml
|
|||
from mirai import GroupMessage, At, Plain,MessageChain
|
||||
from mirai import Voice
|
||||
from mirai.models import ForwardMessageNode, Forward
|
||||
|
||||
from plugins.modelsLoader import modelLoader
|
||||
from plugins.toolkits import translate,random_str
|
||||
from plugins.vitsGenerate import voiceGenerate, superVG, fetch_FishTTS_ModelId, sovits, taffySayTest
|
||||
|
||||
|
@ -76,36 +78,8 @@ def main(bot, master, logger):
|
|||
"满穗",
|
||||
"桑帛"
|
||||
]
|
||||
with open('config/autoSettings.yaml', 'r', encoding='utf-8') as f:
|
||||
result2 = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
global modelSelect
|
||||
global speaker
|
||||
speaker = result2.get("defaultModel").get("speaker")
|
||||
modelSelect = result2.get("defaultModel").get("modelSelect")
|
||||
|
||||
global models
|
||||
global characters
|
||||
try:
|
||||
from plugins.modelsLoader import modelLoader
|
||||
models, default, characters = modelLoader() # 读取模型
|
||||
except Exception as e:
|
||||
characters = {"None": "无可用模型"}
|
||||
|
||||
@bot.on(GroupMessage)
|
||||
async def setDefaultModel(event: GroupMessage):
|
||||
if event.sender.id == master and str(event.message_chain).startswith("设定角色#"):
|
||||
global speaker
|
||||
global modelSelect
|
||||
if str(event.message_chain).split("#")[1] in characters:
|
||||
speaker1 = str(event.message_chain).split("#")[1]
|
||||
logger.info("尝试设定角色:" + speaker1)
|
||||
speaker = int(characters.get(speaker1)[0])
|
||||
modelSelect = characters.get(speaker1)[1]
|
||||
logger.info("设置了语音生成_speaker" + str(speaker))
|
||||
logger.info("设置了语音生成_模型:" + str(modelSelect))
|
||||
|
||||
# modelSelect=['voiceModel/selina/selina.pth','voiceModel/selina/config.json']
|
||||
# print('------\n'+str(CHOISE))
|
||||
models = modelLoader()
|
||||
|
||||
@bot.on(GroupMessage)
|
||||
async def characterSpeake(event: GroupMessage):
|
||||
|
@ -113,14 +87,13 @@ def main(bot, master, logger):
|
|||
|
||||
text = str(event.message_chain)[len(str(event.message_chain).split("说")[0]) + 1:]
|
||||
speaker = str(event.message_chain).split("说")[0].replace(prefix,"")
|
||||
if speaker in characters:
|
||||
text = await translate(text)
|
||||
for i in models:
|
||||
if speaker in i:
|
||||
path = 'data/voices/' + random_str() + '.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker + str(characters.get(speaker)[1]))
|
||||
data = {"text": "[JA]" + text + "[JA]", "out": path, 'speaker': characters.get(speaker)[0],
|
||||
'modelSelect': characters.get(speaker)[1]}
|
||||
await voiceGenerate(data)
|
||||
logger.info("语音生成_模型:" + speaker)
|
||||
data = {"text": text, "out": path, 'speaker': speaker}
|
||||
await superVG(data,"vits")
|
||||
await bot.send(event, Voice(path=path))
|
||||
return
|
||||
if speaker in modelScope:
|
||||
|
@ -156,19 +129,6 @@ def main(bot, master, logger):
|
|||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
@bot.on(GroupMessage)
|
||||
async def characterSpeake(event: GroupMessage):
|
||||
if "中文" in str(event.message_chain) and str(event.message_chain).split("中文")[0].replace(prefix,"") in characters and str(event.message_chain).startswith(prefix):
|
||||
speaker = str(event.message_chain).split("中文")[0].replace(prefix,"")
|
||||
text = str(event.message_chain).split("中文")[1]
|
||||
|
||||
path = f'data/voices/{random_str()}.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker + str(characters.get(speaker)[1]))
|
||||
data = {"text": "[ZH]" + text + "[ZH]", "out": path, 'speaker': characters.get(speaker)[0],
|
||||
'modelSelect': characters.get(speaker)[1]}
|
||||
await voiceGenerate(data)
|
||||
await bot.send(event, Voice(path=path))
|
||||
|
||||
@bot.on(GroupMessage)
|
||||
async def characterSpeake(event: GroupMessage):
|
||||
|
@ -177,14 +137,15 @@ def main(bot, master, logger):
|
|||
text = str(event.message_chain)[len(str(event.message_chain).split("日文")[0]) + 1:]
|
||||
|
||||
logger.info("语音生成_文本" + text)
|
||||
if speaker in characters:
|
||||
path = f'data/voices/{random_str()}.wav'
|
||||
logger.info("语音生成_模型:" + speaker + str(characters.get(speaker)[1]))
|
||||
data = {"text": f"[JA]{text}[JA]", "out": path, 'speaker': speaker,
|
||||
'modelSelect': characters.get(speaker)[1]}
|
||||
await voiceGenerate(data)
|
||||
await bot.send(event, Voice(path=path))
|
||||
|
||||
for i in models:
|
||||
if speaker in i:
|
||||
path = 'data/voices/' + random_str() + '.wav'
|
||||
logger.info("语音生成_文本" + text)
|
||||
logger.info("语音生成_模型:" + speaker)
|
||||
data = {"text": text, "out": path, 'speaker': speaker}
|
||||
await superVG(data, "vits",urls="",langmode="<jp>")
|
||||
await bot.send(event, Voice(path=path))
|
||||
return
|
||||
try:
|
||||
sp1 = await fetch_FishTTS_ModelId(proxy, FishTTSAuthorization,speaker)
|
||||
if sp1 is None or sp1 == "":
|
||||
|
@ -204,8 +165,8 @@ def main(bot, master, logger):
|
|||
#print(len(str(event.message_chain).replace(str(At(bot.qq)))))
|
||||
try:
|
||||
str1 = "vits可用角色如下:\n"
|
||||
for i in characters:
|
||||
str1 += i + " |"
|
||||
for i in models:
|
||||
str1+=i+" |"
|
||||
except:
|
||||
str1 = ""
|
||||
b1=[]
|
||||
|
|
|
@ -90,7 +90,7 @@ def get_label(text, label):
|
|||
else:
|
||||
return False, text
|
||||
|
||||
async def vG(tex,out,speakerID=2,modelSelect=['vits/voiceModel/nene/1374_epochsm.pth','vits/voiceModel/nene/config.json'] ):
|
||||
def vG(tex,out,speakerID=2,modelSelect=['vits/voiceModel/nene/1374_epochsm.pth','vits/voiceModel/nene/config.json'] ):
|
||||
if len(tex)>150:
|
||||
|
||||
tex='[JA]長すぎるああ、こんなに長い声..... んもう~[JA]'
|
||||
|
@ -166,22 +166,19 @@ async def vG(tex,out,speakerID=2,modelSelect=['vits/voiceModel/nene/1374_epochsm
|
|||
|
||||
|
||||
write(out_path, hps_ms.data.sampling_rate, audio)#将生成的语音文件写入本地
|
||||
await change_sample_rate(out_path)
|
||||
async def change_sample_rate(path,new_sample_rate=44100):
    """Resample the audio file at ``path`` in place.

    The file is loaded at its native rate, resampled to ``new_sample_rate``
    and written back under the same name.

    Args:
        path: Audio file to convert; it is overwritten with the result.
        new_sample_rate: Target sampling rate passed to the resampler and
            writer (default 44100).
    """
    # sr=None makes librosa report the file's own rate instead of resampling
    # on load.
    samples, native_rate = librosa.load(path, sr=None)
    resampled = librosa.resample(samples, orig_sr=native_rate, target_sr=new_sample_rate)
    # Output keeps the input's name, so the original audio is replaced.
    soundfile.write(path, resampled, new_sample_rate)
|
||||
change_sample_rate(out_path)
|
||||
def change_sample_rate(path,new_sample_rate=44100):
    """Resample the audio file at ``path`` in place, reporting via ``print``.

    Nothing is written when the file is already at ``new_sample_rate``.  Any
    exception raised while loading, resampling or writing is caught and
    printed, so the function always returns ``None``.

    Args:
        path: Audio file to convert; it is overwritten when a conversion
            happens.
        new_sample_rate: Target sampling rate (default 44100).
    """
    try:
        # sr=None makes librosa report the file's own rate.
        samples, native_rate = librosa.load(path, sr=None)
        if native_rate != new_sample_rate:
            resampled = librosa.resample(samples, orig_sr=native_rate, target_sr=new_sample_rate)
            # Same name in and out: the source file is replaced.
            soundfile.write(path, resampled, new_sample_rate)
            print(f"音频文件已保存为: {path},采样率为: {new_sample_rate}")
        else:
            print("原始采样率与目标采样率相同,无需转换。")
    except Exception as e:
        # Best effort: a failed conversion is reported, not propagated.
        print(f"处理音频文件时发生错误: {e}")
|
||||
|
||||
|
||||
def voice_conversion(sourcepath,speaker=0):
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
{
|
||||
"train": {
|
||||
"log_interval": 10,
|
||||
"eval_interval": 100,
|
||||
"seed": 1234,
|
||||
"epochs": 10000,
|
||||
"learning_rate": 0.0002,
|
||||
"betas": [
|
||||
0.8,
|
||||
0.99
|
||||
],
|
||||
"eps": 1e-09,
|
||||
"batch_size": 16,
|
||||
"fp16_run": true,
|
||||
"lr_decay": 0.999875,
|
||||
"segment_size": 8192,
|
||||
"init_lr_ratio": 1,
|
||||
"warmup_epochs": 0,
|
||||
"c_mel": 45,
|
||||
"c_kl": 1.0
|
||||
},
|
||||
"data": {
|
||||
"training_files": "final_annotation_train.txt",
|
||||
"validation_files": "final_annotation_val.txt",
|
||||
"text_cleaners": [
|
||||
"zh_ja_mixture_cleaners"
|
||||
],
|
||||
"max_wav_value": 32768.0,
|
||||
"sampling_rate": 22050,
|
||||
"filter_length": 1024,
|
||||
"hop_length": 256,
|
||||
"win_length": 1024,
|
||||
"n_mel_channels": 80,
|
||||
"mel_fmin": 0.0,
|
||||
"mel_fmax": null,
|
||||
"add_blank": true,
|
||||
"n_speakers": 3,
|
||||
"cleaned_text": true
|
||||
},
|
||||
"model": {
|
||||
"inter_channels": 192,
|
||||
"hidden_channels": 192,
|
||||
"filter_channels": 768,
|
||||
"n_heads": 2,
|
||||
"n_layers": 6,
|
||||
"kernel_size": 3,
|
||||
"p_dropout": 0.1,
|
||||
"resblock": "1",
|
||||
"resblock_kernel_sizes": [
|
||||
3,
|
||||
7,
|
||||
11
|
||||
],
|
||||
"resblock_dilation_sizes": [
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
],
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
],
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
]
|
||||
],
|
||||
"upsample_rates": [
|
||||
8,
|
||||
8,
|
||||
2,
|
||||
2
|
||||
],
|
||||
"upsample_initial_channel": 512,
|
||||
"upsample_kernel_sizes": [
|
||||
16,
|
||||
16,
|
||||
4,
|
||||
4
|
||||
],
|
||||
"n_layers_q": 3,
|
||||
"use_spectral_norm": false,
|
||||
"gin_channels": 256
|
||||
},
|
||||
"speakers": ["薄绿"],
|
||||
"symbols": [
|
||||
"_",
|
||||
",",
|
||||
".",
|
||||
"!",
|
||||
"?",
|
||||
"-",
|
||||
"~",
|
||||
"\u2026",
|
||||
"A",
|
||||
"E",
|
||||
"I",
|
||||
"N",
|
||||
"O",
|
||||
"Q",
|
||||
"U",
|
||||
"a",
|
||||
"b",
|
||||
"d",
|
||||
"e",
|
||||
"f",
|
||||
"g",
|
||||
"h",
|
||||
"i",
|
||||
"j",
|
||||
"k",
|
||||
"l",
|
||||
"m",
|
||||
"n",
|
||||
"o",
|
||||
"p",
|
||||
"r",
|
||||
"s",
|
||||
"t",
|
||||
"u",
|
||||
"v",
|
||||
"w",
|
||||
"y",
|
||||
"z",
|
||||
"\u0283",
|
||||
"\u02a7",
|
||||
"\u02a6",
|
||||
"\u026f",
|
||||
"\u0279",
|
||||
"\u0259",
|
||||
"\u0265",
|
||||
"\u207c",
|
||||
"\u02b0",
|
||||
"`",
|
||||
"\u2192",
|
||||
"\u2193",
|
||||
"\u2191",
|
||||
" "
|
||||
]
|
||||
}
|
Loading…
Reference in New Issue