(二十二) 文本转语音、TTS、长文本、Edge-TTS

大家好，我是讯享网，很高兴认识大家。

(二十二) 文本转语音、TTS、长文本、Edge-TTS

本文代码使用了Edge-TTS来进行文本转语音的操作，可以存储为mp3或wav文件。文本不限长度。
调用的是云端Edge-TTS接口，本人只是做了简单封装、编了个UI而已。
可直接运行的文件可在百度网盘下载：
https://pan.baidu.com/s/1ntMnDWFvnS7tLUd9jku8Ew?pwd=hims

（此处原为程序界面截图，图片未能载入。）
讯享网
代码如下：

# Text-to-speech tool V1.0
"""PyQt5 front end that turns text into speech through the Edge-TTS service.

The synthesized audio is always saved as an MP3 file; when the WAV option is
selected the MP3 is additionally converted with the bundled ``sysenv/ffmpeg``.
User choices (output directory, voice index, output format) are persisted in
``settings.yaml`` in the working directory.
"""
import asyncio
import os
import subprocess
import sys
import time
import traceback

# import librosa
import cv2
import edge_tts
import yaml
from mutagen.mp3 import MP3
from playsound import playsound
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal, pyqtSlot
from PyQt5.QtWidgets import QWidget, QMessageBox, QFileDialog, QApplication, QSlider

import hbt_funcs as hbt
from txt2audio_UI import Ui_txt2voice

# The selector event loop policy only exists on Windows builds of asyncio;
# guard it so the module can at least be imported elsewhere.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

# Project helper; presumably checks the working directory for Chinese
# characters in the path -- TODO confirm against hbt_funcs.
hbt.is_contains_chinese(os.getcwd())

rates = '+0%'          # speech-rate string handed to edge_tts, e.g. '+10%' / '-20%'
run_flag = 0
stop_flag = False
bar = 0                # current value of the animated progress bar
voices_list = ['XiaoxiaoNeural', 'XiaoyiNeural', 'YunxiaNeural',
               'liaoning-XiaobeiNeural', 'shaanxi-XiaoniNeural',
               'YunjianNeural', 'YunxiNeural', 'YunyangNeural']
my_title = "iCANX文字转语音工具"
settings_file = "settings.yaml"
TEMP_AUDIO_FILE = "temp.mp3"   # scratch file used by the voice preview


def _load_settings(path):
    """Return the settings mapping stored at *path*, or ``{}`` when unusable.

    A missing, empty, unreadable or malformed file, as well as a YAML document
    that is not a mapping, all fall back to an empty dict so the application
    can still start with its defaults.
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        traceback.print_exc()
        return {}
    # safe_load returns None for an empty file and may return a list/scalar.
    return loaded if isinstance(loaded, dict) else {}


# Read the configuration file (settings.yaml).
settings = _load_settings(settings_file)

out_dir = settings.get('out_dir', os.getcwd())
if not (isinstance(out_dir, str) and os.path.isdir(out_dir)):
    # The stored directory no longer exists (or is not a path at all).
    out_dir = os.getcwd()

voices_select = settings.get('voices_select', 0)
if not isinstance(voices_select, int) or not 0 <= voices_select < len(voices_list):
    # Guard against a hand-edited / stale index that would raise IndexError.
    voices_select = 0

mp3_wav = settings.get('mp3_wav', 0)   # 0 = MP3 only, 1 = also produce WAV
voices = 'zh-CN-' + voices_list[voices_select]


def get_media_length(file_path):
    """Return the duration in seconds of the MP3 file at *file_path*."""
    return MP3(file_path).info.length


def _remove_temp_audio():
    """Delete the preview scratch file, tolerating absence or a locked file."""
    try:
        os.remove(TEMP_AUDIO_FILE)
    except FileNotFoundError:
        pass
    except OSError:
        traceback.print_exc()


class EdgeTTSTrans(QThread):
    """Worker thread that synthesizes *texts* into ``<filename>.mp3``.

    Emits ``sinout`` with ``'OK'`` on success and ``'ERROR'`` on any failure.
    """

    sinout = pyqtSignal(str)

    def __init__(self, winshot, texts, filename):
        super().__init__()
        self.main_win = winshot
        # Snapshot the module-level rate/voice at construction time so later
        # UI changes cannot affect a conversion that is already running.
        self.rates = rates
        self.voice = voices
        self.texts = texts
        self.filename = filename + '.mp3'

    def run(self):
        """Run the synthesis and report the outcome through ``sinout``."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
        except Exception:
            # Keep the traceback visible; the UI only shows a generic message.
            traceback.print_exc()
            self.sinout.emit('ERROR')
        else:
            self.sinout.emit('OK')

    async def edge_tts_trans(self, text):
        """Synthesize *text* and save the audio to ``self.filename``."""
        communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=self.voice)
        await communicate.save(self.filename)


class PlayAudioWav(QThread):
    """Worker thread that synthesizes a short preview and plays it.

    The thread never touches widgets itself; callers that need to react to
    completion should connect to the inherited ``finished`` signal.
    """

    def __init__(self, winshot, texts):
        super().__init__()
        self.winshot = winshot
        self.rates = rates
        self.voice = voices
        self.texts = texts
        _remove_temp_audio()

    def run(self):
        """Synthesize the preview, play it, then remove the scratch file."""
        asyncio.run(self.edge_tts_trans(self.texts))
        try:
            playsound(TEMP_AUDIO_FILE)
        except Exception:
            traceback.print_exc()
        finally:
            _remove_temp_audio()

    async def edge_tts_trans(self, text):
        """Synthesize *text* into the scratch file; failures are only logged."""
        try:
            self.communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=self.voice)
            await self.communicate.save(TEMP_AUDIO_FILE)
        except Exception:
            print('Error in Async...;')
            traceback.print_exc()


class Winshot(QWidget, Ui_txt2voice):
    """Main window: wires the generated UI to the conversion/preview threads."""

    def __init__(self):
        global run_flag
        super().__init__()
        self.start_time = 0
        self.voice_len = 0
        self.text_len = 0
        self.setupUi(self)
        self.createLayout()
        self.setWindowTitle(my_title)
        self.setWindowIcon(hbt.GetIco('ican'))
        self.setFixedSize(self.size())
        self.setWindowFlags(Qt.WindowMinimizeButtonHint)
        self.my_timer = QTimer(self)
        # Connect exactly once. Connecting on every conversion stacks
        # duplicate connections and makes the progress bar tick N times
        # per interval after N runs.
        self.my_timer.timeout.connect(self.running)
        self.show()
        run_flag = 1

    def _stat_text(self, total_time):
        """Build the status-line text for the current run."""
        return (f"统计信息：文本长度({self.text_len}字) | "
                f"音频长度({self.voice_len:.1f}秒) | 消耗时间({total_time:.1f}秒)")

    def show_error(self, str):
        """Show *str* in a modal message box with a single OK button."""
        QMessageBox.question(self, my_title, '\n\n' + str + '\n\n', QMessageBox.Ok)

    def set_False_Btn(self):
        """Disable the controls that must not be used during a conversion."""
        for widget in (self.outButton, self.startButton, self.quitButton, self.out_path):
            widget.setEnabled(False)

    def set_True_Btn(self):
        """Re-enable the controls disabled by ``set_False_Btn``."""
        for widget in (self.outButton, self.startButton, self.quitButton, self.out_path):
            widget.setEnabled(True)

    def start_run(self):
        """Validate the text box content and start the conversion thread."""
        global stop_flag, bar
        self.save_yaml()
        text = self.textEdit.toPlainText()
        self.text_len = len(text)
        if not text.strip():
            self.show_error('文本框里的文字不能为空... ')
            stop_flag = True
            return

        stop_flag = False
        self.set_False_Btn()
        self.start_time = time.time()
        # Do not show the previous run's audio length while this one runs.
        self.voice_len = 0
        bar = 0
        # Output base name (without extension) is the current timestamp.
        self.filename = os.path.join(out_dir, time.strftime("%Y_%m_%d_%H.%M.%S"))
        self.my_thread = EdgeTTSTrans(self, text, self.filename)
        self.my_thread.sinout.connect(self.signal_coming)
        self.my_thread.start()
        self.my_timer.start(500)

    def signal_coming(self, str):
        """Handle the worker result: ``'OK'`` on success, anything else is an error."""
        global bar
        # Stop the progress animation on BOTH outcomes; previously it kept
        # running after a failed conversion.
        self.my_timer.stop()
        bar = 0
        if str == 'OK':
            mp3_path = self.filename + '.mp3'
            try:
                self.voice_len = get_media_length(mp3_path)
            except Exception:
                # A length probe failure must not abort the success path.
                traceback.print_exc()
                self.voice_len = 0
            total_time = time.time() - self.start_time
            self.run_state.setText(self._stat_text(total_time))
            self.progressBar.setValue(100)
            # NOTE(review): kept from the original; looks like a short pause
            # before the modal dialog -- confirm whether it is still needed.
            cv2.waitKey(10)
            answer = QMessageBox.question(
                self, my_title,
                "\n\n\n完成本次文字转语音换过程...\n\n需要播放吗？\n\n\n",
                QMessageBox.Yes | QMessageBox.No)
            if answer == QMessageBox.Yes:
                try:
                    os.startfile(mp3_path)
                except Exception:
                    print("无法播放文件......")
            if mp3_wav == 1:
                self._convert_to_wav()
        else:
            self.show_error('转换过程中发生错误...\n可能原因：\n文件或目录不能包含中文...\n网络不通...\n网络不能使用代理...')
        self.set_True_Btn()
        self.progressBar.setValue(0)

    def _convert_to_wav(self):
        """Convert ``<filename>.mp3`` to ``<filename>.wav`` with the bundled ffmpeg."""
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        command = [os.path.join("sysenv", "ffmpeg"), "-i",
                   f"{self.filename}.mp3", f"{self.filename}.wav"]
        try:
            subprocess.run(command, check=False)
        except OSError:
            # e.g. ffmpeg is missing from the sysenv directory
            traceback.print_exc()

    def running(self):
        """Timer tick: advance the looping progress bar and refresh the stats."""
        global bar
        bar += 2
        total_time = time.time() - self.start_time
        self.progressBar.setValue(bar)
        if bar >= 100:
            bar = 0
        self.run_state.setText(self._stat_text(total_time))

    def helpWin(self):
        """Show the about/help message box."""
        help_text = "\n\n\n 本软件著作权归属：XXX 网址：www.xxx.com \n\n\n"
        QMessageBox.question(self, my_title, help_text, QMessageBox.Ok)

    def quitWin(self):
        """Ask for confirmation, persist the settings, and exit on Yes."""
        # Use the title variable; the original passed the literal "my_title".
        answer = QMessageBox.question(
            self, my_title,
            "\n\n\n退出将终止本程序......\n\n确认退出吗？\n\n\n",
            QMessageBox.Yes | QMessageBox.No)
        self.save_yaml()
        if answer == QMessageBox.Yes:
            sys.exit()

    def outButton_fuc(self):
        """Let the user pick the output directory; keep the old one on cancel."""
        global out_dir
        chosen = QFileDialog.getExistingDirectory(self, '选择转换后的输出文件夹', out_dir)
        if chosen:
            out_dir = chosen
        self.out_path.setText(out_dir)

    def open_fold_fuc(self):
        """Open the output directory with the system file manager (best effort)."""
        try:
            os.startfile(out_dir)
        except Exception:
            traceback.print_exc()

    def rates_slider_fuc(self):
        """Mirror the slider value into the label and the global rate string."""
        global rates
        value = self.rates_slider.value()
        self.audio_rates.setText(f'{value}%')
        # edge_tts receives an explicitly signed percentage, e.g. '+0%', '-15%'.
        rates = f'{value:+d}%'

    def click_audio_select(self, str1):
        """Update the selected voice from the combo box index."""
        global voices, voices_select
        voices_select = self.audio_select.currentIndex()
        voices = 'zh-CN-' + voices_list[voices_select]
        print('选择的声音：', voices)

    def click_try_lisson(self, str1):
        """Play a short sample in the currently selected voice."""
        self.try_lisson.setEnabled(False)
        text = "感谢您选择我的声音"
        self.play_thread = PlayAudioWav(self, text)  # start the preview thread
        # Re-enable the button from the GUI thread once the worker is done;
        # widgets must not be modified from inside the worker thread.
        self.play_thread.finished.connect(self._enable_try_lisson)
        self.play_thread.start()

    @pyqtSlot()
    def _enable_try_lisson(self):
        """Re-enable the preview button after the preview thread finished."""
        self.try_lisson.setEnabled(True)

    def _sync_output_format(self):
        """Derive ``mp3_wav`` from the check boxes' actual state."""
        global mp3_wav
        mp3_wav = 1 if self.checkBox_wav.isChecked() else 0

    def click_checkBox_mp3(self):
        """State-change handler of the MP3 check box."""
        # Both boxes share an exclusive group, so each selection fires both
        # handlers; reading the real state makes the result order-independent.
        self._sync_output_format()

    def click_checkBox_wav(self):
        """State-change handler of the WAV check box."""
        self._sync_output_format()

    def click_textEdit(self):
        """Show the current text length in the status line."""
        txt_len = len(self.textEdit.toPlainText())
        self.run_state.setText(f"统计信息：文本长度({txt_len}字)")

    def save_yaml(self):
        """Persist the current output directory, voice index and format."""
        current = {'out_dir': out_dir, 'voices_select': voices_select, 'mp3_wav': mp3_wav}
        with open(settings_file, 'w', encoding='utf-8') as f:
            yaml.dump(current, f)

    def createLayout(self):
        """Initialise widget state from the settings and connect the signals."""
        self.out_path.setText(out_dir)
        if mp3_wav == 0:
            self.checkBox_mp3.setChecked(True)
        else:
            self.checkBox_wav.setChecked(True)
        self.checkBox_mp3.stateChanged.connect(self.click_checkBox_mp3)
        self.checkBox_wav.stateChanged.connect(self.click_checkBox_wav)
        self.outButton.clicked.connect(self.outButton_fuc)
        self.chk_outputfile.clicked.connect(self.open_fold_fuc)
        self.try_lisson.clicked.connect(self.click_try_lisson)
        self.textEdit.textChanged.connect(self.click_textEdit)
        self.textEdit.setPlainText("本软件使用微软Edge-TTS，快速把文字转换成语音。")
        self.startButton.clicked.connect(self.start_run)
        self.helpButton.clicked.connect(self.helpWin)
        self.quitButton.clicked.connect(self.quitWin)
        self.rates_slider.setTickPosition(QSlider.TicksAbove)
        self.rates_slider.valueChanged.connect(self.rates_slider_fuc)
        # Display names; order must match voices_list.
        self.audio_select.addItems(['晓晓:女', '晓依:女', '云霞:女', '东北:女',
                                    '陕西:女', '云剑:男', '云溪:男', '云阳:男'])
        self.audio_select.setCurrentIndex(voices_select)
        self.audio_select.activated[str].connect(self.click_audio_select)


# NOTE(review): the `if __name__ == '__main__':` guard is commented out in the
# original, so importing this module starts the GUI. Kept as-is.
#if __name__ == '__main__':
QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
app = QtWidgets.QApplication(sys.argv)
winshot = Winshot()
sys.exit(app.exec_())

讯享网

UI代码如下：

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'txt2audio_UI.ui'
#
# Created by: PyQt5 UI code generator 5.15.2
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_txt2voice(object):
    """Generated widget layout for the text-to-speech window."""

    def setupUi(self, txt2voice):
        """Create all child widgets on *txt2voice* and set their geometry."""
        txt2voice.setObjectName("txt2voice")
        txt2voice.resize(435, 431)
        self.startButton = QtWidgets.QPushButton(txt2voice)
        self.startButton.setGeometry(QtCore.QRect(160, 371, 91, 23))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.startButton.setFont(font)
        self.startButton.setObjectName("startButton")
        self.helpButton = QtWidgets.QPushButton(txt2voice)
        self.helpButton.setGeometry(QtCore.QRect(270, 371, 61, 23))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.helpButton.setFont(font)
        self.helpButton.setObjectName("helpButton")
        self.quitButton = QtWidgets.QPushButton(txt2voice)
        self.quitButton.setGeometry(QtCore.QRect(350, 371, 61, 23))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.quitButton.setFont(font)
        self.quitButton.setObjectName("quitButton")
        self.textEdit = QtWidgets.QPlainTextEdit(txt2voice)
        self.textEdit.setGeometry(QtCore.QRect(20, 30, 391, 175))
        self.textEdit.setObjectName("textEdit")
        self.chk_outputfile = QtWidgets.QPushButton(txt2voice)
        self.chk_outputfile.setGeometry(QtCore.QRect(20, 371, 61, 23))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.chk_outputfile.setFont(font)
        self.chk_outputfile.setObjectName("chk_outputfile")
        self.outButton = QtWidgets.QPushButton(txt2voice)
        self.outButton.setGeometry(QtCore.QRect(20, 280, 61, 21))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.outButton.setFont(font)
        self.outButton.setObjectName("outButton")
        self.out_path = QtWidgets.QLabel(txt2voice)
        self.out_path.setGeometry(QtCore.QRect(90, 280, 311, 20))
        self.out_path.setObjectName("out_path")
        self.lbl_3 = QtWidgets.QLabel(txt2voice)
        self.lbl_3.setGeometry(QtCore.QRect(26, 222, 51, 16))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.lbl_3.setFont(font)
        self.lbl_3.setObjectName("lbl_3")
        self.audio_select = QtWidgets.QComboBox(txt2voice)
        self.audio_select.setGeometry(QtCore.QRect(86, 221, 71, 18))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.audio_select.setFont(font)
        self.audio_select.setObjectName("audio_select")
        self.rates_slider = QtWidgets.QSlider(txt2voice)
        self.rates_slider.setGeometry(QtCore.QRect(271, 219, 111, 20))
        self.rates_slider.setMinimum(-99)
        self.rates_slider.setTracking(True)
        self.rates_slider.setOrientation(QtCore.Qt.Horizontal)
        self.rates_slider.setInvertedAppearance(False)
        self.rates_slider.setInvertedControls(False)
        self.rates_slider.setObjectName("rates_slider")
        self.aud = QtWidgets.QLabel(txt2voice)
        self.aud.setGeometry(QtCore.QRect(211, 220, 61, 20))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.aud.setFont(font)
        self.aud.setObjectName("aud")
        self.lbl_4 = QtWidgets.QLabel(txt2voice)
        self.lbl_4.setGeometry(QtCore.QRect(27, 336, 61, 16))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.lbl_4.setFont(font)
        self.lbl_4.setObjectName("lbl_4")
        self.progressBar = QtWidgets.QProgressBar(txt2voice)
        self.progressBar.setGeometry(QtCore.QRect(87, 340, 321, 8))
        self.progressBar.setProperty("value", 0)
        self.progressBar.setTextVisible(False)
        self.progressBar.setInvertedAppearance(False)
        self.progressBar.setObjectName("progressBar")
        self.try_lisson = QtWidgets.QPushButton(txt2voice)
        self.try_lisson.setGeometry(QtCore.QRect(160, 220, 31, 21))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.try_lisson.setFont(font)
        self.try_lisson.setObjectName("try_lisson")
        self.line = QtWidgets.QFrame(txt2voice)
        self.line.setGeometry(QtCore.QRect(0, 401, 441, 16))
        self.line.setFrameShape(QtWidgets.QFrame.HLine)
        self.line.setFrameShadow(QtWidgets.QFrame.Sunken)
        self.line.setObjectName("line")
        self.run_state = QtWidgets.QLabel(txt2voice)
        self.run_state.setGeometry(QtCore.QRect(10, 410, 381, 20))
        self.run_state.setObjectName("run_state")
        self.audio_file_path_txt_2 = QtWidgets.QLabel(txt2voice)
        self.audio_file_path_txt_2.setGeometry(QtCore.QRect(23, 10, 241, 16))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.audio_file_path_txt_2.setFont(font)
        self.audio_file_path_txt_2.setObjectName("audio_file_path_txt_2")
        self.lbl_5 = QtWidgets.QLabel(txt2voice)
        self.lbl_5.setGeometry(QtCore.QRect(26, 252, 61, 16))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.lbl_5.setFont(font)
        self.lbl_5.setObjectName("lbl_5")
        self.checkBox_mp3 = QtWidgets.QCheckBox(txt2voice)
        self.checkBox_mp3.setGeometry(QtCore.QRect(90, 252, 41, 16))
        self.checkBox_mp3.setObjectName("checkBox_mp3")
        # Both format check boxes are added to the same button group.
        self.buttonGroup = QtWidgets.QButtonGroup(txt2voice)
        self.buttonGroup.setObjectName("buttonGroup")
        self.buttonGroup.addButton(self.checkBox_mp3)
        self.checkBox_wav = QtWidgets.QCheckBox(txt2voice)
        self.checkBox_wav.setGeometry(QtCore.QRect(140, 252, 68, 16))
        self.checkBox_wav.setObjectName("checkBox_wav")
        self.buttonGroup.addButton(self.checkBox_wav)
        self.line_2 = QtWidgets.QFrame(txt2voice)
        self.line_2.setGeometry(QtCore.QRect(0, 310, 441, 16))
        self.line_2.setFrameShape(QtWidgets.QFrame.HLine)
        self.line_2.setFrameShadow(QtWidgets.QFrame.Sunken)
        self.line_2.setObjectName("line_2")
        self.audio_rates = QtWidgets.QLabel(txt2voice)
        self.audio_rates.setGeometry(QtCore.QRect(388, 220, 31, 20))
        font = QtGui.QFont()
        font.setFamily("宋体")
        font.setPointSize(9)
        self.audio_rates.setFont(font)
        self.audio_rates.setObjectName("audio_rates")

        self.retranslateUi(txt2voice)
        QtCore.QMetaObject.connectSlotsByName(txt2voice)

    def retranslateUi(self, txt2voice):
        """Assign the (translatable) display texts of all widgets."""
        _translate = QtCore.QCoreApplication.translate
        txt2voice.setWindowTitle(_translate("txt2voice", "AI"))
        self.startButton.setText(_translate("txt2voice", "开始转换"))
        self.helpButton.setText(_translate("txt2voice", "帮助"))
        self.quitButton.setText(_translate("txt2voice", "退出"))
        self.chk_outputfile.setText(_translate("txt2voice", "查看结果"))
        self.outButton.setText(_translate("txt2voice", "输出目录"))
        self.out_path.setText(_translate("txt2voice", "生成完成的视频输出目录"))
        self.lbl_3.setText(_translate("txt2voice", "语音选择："))
        self.aud.setText(_translate("txt2voice", "语速选择："))
        self.lbl_4.setText(_translate("txt2voice", "转换进度："))
        self.try_lisson.setText(_translate("txt2voice", "试听"))
        self.run_state.setText(_translate("txt2voice", "统计信息："))
        self.audio_file_path_txt_2.setText(_translate("txt2voice", "请输入文本："))
        self.lbl_5.setText(_translate("txt2voice", "输出格式："))
        self.checkBox_mp3.setText(_translate("txt2voice", "MP3"))
        self.checkBox_wav.setText(_translate("txt2voice", "WAV"))
        self.audio_rates.setText(_translate("txt2voice", "0%"))

(二十二) 文本转语音、TTS、长文本、Edge-TTS

(二十二) 文本转语音、TTS、长文本、Edge-TTS

相关推荐