Skip to content

Commit ee6363c

Browse files
jotonedev, jcolladosp, OpenSourceSimon, callumio
authored
Fix and improve TikTok TTS (#1271)
* feat: tiktok sessionId can be specified in the config.toml * feat: tiktok sessionId can be specified in the config.toml * Various improvements and optimizations * Add default argument * Remove an unused variable * Code reformatted with black * Fixed all problems pointed out by pylint * Update TTS/TikTok.py * Apply suggestions from code review Co-authored-by: Simon <[email protected]> * chore: add default value for tiktok_voice Co-authored-by: Jose Collado <[email protected]> Co-authored-by: Simon <[email protected]> Co-authored-by: Callum Leslie <[email protected]> Co-authored-by: Callum Leslie <[email protected]>
1 parent f2e8e67 commit ee6363c

File tree

3 files changed

+128
-61
lines changed

3 files changed

+128
-61
lines changed

GUI/settings.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,19 @@
369369
</div>
370370
</div>
371371
</div>
372+
<div class="row mb-2">
373+
<label for="tiktok_sessionid" class="col-4">TikTok SessionId</label>
374+
<div class="col-8">
375+
<div class="input-group">
376+
<div class="input-group-text">
377+
<i class="bi bi-mic-fill"></i>
378+
</div>
379+
<input value="{{ data.tiktok_sessionid }}" name="tiktok_sessionid" type="text" class="form-control"
380+
data-toggle="tooltip"
381+
data-original-title="TikTok sessionid needed for the TTS API request. Check documentation if you don't know how to obtain it.">
382+
</div>
383+
</div>
384+
</div>
372385
<div class="row mb-2">
373386
<label for="python_voice" class="col-4">Python Voice</label>
374387
<div class="col-8">

TTS/TikTok.py

Lines changed: 108 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
1+
# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki
12
import base64
23
import random
4+
import time
5+
from typing import Optional, Final
36

47
import requests
5-
from requests.adapters import HTTPAdapter, Retry
68

79
from utils import settings
810

9-
# from profanity_filter import ProfanityFilter
10-
# pf = ProfanityFilter()
11-
# Code by @JasonLovesDoggo
12-
# https://twitter.com/scanlime/status/1512598559769702406
11+
__all__ = ["TikTok", "TikTokTTSException"]
1312

14-
nonhuman = [ # DISNEY VOICES
13+
disney_voices: Final[tuple] = (
1514
"en_us_ghostface", # Ghost Face
1615
"en_us_chewbacca", # Chewbacca
1716
"en_us_c3po", # C3PO
1817
"en_us_stitch", # Stitch
1918
"en_us_stormtrooper", # Stormtrooper
2019
"en_us_rocket", # Rocket
21-
# ENGLISH VOICES
22-
]
23-
human = [
20+
"en_female_madam_leota", # Madame Leota
21+
"en_male_ghosthost", # Ghost Host
22+
"en_male_pirate", # pirate
23+
)
24+
25+
eng_voices: Final[tuple] = (
2426
"en_au_001", # English AU - Female
2527
"en_au_002", # English AU - Male
2628
"en_uk_001", # English UK - Male 1
@@ -30,23 +32,28 @@
3032
"en_us_006", # English US - Male 1
3133
"en_us_007", # English US - Male 2
3234
"en_us_009", # English US - Male 3
33-
"en_us_010",
34-
]
35-
voices = nonhuman + human
35+
"en_us_010", # English US - Male 4
36+
"en_male_narration", # Narrator
37+
"en_male_funny", # Funny
38+
"en_female_emotional", # Peaceful
39+
"en_male_cody", # Serious
40+
)
3641

37-
noneng = [
42+
non_eng_voices: Final[tuple] = (
43+
# Western European voices
3844
"fr_001", # French - Male 1
3945
"fr_002", # French - Male 2
4046
"de_001", # German - Female
4147
"de_002", # German - Male
4248
"es_002", # Spanish - Male
43-
# AMERICA VOICES
49+
"it_male_m18", # Italian - Male
50+
# Latin American voices
4451
"es_mx_002", # Spanish MX - Male
4552
"br_001", # Portuguese BR - Female 1
4653
"br_003", # Portuguese BR - Female 2
4754
"br_004", # Portuguese BR - Female 3
4855
"br_005", # Portuguese BR - Male
49-
# ASIA VOICES
56+
# Asian voices
5057
"id_001", # Indonesian - Female
5158
"jp_001", # Japanese - Female 1
5259
"jp_003", # Japanese - Female 2
@@ -55,51 +62,97 @@
5562
"kr_002", # Korean - Male 1
5663
"kr_003", # Korean - Female
5764
"kr_004", # Korean - Male 2
58-
]
59-
65+
)
6066

61-
# good_voices = {'good': ['en_us_002', 'en_us_006'],
62-
# 'ok': ['en_au_002', 'en_uk_001']} # less en_us_stormtrooper more less en_us_rocket en_us_ghostface
67+
vocals: Final[tuple] = (
68+
"en_female_f08_salut_damour", # Alto
69+
"en_male_m03_lobby", # Tenor
70+
"en_male_m03_sunshine_soon", # Sunshine Soon
71+
"en_female_f08_warmy_breeze", # Warmy Breeze
72+
"en_female_ht_f08_glorious", # Glorious
73+
"en_male_sing_funny_it_goes_up", # It Goes Up
74+
"en_male_m2_xhxs_m03_silly", # Chipmunk
75+
"en_female_ht_f08_wonderful_world", # Dramatic
76+
)
6377

6478

65-
class TikTok: # TikTok Text-to-Speech Wrapper
79+
class TikTok:
    """TikTok Text-to-Speech Wrapper.

    Holds one ``requests`` session carrying the User-Agent and the
    ``sessionid`` cookie read from the project settings, and uses it to ask
    the TikTok speech endpoint for audio.
    """

    def __init__(self):
        headers = {
            "User-Agent": "com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; "
            "Build/NRD90M;tt-ok/3.12.13.1)",
            "Cookie": f"sessionid={settings.config['settings']['tts']['tiktok_sessionid']}",
        }

        self.URI_BASE = "https://api16-normal-c-useast1a.tiktokv.com/media/api/text/speech/invoke/"
        self.max_chars = 300

        self._session = requests.Session()
        # set the headers to the session, so we don't have to do it for every request
        self._session.headers = headers

    def run(self, text: str, filepath: str, random_voice: bool = False):
        """Synthesize ``text`` and write the decoded audio bytes to ``filepath``.

        Args:
            text: The text to be spoken.
            filepath: Destination file; it is opened in binary write mode.
            random_voice: When true, pick a voice with :meth:`random_voice`
                instead of reading ``tiktok_voice`` from the settings.

        Raises:
            TikTokTTSException: If the response ``status_code`` is not ``0``.
        """
        if random_voice:
            voice = self.random_voice()
        else:
            # If tiktok_voice is missing or blank in the config file, pass no
            # speaker at all and let the API pick the voice.
            voice = settings.config["settings"]["tts"].get("tiktok_voice") or None

        # get the audio from the TikTok API
        data = self.get_voices(voice=voice, text=text)

        # check if there was an error in the request
        status_code = data["status_code"]
        if status_code != 0:
            raise TikTokTTSException(status_code, data["message"])

        # decode data from base64 to binary
        raw_voices = data["data"]["v_str"]
        decoded_voices = base64.b64decode(raw_voices)

        # write voices to specified filepath
        with open(filepath, "wb") as out:
            out.write(decoded_voices)

    def get_voices(self, text: str, voice: Optional[str] = None) -> dict:
        """Send the speech request and return the decoded JSON response.

        If voice is not passed, the API will try to use the most fitting voice.

        Args:
            text: The text to be spoken; it is sanitized before sending.
            voice: Speaker identifier sent as ``text_speaker``; omitted from
                the request when ``None``.

        Returns:
            The JSON body of the response as a dict.
        """
        # sanitize text
        text = text.replace("+", "plus").replace("&", "and").replace("r/", "")

        # prepare url request
        params = {"req_text": text, "speaker_map_type": 0, "aid": 1233}

        if voice is not None:
            params["text_speaker"] = voice

        # send request
        try:
            response = self._session.post(self.URI_BASE, params=params)
        except (requests.exceptions.ConnectionError, ConnectionError):
            # requests raises its own ConnectionError, which is not a subclass
            # of the builtin one, so both have to be named for the retry to run.
            # Wait a random 1-6 seconds, then retry exactly once; a second
            # failure propagates to the caller.
            time.sleep(random.randrange(1, 7))
            response = self._session.post(self.URI_BASE, params=params)

        return response.json()

    @staticmethod
    def random_voice():
        """Return a randomly chosen entry of ``eng_voices``."""
        return random.choice(eng_voices)
141+
142+
143+
class TikTokTTSException(Exception):
144+
def __init__(self, code: int, message: str):
145+
self._code = code
146+
self._message = message
147+
148+
def __str__(self) -> str:
149+
if self._code == 1:
150+
return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}"
151+
152+
if self._code == 2:
153+
return f"Code: {self._code}, reason: the text is too long, message: {self._message}"
154+
155+
if self._code == 4:
156+
return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}"
103157

104-
def randomvoice(self):
105-
return random.choice(self.voices["human"])
158+
return f"Code: {self._message}, reason: unknown, message: {self._message}"

utils/.config.template.toml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,12 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96,
4343
background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" }
4444

4545
[settings.tts]
46-
voice_choice = { optional = false, default = "googletranslate", options = ["streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. This can be left blank and you will be prompted to choose at runtime." }
47-
aws_polly_voice = { optional = true, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
48-
streamlabs_polly_voice = { optional = true, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
49-
tiktok_voice = { optional = true, default = "en_us_006", example = "en_us_006", explanation = "The voice used for TikTok TTS" }
50-
python_voice = { optional = true, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" }
51-
py_voice_num = { optional = true, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" }
46+
voice_choice = { optional = false, default = "tiktok", options = ["streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. This can be left blank and you will be prompted to choose at runtime." }
47+
aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
48+
streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
49+
tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" }
50+
tiktok_sessionid = { optional = true, example = "c76bcc3a7625abcc27b508c7db457ff1", explanation = "TikTok sessionid needed for the TTS API request. Check documentation if you don't know how to obtain it." }
51+
python_voice = { optional = false, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" }
52+
py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" }
5253
silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" }
5354
no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" }

0 commit comments

Comments
 (0)