<div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto

A simple multi-process version of preprocess.py about vits HOT 1 CLOSED

jaywalnut310 commented on June 30, 2024 1

A simple multi-process version of preprocess.py

from vits.

Comments (1)

nikich340 commented on June 30, 2024

Nice! However, the `process` function doesn't know about the `args` variable, so it raises an error.
Here's an edited version:

import argparse
import text
from utils import load_filepaths_and_text
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

def process(inputs, cleaners=None):
  """Clean one line of text; intended to run inside a worker process.

  Args:
    inputs: ``(i, line)`` pair. ``i`` is the row index of ``line`` in the
      filelist; it is returned unchanged so the caller can store the result
      in the right row even when results arrive out of order.
    cleaners: list of cleaner names passed to ``text._clean_text``. When
      omitted, falls back to ``["my_cleaners2"]``, the value that was
      previously hard-coded here, so existing callers behave as before.

  Returns:
    ``(i, cleaned_line)``.
  """
  i, line = inputs
  if cleaners is None:
    cleaners = ["my_cleaners2"]
  return i, text._clean_text(line, cleaners)


if __name__ == '__main__':
  # Local import: only the script entry point needs it.
  from functools import partial

  parser = argparse.ArgumentParser()
  parser.add_argument("--out_extension", default="cleaned")
  parser.add_argument("--text_index", default=1, type=int)
  parser.add_argument("--filelists", nargs="+", default=["filelists/ljs_audio_text_val_filelist.txt", "filelists/ljs_audio_text_test_filelist.txt"])
  parser.add_argument("--text_cleaners", nargs="+", default=["my_cleaners2"])

  args = parser.parse_args()

  # Bind the requested cleaners once so --text_cleaners actually takes effect.
  # A partial over a module-level function can be pickled and sent to workers.
  worker = partial(process, cleaners=args.text_cleaners)

  for filelist in args.filelists:
    print("!START: ", filelist)
    filepaths_and_text = load_filepaths_and_text(filelist)
    # Pair every text field with its row index; imap_unordered does not
    # preserve input order, so the index is needed to write results back.
    inputs = [(i, row[args.text_index]) for i, row in enumerate(filepaths_and_text)]
    print(f"!CPU count: {cpu_count()}")
    with Pool(processes=cpu_count()) as pool:
      # A single progress bar; total is given explicitly because the
      # iterator returned by imap_unordered has no length.
      for i, line in tqdm(pool.imap_unordered(worker, inputs), total=len(inputs)):
        filepaths_and_text[i][args.text_index] = line

    # Write the cleaned rows next to the source filelist.
    new_filelist = filelist + "." + args.out_extension
    with open(new_filelist, "w", encoding="utf-8") as f:
      f.writelines("|".join(x) + "\n" for x in filepaths_and_text)

Took me ~25 mins for 19k lines.

from vits.

Recommend Projects