mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-04-04 19:58:58 +08:00
222 lines
7.0 KiB
Python
222 lines
7.0 KiB
Python
# Copyright (c) 2025 SparkAudio
|
|
# 2025 Xinsheng Wang (w.xinshawn@gmail.com)
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Description:
|
|
This script contains a collection of functions designed to handle various
|
|
file reading and writing operations. It provides utilities to read from files,
|
|
write data to files, and perform file manipulation tasks.
|
|
"""
|
|
|
|
|
|
import os
|
|
import json
|
|
import json
|
|
import csv
|
|
|
|
from tqdm import tqdm
|
|
from typing import List, Dict, Any, Set, Union
|
|
from pathlib import Path
|
|
from omegaconf import OmegaConf, DictConfig
|
|
|
|
|
|
def resolve_symbolic_link(symbolic_link_path: Path) -> Path:
    """
    Resolves a symbolic link to the location it points to.

    The link target is read once with ``os.readlink`` (a single level; a
    target that is itself a link is not followed) and joined onto the
    directory that contains the link. The result is not normalised.

    Args:
        symbolic_link_path (Path): The path to the symbolic link.

    Returns:
        Path: The link's directory joined with the link's target, as
            produced by ``os.path.join``.
    """
    # The head of os.path.split() is the directory that holds the link.
    parent_dir, _ = os.path.split(symbolic_link_path)
    return os.path.join(parent_dir, os.readlink(symbolic_link_path))
|
|
|
|
|
|
def write_jsonl(metadata: List[dict], file_path: Path) -> None:
    """Serialises a list of dictionaries into a JSONL file.

    Args:
        metadata : List[dict]
            The records to store; each one becomes a single line.
        file_path : Path
            Destination of the JSONL file. It is opened in write mode, so
            existing content is replaced.

    Every record is dumped as JSON followed by a newline. A progress bar is
    shown while writing and a confirmation message is printed afterwards.
    """
    with open(file_path, "w", encoding="utf-8") as out:
        # ensure_ascii=False keeps non-ASCII text as-is instead of \uXXXX escapes
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n"
            for record in tqdm(metadata, desc="writing jsonl")
        )
    print(f"jsonl saved to {file_path}")
|
|
|
|
|
|
def read_jsonl(file_path: Path) -> List[dict]:
    """
    Reads a JSONL file and returns a list of dictionaries.

    Args:
        file_path : Path
            The path to the JSONL file to be read (decoded as UTF-8).

    Returns:
        List[dict]
            One parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    metadata = []
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file object instead of calling str.splitlines():
        # splitlines() also breaks on characters such as U+2028, U+2029 and
        # U+0085, which json.dumps(..., ensure_ascii=False) leaves unescaped
        # inside string values, so a single record would be cut in two.
        for line in f:
            # Tolerate blank lines (e.g. a trailing empty line).
            if not line.strip():
                continue
            metadata.append(json.loads(line))
    return metadata
|
|
|
|
def read_json_as_jsonl(file_path: Path) -> List[dict]:
    """
    Reads a JSON object file and flattens it into a list of records.

    Args:
        file_path : Path
            The path to a UTF-8 JSON file whose top-level value is an object.

    Returns:
        List[dict]
            One dictionary per top-level key, ordered by sorted key. Each
            dictionary starts with an 'index' entry holding the key and is
            then updated with the value stored under that key.
    """
    with open(file_path, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    def _as_record(key):
        # 'index' is inserted first; the stored value may overwrite it.
        record = {'index': key}
        record.update(data[key])
        return record

    return [_as_record(key) for key in sorted(data.keys())]
|
|
|
|
|
|
|
|
def decode_unicode_strings(meta: Dict[str, Any]) -> Dict[str, Any]:
    """
    Decodes backslash escape sequences in the string values of a dictionary.

    Args:
        meta (Dict[str, Any]): Mapping whose string values may contain
            escapes such as a literal ``\\u4e2d``.

    Returns:
        Dict[str, Any]: A new dictionary with the same keys. String values
            have their escape sequences decoded; other values are copied
            unchanged.

    Raises:
        UnicodeDecodeError: If a string value contains a malformed escape
            sequence.
    """

    def _unescape(text: str) -> str:
        # "unicode_escape" reads its input bytes as Latin-1, so encoding with
        # UTF-8 first turns every literal non-ASCII character into mojibake.
        # Encoding as Latin-1 keeps U+0080..U+00FF intact, and
        # "backslashreplace" rewrites everything above that range as an
        # escape which the decode step restores.
        return text.encode("latin-1", "backslashreplace").decode("unicode_escape")

    return {
        key: _unescape(value) if isinstance(value, str) else value
        for key, value in meta.items()
    }
|
|
|
|
|
|
def load_config(config_path: Path) -> DictConfig:
    """Loads a configuration file and merges it with its base configuration, if any.

    Args:
        config_path (Path): Path to the configuration file.

    Returns:
        DictConfig: The loaded configuration. When it has a non-None
            "base_config" entry, that entry is loaded as a second
            configuration and the result of
            ``OmegaConf.merge(base_config, config)`` is returned instead.
    """
    config = OmegaConf.load(config_path)

    # Nothing to merge when no base configuration is named.
    if config.get("base_config", None) is None:
        return config

    # The base is passed first, the requested configuration second.
    parent = OmegaConf.load(config["base_config"])
    return OmegaConf.merge(parent, config)
|
|
|
|
|
|
|
|
def jsonl_to_csv(jsonl_file_path: str, csv_file_path: str) -> None:
    """
    Converts a JSONL file to a CSV file.

    This function reads a JSONL file, determines all unique keys present in the file,
    and writes the data to a CSV file with columns for all these keys.

    Args:
        jsonl_file_path (str): Path of the UTF-8 JSONL file to read. Blank
            lines are ignored.
        csv_file_path (str): Path of the CSV file to create (written as UTF-8).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    all_keys = set()
    data_rows = []

    # Read the JSONL file once to extract keys and collect data.
    # UTF-8 is given explicitly so the result does not depend on the platform
    # default encoding, matching the other readers and writers in this file.
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            data = json.loads(line)
            data_rows.append(data)
            all_keys.update(data.keys())

    # Convert the set of keys to a sorted list for consistent column order
    sorted_keys = sorted(all_keys)

    # newline='' lets the csv module control line endings itself
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=sorted_keys)
        writer.writeheader()
        # Keys missing from a row are written as empty cells
        writer.writerows(data_rows)

    print(f"CSV file has been created at {csv_file_path}")
|
|
|
|
|
|
def save_metadata(data, filename, headers=None):
    """
    Save metadata to a '|'-separated text file, one record per line.

    Args:
        data (list of dict): Metadata to be saved. May be empty.
        filename (str): Name of the file to save the metadata.
        headers (list of str): The order of column names to be saved; defaults to
            the keys from the first dictionary in data if not provided (or to no
            columns when data is empty).

    The first line of the file holds the headers. Keys missing from an entry are
    written as empty fields, and values are converted with ``str``.
    """
    # Set headers to keys from the first dictionary in data if not explicitly
    # provided; an empty data list yields no columns instead of an IndexError.
    if headers is None:
        headers = list(data[0].keys()) if data else []

    # '|' separates fields and a line break separates records, so any of these
    # characters inside a value would corrupt the layout; map each to a space.
    unsafe_chars = str.maketrans({"|": " ", "\n": " ", "\r": " "})

    with open(filename, "w", encoding="utf-8") as file:
        # Write the headers to the file
        file.write("|".join(headers) + "\n")
        for entry in data:
            # Retrieve values in the order of headers
            formatted_values = [
                str(entry.get(key, "")).translate(unsafe_chars) for key in headers
            ]
            file.write("|".join(formatted_values) + "\n")
|
|
|
|
|
|
def read_metadata(filename, headers=None):
    """
    Read metadata from a '|'-separated text file, one record per line.

    Args:
        filename (str): The file from which to read the metadata.
        headers (list of str): Column names to pair with the fields of each line.
            If not provided, they are taken from the first line of the file and
            that line is not returned as data; if provided, every line is
            treated as data.

    Returns:
        list of dict: The metadata read from the file. All values are strings.
        list of str: The headers used for the file (empty for an empty file
            when no headers were given).
    """
    with open(filename, "r", encoding="utf-8") as file:
        lines = file.readlines()

    # Set headers from the first line of the file if not provided
    if headers is None:
        if not lines:
            # An empty file has no header line to parse
            return [], []
        headers = lines[0].strip().split("|")
        lines = lines[1:]

    data = []
    for line in lines:
        line = line.strip()
        # Skip empty lines
        if not line:
            continue
        # Pair fields with headers; zip() stops at the shorter of the two, so
        # surplus fields or surplus headers are silently left out.
        data.append(dict(zip(headers, line.split("|"))))

    return data, headers
|