AlistSource/urls/main.py

86 lines
3.6 KiB
Python
Raw Normal View History

2024-02-19 21:19:00 +08:00
import os
import urllib.parse
import argparse
2024-02-20 15:34:08 +08:00
import re
2024-02-20 18:49:05 +08:00
import time
2024-06-27 01:17:57 +08:00
import yaml
2024-02-20 15:34:08 +08:00
def natural_sort_key(s):
    """Return a key that sorts strings with embedded numbers naturally.

    The string is split into alternating text and ASCII-digit runs; digit
    runs become integers and text runs are lower-cased, so "a2" sorts
    before "a10" and letter case is ignored.

    Args:
        s: The string (typically a file path) to build a key for.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting and
        ending with a ``str`` item (possibly empty).
    """
    # re.split() with a capturing group alternates text / digits / text ...,
    # so odd positions are exactly the ASCII digit runs.  Deciding by
    # position rather than str.isdigit() keeps non-ASCII digits such as
    # '²' or '２' as text: int() either rejects them or would put an int
    # into a text slot and make keys of different strings incomparable.
    return [
        int(piece) if index % 2 else piece.lower()
        for index, piece in enumerate(re.split('([0-9]+)', s))
    ]
2024-02-19 21:19:00 +08:00
2024-06-27 01:41:28 +08:00
def generate_urls(file_paths, base_url, sub_directory, min_size):
    """Build URL entries for the given files, grouped by parent directory.

    Files smaller than ``min_size`` bytes are skipped.  Paths are processed
    in natural sort order and made relative to the current working
    directory before being percent-encoded into the URL.

    Args:
        file_paths: Iterable of file paths to publish.
        base_url: URL prefix; a trailing '/' is appended when missing.
        sub_directory: Optional path inserted after ``base_url``; a
            trailing '/' is appended when it is non-empty and lacks one.
        min_size: Minimum file size in bytes for a file to be included.

    Returns:
        A dict mapping each file's parent directory ('/'-separated,
        relative to the current directory, '.' for files directly in it)
        to a list of "size:timestamp:url" strings.
    """
    if not base_url.endswith('/'):
        base_url += '/'
    if sub_directory and not sub_directory.endswith('/'):
        sub_directory += '/'

    # Taken once so every entry produced by this call shares one timestamp.
    current_timestamp = int(time.time())

    urls = {}
    for path in sorted(file_paths, key=natural_sort_key):
        file_size_bytes = os.path.getsize(path)
        if file_size_bytes < min_size:
            continue

        # Use forward slashes regardless of the platform's path separator.
        relative_path = os.path.relpath(path, start='.').replace(os.sep, '/')
        encoded_path = urllib.parse.quote(relative_path)
        url = f"{base_url}{sub_directory}{encoded_path}"

        # os.path.dirname() returns '' for a file in the current directory,
        # but save_urls() and has_subdirectories() identify the root by the
        # key '.', so normalise it here.
        dir_name = os.path.dirname(relative_path) or '.'
        urls.setdefault(dir_name, []).append(
            f"{file_size_bytes}:{current_timestamp}:{url}"
        )
    return urls
2024-06-27 01:17:57 +08:00
def save_urls(urls, output_file, root_folder):
    """Write the grouped URL entries to ``output_file`` as a text tree.

    The first line is ``"<root_folder>:"``.  Entries whose directory key is
    the root ('.' or '') follow directly; every other directory gets a
    header line made of the last component of its path, followed by its
    entries.

    Args:
        urls: Dict mapping a directory key to a list of entry strings.
        output_file: Path of the file to (over)write, encoded as UTF-8.
        root_folder: Name written as the top-level key.
    """
    # Accept both spellings of "current directory": os.path.dirname()
    # produces '' while the rest of this file uses '.'.
    root_keys = ('', '.')

    lines = [f"{root_folder}:\n"]
    for dir_name, files in urls.items():
        if dir_name not in root_keys:
            # Only the innermost directory name is used as the header.
            lines.append(f" {dir_name.split('/')[-1]}:\n")
        # NOTE(review): entries use the same one-space indent as directory
        # headers, matching the existing output - confirm the consumer does
        # not need deeper nesting for files inside a sub-directory.
        lines.extend(f" {file}\n" for file in files)

    # An empty ``urls`` simply yields a file containing the root key only.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)
2024-02-19 21:19:00 +08:00
def parse_arguments():
    """Parse the command-line options and return the resulting namespace."""
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ('--dir', {
            'type': str,
            'default': '',
            'help': 'Sub-directory for generating file URLs (optional)',
        }),
        ('--output', {
            'type': str,
            'default': 'urls.yaml',
            'help': 'Output file name (default: urls.yaml)',
        }),
        ('--base-url', {
            'type': str,
            'default': 'https://link.kite.kim/feng',
            'help': 'Base URL for generating file URLs (default: https://link.kite.kim/feng)',
        }),
        ('--min-size', {
            'type': int,
            'default': 128 * 1024,
            'help': 'Minimum file size in bytes (default: 128KB)',
        }),
        ('--rf', {
            'type': str,
            'required': True,
            'help': 'Root folder name for YAML output',
        }),
    )

    cli = argparse.ArgumentParser(description='Generate URLs from file names.')
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
2024-06-27 00:32:52 +08:00
def list_files_recursive(start_path='.', exclude_files=None):
    """Collect the paths of all files under ``start_path``, recursively.

    Args:
        start_path: Directory at which the walk begins.
        exclude_files: Optional collection of bare file names to leave out,
            whichever directory they are found in.

    Returns:
        A list of paths, each joined from the walked directory and the
        file name, in the order ``os.walk`` yields them.
    """
    skipped = set() if exclude_files is None else exclude_files
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(start_path)
        for name in names
        if name not in skipped
    ]
2024-06-27 02:25:47 +08:00
def has_subdirectories(urls):
    """Report whether any key of ``urls`` is something other than '.'."""
    for key in urls:
        if key != '.':
            return True
    return False
2024-02-19 21:19:00 +08:00
def main():
    """Run the script: scan the current directory and write the URL file.

    Parses the command line, lists every file under the current directory,
    builds the URL entries and saves them to the requested output file.
    """
    args = parse_arguments()

    # Skip this script and the output file itself (by bare file name), so
    # the index left behind by a previous run is not published as a file.
    exclude_files = {
        os.path.basename(__file__),
        os.path.basename(args.output),
    }

    file_paths = list_files_recursive('.', exclude_files)
    urls = generate_urls(file_paths, args.base_url, args.dir, args.min_size)
    save_urls(urls, args.output, args.rf)
    print(f"URL链接已保存到{args.output}")
2024-02-19 21:19:00 +08:00
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()