Help Needed: How to Select Specific Folder Names with a Script?

Please check the FAQ (https://www.xyplorer.com/faq.php) before posting a question...
Post Reply
star
Posts: 46
Joined: 11 Mar 2022 08:18

Help Needed: How to Select Specific Folder Names with a Script?

Post by star »

I previously wrote a script for fman, and its logic works as follows: it uses difflib.SequenceMatcher(None, names[i], names[j]).ratio() to calculate the string similarity between every pair of folder names. When the similarity of two names exceeds a threshold (the script below uses 60%), those folders are grouped together.

Next, the script checks whether each group contains localized (translated) works:

If a group includes multiple translated versions, it keeps only the one from the translation team with the best quality and deletes the rest.

If there are no translated works in the group, it keeps one raw (untranslated) version instead.

After filtering, the script automatically selects the corresponding folder names for manual review.

My question is — can XYPLORER achieve a similar function that automatically selects specific folders? I’ve tried generating scripts with ChatGPT and Grok, but none of them worked properly. To make things clearer, I’ve attached a screenshot of my fman plugin, which is mainly designed to perform selection operations among a bunch of similar manga folder names.

Code: Select all

import sys
import os
import difflib
import re
from collections import defaultdict


# This Python script analyzes a list of folder paths, 
# groups similar folder names based on string similarity, 
# and selects which folders to keep or remove according
# to specific naming patterns and priority rules. 
# It detects folders with keywords (mainly related to 
# localization or translation tags), groups related ones, 
# and applies a priority system to retain the most preferred
# version while marking others for selection. The resulting 
# selected folder paths are written to an output file.

# Two command-line arguments are required: the file listing the folder paths
# (one per line) and the file that will receive the selected paths.
if len(sys.argv) < 3:
    sys.exit(1)
infile, outfile = sys.argv[1], sys.argv[2]

# Collect every non-blank line, with surrounding whitespace removed.
folders = []
with open(infile, 'r', encoding='utf-8') as f:
    for line in f:
        entry = line.strip()
        if entry:
            folders.append(entry)

# Folder names only (last path component), in the same order as `folders`.
names = list(map(os.path.basename, folders))

# A name counts as a translated release when one of its [bracketed] tags
# contains any of these keywords.
pattern = re.compile(
    r'\[[^\[\]]*(汉|漢|制|製|翻|语|中|嵌|人|書館|书馆|修正|未来|未來|字幕|考试|漫画|漫畫|去码|去碼|粵化|工房|我的|三個|甘油|出版女友|小吃|洨五組|新视界|整合|可好|冊語草堂|空中貓製作室|屁眼派對|禁漫天堂|下北泽幕府|hanhua)[^\[\]]*\]'
)

# Priority list: keywords ordered from most preferred to least preferred.
priority_list = [
    '脸肿汉化', '绅士仓库', 'Lolipoi', '中国語', '禁漫天堂', '未來數位',
    '工房', '工坊', '冊語草堂', '中文', '修正', '家族', '支援',
    '嵌', '制', '翻译', '汉化', '个人', '机翻',
]

# Undirected similarity graph over name indices: i and j are linked when the
# SequenceMatcher ratio of their names is above 0.6.
graph = defaultdict(set)
total = len(names)
for i, first in enumerate(names):
    for j in range(i + 1, total):
        matcher = difflib.SequenceMatcher(None, first, names[j])
        if matcher.ratio() <= 0.6:
            continue
        graph[i].add(j)
        graph[j].add(i)

def find_group(start, visited, group):
    """Collect every index reachable from ``start`` in the module-level ``graph``.

    Args:
        start: Index of the node to start from.
        visited: Set of indices already assigned to a group; updated in place.
        group: Set receiving ``start`` and every index reachable from it;
            updated in place.
    """
    # An explicit stack replaces recursion so that a very large component
    # (more members than the interpreter's recursion limit) cannot raise
    # RecursionError.
    visited.add(start)
    pending = [start]
    while pending:
        node = pending.pop()
        group.add(node)
        for neighbor in graph[node]:
            if neighbor not in visited:
                # Mark on push so a node is never queued twice.
                visited.add(neighbor)
                pending.append(neighbor)

# Split the graph into connected components; each component becomes one list
# of folder paths.
visited = set()
groups = []
for idx in range(len(names)):
    if idx in visited:
        continue
    members = set()
    find_group(idx, visited, members)
    groups.append([folders[member] for member in members])

def _priority_rank(name):
    """Return the index of the first ``priority_list`` keyword found in ``name``.

    Lower is better. ``len(priority_list)`` is returned when no keyword occurs.
    """
    for rank, keyword in enumerate(priority_list):
        if keyword in name:
            return rank
    return len(priority_list)


# Decide, per group, which single folder is kept; every other folder of the
# group is added to `to_select` for manual review.
to_select = set()
for group_folders in groups:
    if len(group_folders) < 2:
        # A folder with no similar sibling is never selected.
        continue

    group_names = [os.path.basename(path) for path in group_folders]
    translated = [
        (folder, name)
        for folder, name in zip(group_folders, group_names)
        if pattern.search(name)
    ]

    if translated:
        # Keep the translated folder with the best (lowest) priority rank.
        # min() returns the first candidate on ties, so when no candidate
        # carries a priority keyword the first translated folder is kept
        # instead of selecting the whole group.
        keeper = min(translated, key=lambda item: _priority_rank(item[1]))[0]
    else:
        # No translated version: keep the folder with the shortest name.
        keeper = min(group_folders, key=lambda path: len(os.path.basename(path)))

    to_select.update(folder for folder in group_folders if folder != keeper)

# Write one selected path per line. The set is sorted so the output order is
# the same on every run (plain set iteration order of strings varies between
# interpreter runs).
with open(outfile, 'w', encoding='utf-8') as f:
    f.write('\n'.join(sorted(to_select)))
Image
XYPlorer x32 Ver 27.10.0600, Win10 LTSC Enterprise 21H2, scaling percentage 200%(Display 4K)

star
Posts: 46
Joined: 11 Mar 2022 08:18

Re: Help Needed: How to Select Specific Folder Names with a Script?

Post by star »

It's solved. (https://www.xyplorer.com/download/XYplorerHelp.pdf)

xys:

Code: Select all

// XYplorer Script: Select Similar (integrated with Python)
  // Locations of the Python interpreter, the helper script, and the temp file
  // used to exchange data with it. Adjust these to the local installation.
  $python = "C:\ProgramData\anaconda3\envs\python312\python.exe";
  $pyscript = "C:\Users\Administrator\Personal_scripts\Python\PythonScripts\PythonScripts\src\select_similar.py";
  $tempfile = "%temp%\filelist.txt";

  // Build the item list for the current path and store it in the temp file.
  // The first folderreport argument picks the item type: "dirs" or "files".
  // NOTE(review): the meaning of the remaining folderreport arguments is not
  // visible here - confirm against the XYplorer scripting help.
  $items = folderreport("dirs", "r", "<curpath>", "<crlf>");
  writefile($tempfile, $items, , "utf16le");  

  // Command line: quoted interpreter, quoted script, quoted temp file path.
  // NOTE(review): the trailing run arguments presumably make XYplorer wait
  // for the process to finish before continuing - confirm.
  $cmd = """$python"" ""$pyscript"" ""$tempfile""";
  run $cmd, , 0, 1; 

  // Read the same temp file back. This relies on the Python script having
  // replaced its contents with the items to select.
  $matches = readfile($tempfile, "utf16le");

  // Select the returned items, or report that nothing was returned.
  if ($matches != "") {
    selectitems $matches;
  } else {
    msg "No matches";
  }
python:

Code: Select all

import sys
import re
import difflib
from collections import defaultdict
from pathlib import Path

# A name counts as a translated release when one of its [bracketed] tags
# contains any of these keywords.
_TRANSLATION_TAG = re.compile(
    r'\[[^\[\]]*(汉|漢|制|製|翻|语|中|嵌|人|書館|书馆|修正|未来|未來|字幕|考试|漫画|漫畫|去码|去碼|粵化|工房|我的|三個|甘油|出版女友|小吃|洨五組|新视界|整合|可好|冊語草堂|空中貓製作室|屁眼派對|禁漫天堂|下北泽幕府|hanhua)[^\[\]]*\]'
)

# Keywords ordered from most preferred to least preferred.
_PRIORITY_KEYWORDS = [
    '脸肿汉化', '绅士仓库', 'Lolipoi', '中国語', '禁漫天堂', '未來數位',
    '工房', '工坊', '出版', '書館', '书馆', '修正', '整合', '字幕', '考试',
    '漫画', '漫畫', '去码', '去碼', '粵化', '冊語草堂', '新视界', '空中貓製作室',
    '屁眼派對', '我的', '三個', '甘油', '出版女友', '小吃', '洨五組', '下北泽幕府',
    '汉', '漢', '製', '语', '中', '嵌', '家族', '支援', '制', '翻译', '人', '机翻',
]

# Two names are linked when their SequenceMatcher ratio is above this value.
_SIMILARITY_THRESHOLD = 0.6


def _group_similar(names):
    """Return groups of indices into ``names`` whose names are transitively similar.

    Each group is a list of indices in ascending (input) order.
    """
    graph = defaultdict(set)
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            ratio = difflib.SequenceMatcher(None, names[i], names[j]).ratio()
            if ratio > _SIMILARITY_THRESHOLD:
                graph[i].add(j)
                graph[j].add(i)

    groups = []
    visited = set()
    for start in range(len(names)):
        if start in visited:
            continue
        # Iterative traversal: a recursive DFS would raise RecursionError on
        # components larger than the interpreter's recursion limit.
        visited.add(start)
        pending = [start]
        component = []
        while pending:
            node = pending.pop()
            component.append(node)
            for neighbor in graph[node]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    pending.append(neighbor)
        # Input order makes every later tie-break deterministic.
        groups.append(sorted(component))
    return groups


def _priority_rank(name):
    """Return the index of the first priority keyword in ``name`` (lower is better).

    ``len(_PRIORITY_KEYWORDS)`` is returned when no keyword occurs.
    """
    for rank, keyword in enumerate(_PRIORITY_KEYWORDS):
        if keyword in name:
            return rank
    return len(_PRIORITY_KEYWORDS)


def _folders_to_select(members):
    """Return the folders of one group that should be selected for review.

    ``members`` is a list of ``(folder, name)`` pairs. Exactly one folder is
    kept: the best-ranked translated one, or the shortest-named one when the
    group has no translated folder.
    """
    if len(members) < 2:
        return []
    translated = [item for item in members if _TRANSLATION_TAG.search(item[1])]
    if translated:
        # min() returns the first candidate on ties, so when no candidate
        # carries a priority keyword the first translated folder is kept
        # instead of selecting the whole group.
        keeper = min(translated, key=lambda item: _priority_rank(item[1]))[0]
    else:
        keeper = min(members, key=lambda item: len(item[1]))[0]
    return [folder for folder, _ in members if folder != keeper]


def main():
    """Read folder paths from the file named in ``sys.argv[1]`` and overwrite
    that file with the paths to select, joined by ``|``.

    The file is read and written as UTF-16-LE. Prints a message and returns
    when the argument is missing or the file does not exist; returns without
    rewriting the file when it lists no folders.
    """
    if len(sys.argv) < 2:
        print("No input file")
        return

    list_path = Path(sys.argv[1])
    if not list_path.exists():
        print("Temp file not found")
        return

    # The utf-16-le codec does not consume a byte-order mark, so drop one if
    # the writer emitted it; otherwise it would stick to the first path.
    text = list_path.read_text(encoding="utf-16-le").lstrip("\ufeff")
    folders = [line.strip() for line in text.splitlines() if line.strip()]

    if not folders:
        return

    names = [Path(folder).name for folder in folders]

    to_select = set()
    for indices in _group_similar(names):
        members = [(folders[idx], names[idx]) for idx in indices]
        to_select.update(_folders_to_select(members))

    list_path.write_text("|".join(sorted(to_select)), encoding="utf-16-le")


if __name__ == "__main__":
    main()
XYPlorer x32 Ver 27.10.0600, Win10 LTSC Enterprise 21H2, scaling percentage 200%(Display 4K)

Post Reply