【解包】获取GalGame类文本、音频方法及格式化对应

ELIX · 2025-11-29 · 2025-12-03

一、准备

1.GARbro

2.FreeMoteToolkit

3.游戏本体（本文以Senren Banka为例）

二、提取

打开准备好的游戏目录

│  adult.xp3
│  adult2.xp3
│  bgimage.xp3
│  bgimage.xp3.sig
│  bgm.xp3
│  bgm.xp3.sig
│  data.xp3
│  data.xp3.sig
│  ds.ini
│  evimage.xp3
│  evimage.xp3.sig
│  fgimage.xp3
│  fgimage.xp3.sig
│  patch.xp3
│  patch.xp3.sig
│  patch_extra.xp3
│  patch_extra.xp3.sig
│  runasadmin.vdf
│  scn.xp3
│  scn.xp3.sig
│  SenrenBanka.cf
│  SenrenBanka.exe
│  SenrenBanka.exe.sig
│  steam.xp3
│  steam.xp3.sig
│  steam_api.dll
│  video.xp3
│  video.xp3.sig
│  video2.xp3
│  video2.xp3.sig
│  voice.xp3
│  voice.xp3.sig
│
├─plugin
│      AlphaMovie.dll
│      AlphaMovie.dll.sig
│      extNagano.dll
│      extNagano.dll.sig
│      extrans.dll
│      extrans.dll.sig
│      getLangName.dll
│      getLangName.dll.sig
│      getSample.dll
│      getSample.dll.sig
│      k2compat.dll
│      k2compat.dll.sig
│      kagexopt.dll
│      kagexopt.dll.sig
│      KAGParserEx.dll
│      KAGParserEx.dll.sig
│      krkrsteam.dll
│      krkrsteam.dll.sig
│      krmovie.dll
│      krmovie.dll.sig
│      layerExDraw.dll
│      layerExDraw.dll.sig
│      lzfs.dll
│      lzfs.dll.sig
│      menu.dll
│      menu.dll.sig
│      motionplayer.dll
│      motionplayer.dll.sig
│      motionplayer_nod3d.dll
│      motionplayer_nod3d.dll.sig
│      PackinOne.dll
│      PackinOne.dll.sig
│      psbfile.dll
│      psbfile.dll.sig
│      psd.dll
│      psd.dll.sig
│      steam
│      SteamDrawDevice.dll
│      SteamDrawDevice.dll.sig
│      textrender.dll
│      textrender.dll.sig
│      win32dialog.dll
│      win32dialog.dll.sig
│      win32ole.dll
│      win32ole.dll.sig
│      windowEx.dll
│      windowEx.dll.sig
│      wuopus.dll
│      wuopus.dll.sig
│      wuvorbis.dll
│      wuvorbis.dll.sig
│      yuzuex.dll
│      yuzuex.dll.sig
│
└─SteamEmu
    ├─Inventory
    ├─Saves
    │      datasc.ksd
    │      datasu.ksd
    │      data_continue.bmp
    │      savecheck
    │
    ├─UGC
    └─UserStats
            achiev.ini
            ldb.ini
            stats.ini

可以看到，有scn.xp3（剧情文本）、voice.xp3（语音资源）、video.xp3（视频资源）。

部分游戏不会对每部分内容进行区分，可以直接解包data.xp3、patch.xp3尝试查找。

将这些xp3文件拖到GARbro，即可看到解包后的文件结构，打开时可能提示文件被加密，选择对应的类型打开（GARbro内置一些解密类型），如果找不到对应的类型，可以尝试直接提取。

将这些文件右键提取出来，以便进行后续处理。

三、处理

针对剧情文件，打开scn.xp3，可以看到很多以.ks.scn为后缀的文件，一般.ks后缀是正常的文本文件，.scn代表需要二次处理。

这时候需要下载FreeMoteToolkit，将提取出来的.scn文件拖到PsbDecompile.exe，会弹出命令行，处理后会生成两个文件。

.resx.json文件是解压规则；.json即为需要的文件。

如果闪出命令行然后迅速关闭，且没有文件生成，一般是.scn文件再次被加密处理过，无法直接解包。

具体报错可以通过cmd执行PsbDecompile.exe filename.ks.scn 查看

最后生成的json即为剧情文本文件。

但这仍然不是我们想要的，里面信息量很大，很多都是不需要的。

需要进一步处理，提取出对话内容，最好能把CG、Voice与对话相对应。

通过观察可以看到，json中有许多这样的结构，从中可以将角色名、对话内容、语音编号……等信息进行对应

["芳乃",null,[[null,"「玄十郎さんには普段からお世話になっていますから」"],["Yoshino","Genjuurou-san helps us out quite often."],["芳乃","「毕竟玄十郎先生跟我们家一直有来往嘛」"],["芳乃","「畢竟玄十郎先生跟我們家一直有來往嘛」"]],[{
              "name": "芳乃",
              "pan": 0,
              "type": 0,
              "voice": "yos004_012"
            }
    .......

于是可以借助AI生成提取程序，如下供参考

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
批量对话内容提取脚本
用于处理千恋万花游戏脚本中的所有JSON文件，提取中文对话并转换为指定格式
"""

import json
import re
import os
import sys
import glob
from pathlib import Path

def _read_text_with_fallback(file_path):
    """Read *file_path* as text, trying several encodings in turn.

    ``utf-8-sig`` is tried first: it also decodes plain UTF-8 and strips a
    leading byte-order mark, which ``json.loads`` would otherwise reject.

    Returns:
        The decoded text, or None when every candidate encoding failed.
    """
    for encoding in ('utf-8-sig', 'gbk', 'shift_jis', 'cp932'):
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                content = file.read()
        except UnicodeDecodeError:
            continue
        print(f"成功使用 {encoding} 编码读取文件")
        return content
    return None


def _parse_dialogue_entry(item):
    """Turn one candidate list into a dialogue dict, or return None.

    A dialogue entry looks like
    ``[name_or_null, null, [[name, text], [name, text], [name, text], ...], voice_info]``
    where the third language pair (index 2) holds the Simplified Chinese
    name and text, and ``voice_info`` is a dict or a list starting with a dict.
    """
    if not (isinstance(item, list) and len(item) >= 4
            and item[1] is None
            and isinstance(item[2], list) and len(item[2]) >= 3):
        return None

    localized = item[2][2]
    if not (isinstance(localized, list) and len(localized) >= 2):
        return None

    # Prefer the localized speaker name; fall back to the entry-level name.
    speaker = localized[0]
    if not speaker and isinstance(item[0], str) and item[0]:
        speaker = item[0]

    # The voice metadata is either a dict or a list whose first item is a dict.
    voice_info = item[3]
    if isinstance(voice_info, list) and voice_info:
        voice_info = voice_info[0]
    voice = ""
    if isinstance(voice_info, dict) and "voice" in voice_info:
        voice = voice_info["voice"]

    return {
        'character': speaker,
        'dialogue': localized[1],
        'voice': voice,
    }


def _collect_dialogues(node, dialogues):
    """Walk *node* depth-first and append every dialogue entry to *dialogues*.

    Only elements of lists are tested as entries; every list element and
    every dict value is then searched recursively.
    """
    if isinstance(node, dict):
        for value in node.values():
            _collect_dialogues(value, dialogues)
    elif isinstance(node, list):
        for item in node:
            entry = _parse_dialogue_entry(item)
            if entry is not None:
                dialogues.append(entry)
            _collect_dialogues(item, dialogues)


def extract_dialogues_from_json(json_file_path):
    """Extract dialogue entries from a decompiled scenario JSON file.

    Args:
        json_file_path (str): Path of the JSON file to read.

    Returns:
        list: One dict per dialogue line, in document order, with the keys
        ``'character'``, ``'dialogue'`` (Simplified Chinese text) and
        ``'voice'``. If the text is not valid JSON the result of
        ``extract_dialogues_with_regex`` is returned instead. An empty list
        is returned when the file cannot be decoded or any other error occurs
        (the traceback is printed).
    """
    dialogues = []

    try:
        content = _read_text_with_fallback(json_file_path)
        if content is None:
            print("无法使用任何编码读取文件")
            return []

        try:
            data = json.loads(content)
            print("成功解析JSON数据")
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {e}")
            # Not valid JSON: fall back to the regex-based extractor.
            return extract_dialogues_with_regex(content)

        _collect_dialogues(data, dialogues)
        print(f"找到 {len(dialogues)} 个对话条目")

    except Exception as e:
        # Best-effort boundary: report the problem and yield no dialogues.
        print(f"处理文件时出错: {e}")
        import traceback
        traceback.print_exc()
        return []

    return dialogues

# One dialogue entry: [name-or-null, null, [<language pairs>], <voice info>].
# The voice object may be bare ({...}) or wrapped in a list ([{...}]).
_REGEX_ENTRY = re.compile(
    r'\[\s*(?:"([^"]+)"|null)\s*,\s*null\s*,\s*\[(.*?)\]\s*,\s*\[?\s*\{(.*?)\}.*?\]',
    re.DOTALL,
)
# One [name, text] language pair; either side may be null, and strings may
# contain backslash escapes (including escaped quotes).
_REGEX_PAIR = re.compile(
    r'\[\s*(?:null|"((?:[^"\\]|\\.)*)")\s*,\s*(?:null|"((?:[^"\\]|\\.)*)")'
)
_REGEX_VOICE = re.compile(r'"voice"\s*:\s*"([^"]+)"')


def _unescape_json_string(raw):
    """Decode JSON escapes in *raw* (string body without the quotes).

    Returns *raw* unchanged when it is not a valid JSON string body.
    """
    try:
        return json.loads(f'"{raw}"')
    except ValueError:
        return raw


def extract_dialogues_with_regex(content):
    """Extract dialogues from raw text with regular expressions (fallback).

    Used when the text cannot be parsed as JSON. Entries are located by
    pattern; inside each entry the third language pair (index 2, counting
    pairs whose name is null) is taken as the Simplified Chinese line.

    Args:
        content (str): Raw text of the scenario file.

    Returns:
        list: Dicts with the keys ``'character'``, ``'dialogue'`` and
        ``'voice'``. The character is the localized name when present,
        otherwise the entry-level name, otherwise ``""``.
    """
    dialogues = []

    matches = list(_REGEX_ENTRY.finditer(content))
    print(f"使用正则表达式找到 {len(matches)} 个匹配项")

    for match in matches:
        entry_name = match.group(1) or ""  # "" when the entry name is null
        pairs = [(m.group(1), m.group(2))
                 for m in _REGEX_PAIR.finditer(match.group(2))]

        # Simplified Chinese is the third pair; skip entries without one.
        if len(pairs) < 3:
            continue
        localized_name, localized_text = pairs[2]
        if not localized_text:
            continue

        voice_match = _REGEX_VOICE.search(match.group(3))

        dialogues.append({
            'character': (_unescape_json_string(localized_name)
                          if localized_name else entry_name),
            'dialogue': _unescape_json_string(localized_text),
            'voice': voice_match.group(1) if voice_match else "",
        })

    return dialogues

def convert_to_target_format(dialogues):
    """Render extracted dialogue dicts as display lines.

    Each line is ``"<character>：<text>"`` when a character name is present,
    otherwise just the text; a non-empty voice id is appended as
    ``" [<voice>.ogg]"``.

    Args:
        dialogues (list): Dicts with the keys 'character', 'dialogue', 'voice'.

    Returns:
        list: One rendered line per input entry, in the same order.
    """
    def render(entry):
        speaker, line, clip = entry['character'], entry['dialogue'], entry['voice']
        # Lines without a speaker are emitted as bare text.
        rendered = f"{speaker}：{line}" if speaker else line
        if clip:
            rendered += f" [{clip}.ogg]"
        return rendered

    return [render(entry) for entry in dialogues]

def process_json_file(input_file_path, output_file_path=None):
    """Extract, format and output the dialogues of a single JSON file.

    Args:
        input_file_path (str): Path of the JSON file to process.
        output_file_path (str, optional): Where to write the formatted lines
            (UTF-8, one per line). When omitted or empty, the lines are
            printed to the console with a running number instead.

    Returns:
        int: Number of extracted dialogues; 0 when none were found, in which
        case nothing is written.
    """
    print(f"正在处理文件: {input_file_path}")

    extracted = extract_dialogues_from_json(input_file_path)
    if not extracted:
        print("未找到符合条件的对话内容")
        return 0

    count = len(extracted)
    print(f"找到 {count} 个对话")

    lines = convert_to_target_format(extracted)

    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as out:
            out.writelines(line + '\n' for line in lines)
        print(f"结果已保存到: {output_file_path}")
        return count

    # No output path given: dump the numbered lines to the console.
    print("\n转换后的对话内容:")
    print("=" * 50)
    for number, line in enumerate(lines, start=1):
        print(f"{number}. {line}")
    return count

def batch_process_all_json_files(input_directory, output_directory):
    """Process every JSON file under a directory and write a summary report.

    Each ``<name>.json`` found (recursively) under *input_directory* is passed
    to ``process_json_file`` with the output path
    ``<output_directory>/<name>_dialogues.txt``. A file that raises is
    reported and skipped. Afterwards ``processing_summary.txt`` is written to
    *output_directory*.

    Args:
        input_directory (str): Directory searched recursively for ``*.json``.
        output_directory (str): Directory for the results; created if missing.
    """
    # Local import: the module-level imports do not include datetime.
    from datetime import datetime

    os.makedirs(output_directory, exist_ok=True)

    # The recursive "**" pattern already covers the top-level directory;
    # sorting keeps the processing order and the report deterministic.
    json_files = sorted(
        glob.glob(os.path.join(input_directory, "**", "*.json"), recursive=True)
    )

    print(f"找到 {len(json_files)} 个JSON文件")

    total_dialogues = 0
    completed = []  # (input file name, output file name) of files processed

    for json_file in json_files:
        try:
            file_name = os.path.basename(json_file)
            # Swap only the final extension, not every ".json" in the name.
            output_file_name = os.path.splitext(file_name)[0] + '_dialogues.txt'
            output_file_path = os.path.join(output_directory, output_file_name)

            total_dialogues += process_json_file(json_file, output_file_path)
            completed.append((file_name, output_file_name))

            print(f"已完成 {len(completed)}/{len(json_files)} 个文件")
            print("-" * 50)

        except Exception as e:
            # Keep the batch going; the failing file is left out of the report.
            print(f"处理文件 {json_file} 时出错: {e}")
            continue

    processed_files = len(completed)

    print("\n批量处理完成！")
    print(f"成功处理 {processed_files}/{len(json_files)} 个文件")
    print(f"总共提取 {total_dialogues} 个对话条目")

    summary_file = os.path.join(output_directory, "processing_summary.txt")
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("批量对话处理汇总报告\n")
        f.write("=" * 50 + "\n")
        # Time of this run, not the creation time of the script file.
        f.write(f"处理时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"输入目录: {input_directory}\n")
        f.write(f"输出目录: {output_directory}\n")
        f.write(f"找到的JSON文件总数: {len(json_files)}\n")
        f.write(f"成功处理的文件数: {processed_files}\n")
        f.write(f"总共提取的对话条目数: {total_dialogues}\n")
        f.write("\n处理的文件列表:\n")

        for file_name, output_file_name in completed:
            f.write(f"- {file_name} -> {output_file_name}\n")

    print(f"汇总报告已保存到: {summary_file}")

def main(argv=None):
    """Run the batch extraction.

    Directories can be given on the command line as
    ``script.py [INPUT_DIR [OUTPUT_DIR]]``. With no arguments the built-in
    default directories are used; with only INPUT_DIR the output goes to
    its ``aaa`` sub-directory.

    Args:
        argv (list, optional): Argument list to use instead of
            ``sys.argv[1:]``.
    """
    # Defaults used when no directory is passed on the command line.
    default_input = r"C:\Users\lgf\Desktop\galageimu\千恋万花"
    default_output = r"C:\Users\lgf\Desktop\galageimu\千恋万花\aaa"

    args = sys.argv[1:] if argv is None else list(argv)
    if args:
        input_directory = args[0]
        output_directory = (
            args[1] if len(args) > 1 else os.path.join(input_directory, "aaa")
        )
    else:
        input_directory = default_input
        output_directory = default_output

    print("开始批量处理所有JSON文件...")
    print(f"输入目录: {input_directory}")
    print(f"输出目录: {output_directory}")
    print("=" * 60)

    batch_process_all_json_files(input_directory, output_directory)

# Run the batch extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()