将其他位置的md文档转移到博客文件夹中

本文最后更新于 2026年6月9日 12:22:54

注：此功能后续可能还会更新，这里只是简要介绍。

## 零、引入

在做计组lab的时候，已经产生了完善的实验报告。然而，每次我都需要手动将实验报告从wsl中复制到博客目录下，再分别对应去取每一张图片，这些事情实在太琐碎了。我很懒，所以让agent写了个代码，帮我移动博客文章。我没有仔细看每一行代码，但从结果上来说，它达到了我的目的。

一、功能简介

此代码不依赖额外的python库，主要职责是复制文件内容。支持这些参数：

- source_md：也就是源文件的绝对路径
- target_md：你想要把这个文件复制到哪里
- source_image_dir：源文件的图片目录。默认留空并继承源文件所在路径
- target_image_dir：目标文件对应的图片目录。默认留空并设定为目标文件同路径下的同名文件夹

二、hexo配置

建议开启以下三项：

post_asset_folder: true
marked:
  prependRoot: true
  postAsset: true

参考Hexo官方博客

开启这三项后，博客文章中可以直接使用markdown语法，无需特意改成asset_img标签插件的形式。图片只需要放在文章同名文件夹下即可，非常方便。

三、代码详情

"""Import an external Markdown file into a Hexo post and copy its images."""

from __future__ import annotations

import argparse
import os
import re
import shutil
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from urllib.parse import unquote, urlparse


# Lower-case file suffixes (including the leading dot) that is_probable_image()
# accepts as image files.
IMAGE_EXTENSIONS = {
    ".apng",
    ".avif",
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
    ".webp",
}
# Markdown image syntax ![alt](target): group 1 is the alt text, group 2 is
# everything between the parentheses (no ")" and no line break allowed).
MARKDOWN_IMAGE_RE = re.compile(r"!\[([^\]]*)\]\(([^)\r\n]+)\)")
# HTML <img> tag with a quoted src attribute, matched case-insensitively:
# group 2 is the src value, groups 1 and 3 are the tag text before and after it.
HTML_IMAGE_RE = re.compile(r"(<img\b[^>]*?\bsrc=[\"'])([^\"']+)([\"'][^>]*>)", re.IGNORECASE)


# Everyday settings live here; the r prefix keeps backslashes literal, which
# suits Windows paths. configured_value() falls back to these entries whenever
# the matching command-line option is not given.
CONFIG = {
    # Source Markdown file: the script reads its body content.
    "source_md": r"\\wsl.localhost\Ubuntu-22.04\home\lab6_web.md",
    # Target Hexo post: its existing front matter is kept and the body after the front matter is replaced.
    "target_md": r"source\_posts\构建五级流水线——lab6.md",
    # Source image directory: when empty, the directory containing the source Markdown is used.
    "source_image_dir": "",
    # Target image directory: when empty, the folder next to the target Markdown with the same name is used.
    "target_image_dir": "",
}


@dataclass(frozen=True)
class ImageCopy:
    """One planned image copy discovered while rewriting the document body."""

    # Image file to copy from, as resolved from the reference in the source document.
    source: Path
    # Destination file inside the target image directory.
    target: Path
    # Text written into the post for this image: the rewritten Markdown image
    # markup for Markdown images, or just the new src value for HTML <img> tags.
    replacement: str


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the import script.

    Returns:
        A namespace with ``source_md``, ``target_md``, ``source_image_dir`` and
        ``target_image_dir`` (each ``None`` when the option is absent) and the
        boolean ``dry_run``.
    """
    cli = argparse.ArgumentParser(
        description="Copy source Markdown body into a Hexo post and move local images into the post asset folder."
    )

    # Plain string options, registered in the order they appear in --help.
    path_options = (
        ("--source-md", "Source Markdown file. Defaults to CONFIG['source_md']."),
        ("--target-md", "Target Hexo post Markdown file. Defaults to CONFIG['target_md']."),
        (
            "--source-image-dir",
            "Directory used to resolve relative image paths. Defaults to CONFIG['source_image_dir'].",
        ),
        (
            "--target-image-dir",
            "Directory where images will be copied. Defaults to CONFIG['target_image_dir'].",
        ),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, help=help_text)

    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print the planned work without writing the target Markdown or copying images.",
    )
    return cli.parse_args()


def configured_value(args: argparse.Namespace, name: str) -> str:
    """Return the setting *name*, preferring the command line over CONFIG.

    Only a missing option (``None``) falls back to ``CONFIG[name]``; an
    explicitly passed empty string is returned as-is.
    """
    cli_value = getattr(args, name)
    if cli_value is None:
        return CONFIG[name]
    return cli_value


def default_target_image_dir(target_md: Path) -> Path:
    """Return the sibling folder named like *target_md* minus its last suffix."""
    folder_name = target_md.stem
    return target_md.with_name(folder_name)


def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, dropping a leading byte-order mark if present.

    The ``utf-8-sig`` codec decodes plain UTF-8 exactly like ``utf-8`` but also
    strips a BOM. Without that, a file saved with a BOM would start with
    ``\\ufeff---`` and the front-matter checks, which compare the first line
    against ``---``, would not recognise it.
    """
    return path.read_text(encoding="utf-8-sig")


def write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8 without translating line endings."""
    # newline="" disables newline translation, so the text is written exactly as given.
    with path.open("w", encoding="utf-8", newline="") as handle:
        handle.write(content)


def split_front_matter(content: str) -> tuple[str, str]:
    """Split a post into its YAML front matter and the body that follows.

    Returns:
        ``(front_matter, body)`` where ``front_matter`` runs from the opening
        ``---`` line through the closing ``---`` line, line endings included.

    Raises:
        ValueError: If the first line is not ``---`` or no closing ``---``
            line exists.
    """
    lines = content.splitlines(keepends=True)
    has_opening = bool(lines) and lines[0].strip() == "---"
    if not has_opening:
        raise ValueError("Target Markdown must start with YAML front matter delimited by ---")

    # Look for the closing delimiter after the opening line.
    for position, line in enumerate(lines[1:], start=1):
        if line.strip() == "---":
            cut = position + 1
            return "".join(lines[:cut]), "".join(lines[cut:])

    raise ValueError("Target Markdown front matter is missing its closing ---")


def strip_source_front_matter(content: str) -> str:
    """Return *content* without a leading ``---`` ... ``---`` front-matter block.

    The text is returned unchanged when it does not start with a ``---`` line
    or when no closing ``---`` line follows.
    """
    lines = content.splitlines(keepends=True)
    if lines and lines[0].strip() == "---":
        closing = next(
            (position for position, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
            None,
        )
        if closing is not None:
            return "".join(lines[closing + 1 :])

    return content


def _is_windows_drive_path(value: str) -> bool:
    """Return True for paths such as ``C:\\dir\\a.png`` or ``d:/dir/a.png``."""
    return (
        len(value) >= 3
        and value[0].isascii()
        and value[0].isalpha()
        and value[1] == ":"
        and value[2] in ("\\", "/")
    )


def is_external_or_special_url(image_url: str) -> bool:
    """Tell whether *image_url* must be left alone instead of copied.

    Returns:
        True for in-page anchors (``#...``) and for URLs with any scheme other
        than ``file`` (``https:``, ``data:`` ...); False for local paths and
        ``file:`` URLs.
    """
    if image_url.startswith("#"):
        return True

    # urlparse() would read the drive letter of "C:\..." as a URL scheme and
    # misclassify an absolute Windows path as an external resource.
    if _is_windows_drive_path(image_url):
        return False

    scheme = urlparse(image_url).scheme.lower()
    return scheme not in ("", "file")


def strip_markdown_link_title(raw_target: str) -> str:
    """Return only the destination part of a Markdown link target.

    *raw_target* is the text between the parentheses of ``![alt](...)``.
    Surrounding whitespace is removed, an optional title introduced by a space
    followed by a single or double quote is dropped, and an angle-bracket
    destination (``<my image.png>``, the Markdown form that allows spaces) is
    unwrapped.
    """
    target = raw_target.strip()
    if not target:
        return target

    # "<...>" delimits the destination explicitly; whatever follows is the title.
    if target.startswith("<"):
        closing = target.find(">")
        if closing != -1:
            return target[1:closing]

    title_starts = [pos for pos in (target.find(' "'), target.find(" '")) if pos != -1]
    if title_starts:
        return target[: min(title_starts)].strip()

    return target


def resolve_image_path(image_url: str, source_image_dir: Path) -> Path | None:
    """Turn an image reference from the document into a filesystem path.

    Args:
        image_url: Raw link target; may carry a Markdown title.
        source_image_dir: Base directory for relative references.

    Returns:
        The resolved path, or ``None`` when the reference is external, an
        in-page anchor, or empty. The path is not checked for existence.
    """
    # Function-scope import: url2pathname is not among the file's top-level imports.
    from urllib.request import url2pathname

    clean_url = strip_markdown_link_title(image_url)
    if is_external_or_special_url(clean_url):
        return None

    parsed = urlparse(clean_url)
    if parsed.scheme.lower() == "file":
        # url2pathname() percent-decodes and applies the platform's file-URL
        # rules; on Windows it maps "/C:/dir/a.png" to "C:\dir\a.png", which a
        # plain unquote() would leave as a non-absolute "/C:/..." path.
        raw_path = url2pathname(parsed.path) if parsed.path else ""
    else:
        raw_path = unquote(clean_url)
    if not raw_path:
        return None

    image_path = Path(raw_path)
    if not image_path.is_absolute():
        image_path = source_image_dir / image_path

    return image_path


def is_probable_image(path: Path) -> bool:
    """Return True when the suffix of *path*, lower-cased, is in IMAGE_EXTENSIONS."""
    extension = path.suffix.lower()
    return extension in IMAGE_EXTENSIONS


def posix_relative_path(path: Path, start: Path) -> str:
    """Return *path* relative to *start*, written with forward slashes.

    Both arguments are resolved to absolute paths before the relative path is
    computed.
    """
    absolute_path = path.resolve()
    absolute_start = start.resolve()
    relative = Path(os.path.relpath(absolute_path, absolute_start))
    return relative.as_posix()


def make_markdown_image_url(copied_image: Path, target_md: Path) -> str:
    """Return the URL to write into the post for *copied_image*.

    An image stored directly in the post's default image folder is referenced
    by bare file name; any other location is referenced by its forward-slash
    path relative to the post's directory.
    """
    image_dir = copied_image.parent.resolve()
    default_dir = default_target_image_dir(target_md).resolve()
    if image_dir != default_dir:
        return posix_relative_path(copied_image, target_md.parent)

    return copied_image.name


def collect_markdown_images(
    body: str,
    source_image_dir: Path,
    target_image_dir: Path,
    target_md: Path,
) -> tuple[str, list[ImageCopy]]:
    """Rewrite local Markdown image links and list the files they need.

    Every ``![alt](target)`` whose target resolves to a local file with an
    image suffix is pointed at its future location in *target_image_dir*;
    all other matches are left untouched. An optional title after the
    destination (``"caption"``) is carried over instead of being dropped.

    Returns:
        ``(new_body, copies)`` with one ``ImageCopy`` per rewritten link, in
        document order. No file is copied here.
    """
    copies: list[ImageCopy] = []

    def extract_title(raw_target: str) -> str:
        """Return the quoted title that follows the destination, or ""."""
        stripped = raw_target.strip()
        # For "<dest> title" only look behind the closing bracket; find()
        # returning -1 makes this 0, i.e. search from the start.
        search_from = stripped.find(">") + 1 if stripped.startswith("<") else 0
        starts = [
            pos
            for pos in (stripped.find(' "', search_from), stripped.find(" '", search_from))
            if pos != -1
        ]
        return stripped[min(starts) :].strip() if starts else ""

    def replace_match(match: re.Match[str]) -> str:
        alt_text, raw_target = match.groups()
        source_path = resolve_image_path(raw_target, source_image_dir)
        if source_path is None or not is_probable_image(source_path):
            return match.group(0)

        target_path = target_image_dir / source_path.name
        image_url = make_markdown_image_url(target_path, target_md)
        # NOTE(review): a file name containing spaces produces a destination
        # with raw spaces, which Markdown parsers may not accept as an image
        # link - confirm whether such names occur in practice.
        title = extract_title(raw_target)
        destination = f"{image_url} {title}" if title else image_url
        replacement = f"![{alt_text}]({destination})"
        copies.append(ImageCopy(source_path, target_path, replacement))
        return replacement

    return MARKDOWN_IMAGE_RE.sub(replace_match, body), copies


def collect_html_images(
    body: str,
    source_image_dir: Path,
    target_image_dir: Path,
    target_md: Path,
) -> tuple[str, list[ImageCopy]]:
    """Rewrite the ``src`` of local HTML ``<img>`` tags and list the files they need.

    Tags whose ``src`` resolves to a local file with an image suffix get a
    forward-slash path relative to the post's directory; every other tag is
    left untouched.

    Returns:
        ``(new_body, copies)`` with one ``ImageCopy`` per rewritten tag, in
        document order. No file is copied here.
    """
    planned: list[ImageCopy] = []

    def rewrite_tag(match: re.Match[str]) -> str:
        before_src, src_value, after_src = match.groups()
        local_file = resolve_image_path(src_value, source_image_dir)
        if local_file is not None and is_probable_image(local_file):
            destination = target_image_dir / local_file.name
            # NOTE(review): Markdown images go through make_markdown_image_url()
            # and may become a bare file name, while <img> tags always get the
            # relative path - confirm the two are meant to differ.
            new_src = posix_relative_path(destination, target_md.parent)
            planned.append(ImageCopy(local_file, destination, new_src))
            return f"{before_src}{new_src}{after_src}"

        return match.group(0)

    rewritten_body = HTML_IMAGE_RE.sub(rewrite_tag, body)
    return rewritten_body, planned


def copy_images(copies: list[ImageCopy], dry_run: bool) -> None:
    """Copy every planned image once, after validating the whole plan.

    Args:
        copies: Planned copies; several entries may name the same target.
        dry_run: When true, print ``copy: source -> target`` lines instead of
            touching the filesystem.

    Raises:
        FileNotFoundError: If a source image does not exist.
        ValueError: If two different source files would be written to the
            same target (same file name from different folders).

    The plan is checked completely before the first copy, so a failure never
    leaves a partially filled image directory behind.
    """
    planned: dict[Path, Path] = {}
    for item in copies:
        earlier_source = planned.get(item.target)
        if earlier_source is None:
            if not item.source.exists():
                raise FileNotFoundError(f"Image not found: {item.source}")
            planned[item.target] = item.source
        elif earlier_source.resolve() != item.source.resolve():
            # Skipping this entry silently would make the post show the wrong picture.
            raise ValueError(
                f"Image name collision: {earlier_source} and {item.source} "
                f"would both be copied to {item.target}"
            )

    for target, source in planned.items():
        if dry_run:
            print(f"copy: {source} -> {target}")
        else:
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source, target)


def build_post_content(front_matter: str, source_body: str) -> str:
    """Join front matter and body with exactly one newline between them.

    Trailing whitespace of *front_matter* and leading whitespace of
    *source_body* are removed before joining.
    """
    header = front_matter.rstrip()
    body = source_body.lstrip()
    return f"{header}\n{body}"


def main() -> None:
    """Import the source Markdown into the target Hexo post.

    Settings come from the command line, falling back to CONFIG. The target
    keeps its own front matter, its body is replaced by the source body (with
    the source's front matter removed), and local images referenced from the
    body are copied into the target image directory.

    Images are copied before the post is written, so a missing image aborts
    the run while the existing post is still untouched. With ``--dry-run``
    nothing is written; the plan is printed instead.

    Raises:
        ValueError: If the source or target path is empty, or the target lacks
            valid front matter.
    """
    args = parse_args()
    source_md_text = configured_value(args, "source_md")
    target_md_text = configured_value(args, "target_md")
    source_image_dir_text = configured_value(args, "source_image_dir")
    target_image_dir_text = configured_value(args, "target_image_dir")

    if not source_md_text or not target_md_text:
        raise ValueError("CONFIG['source_md'] and CONFIG['target_md'] must not be empty.")

    source_md = Path(source_md_text)
    target_md = Path(target_md_text)
    # Empty directory settings fall back to locations derived from the two Markdown paths.
    source_image_dir = Path(source_image_dir_text) if source_image_dir_text else source_md.parent
    target_image_dir = Path(target_image_dir_text) if target_image_dir_text else default_target_image_dir(target_md)

    # Only the front matter of the existing post is kept; its old body is discarded.
    target_front_matter, _ = split_front_matter(read_text(target_md))
    source_body = strip_source_front_matter(read_text(source_md))
    source_body, markdown_copies = collect_markdown_images(
        source_body,
        source_image_dir,
        target_image_dir,
        target_md,
    )
    source_body, html_copies = collect_html_images(
        source_body,
        source_image_dir,
        target_image_dir,
        target_md,
    )
    new_content = build_post_content(target_front_matter, source_body)
    copies = markdown_copies + html_copies

    if args.dry_run:
        print(f"target markdown: {target_md}")
        print(f"target image dir: {target_image_dir}")
        print(f"images found: {len(copies)}")
    else:
        target_image_dir.mkdir(parents=True, exist_ok=True)

    copy_images(copies, args.dry_run)

    # Write the post last: if an image is missing, copy_images() has already
    # raised and the existing post is left as it was.
    if not args.dry_run:
        write_text(target_md, new_content)


if __name__ == "__main__":
    main()

#博客开发

将其他位置的md文档转移到博客文件夹中

https://travellingsheep.github.io/2026/06/09/blog/将其他位置的md文档转移到博客文件夹中/

作者

trs62

发布于

2026年6月9日

许可协议

算法（H）笔记——最短路上一篇

构建五级流水线——lab6 下一篇