将其他位置的md文档转移到博客文件夹中

本文最后更新于 2026年6月9日 12:22:54

注:此功能后续可能还会更新,这里只是简要介绍。

## 零、引入

在做计组lab的时候,已经产生了完善的实验报告。然而,每次我都需要手动将实验报告从wsl中复制到博客目录下,再逐一去找对应的每一张图片,这些事情实在太琐碎了。我很懒,所以让agent写了个脚本,帮我移动博客文章。我没有仔细看每一行代码,但从结果上来说,它达到了我的目的。

## 一、功能简介

此代码不依赖额外的python库,主要职责是复制文件内容。支持这些参数:

- source_md:也就是源文件的绝对路径
- target_md:你想要把这个文件复制到哪里
- source_image_dir:源文件的图片目录。默认留空并继承源文件所在路径
- target_image_dir:目标文件对应的图片目录。默认留空并设定为目标文件同路径下的同名文件夹

## 二、hexo配置

建议开启以下三项:

```yaml
post_asset_folder: true
marked:
  prependRoot: true
  postAsset: true
```
参考Hexo官方博客

开启这三项后,博客文章中可以直接使用markdown语法,无需特意改成asset_img标签插件的形式。图片只需要放在文章同名文件夹下即可,非常方便。

## 三、代码详情

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
"""Import an external Markdown file into a Hexo post and copy its images."""

from __future__ import annotations

import argparse
import os
import re
import shutil
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from urllib.parse import unquote, urlparse


# Lower-case file suffixes (leading dot included) that are treated as images.
IMAGE_EXTENSIONS = {
    ".apng", ".avif", ".bmp",
    ".gif", ".jpeg", ".jpg",
    ".png", ".svg", ".webp",
}

# Markdown image syntax ``![alt](target)``.
# Group 1 is the alt text, group 2 the raw target (path plus optional title).
MARKDOWN_IMAGE_RE = re.compile(
    r"!\[([^\]]*)\]\(([^)\r\n]+)\)",
)

# HTML image tag, matched case-insensitively.
# Group 1 is everything up to and including the opening quote of ``src``,
# group 2 the ``src`` value, group 3 the closing quote and the rest of the tag.
HTML_IMAGE_RE = re.compile(
    r"(<img\b[^>]*?\bsrc=[\"'])([^\"']+)([\"'][^>]*>)",
    flags=re.IGNORECASE,
)


# Day-to-day settings live here. The ``r`` prefix keeps backslashes literal,
# which is convenient for Windows paths.
CONFIG = {
    # Source Markdown file: the script reads its body.
    "source_md": r"\\wsl.localhost\Ubuntu-22.04\home\lab6_web.md",
    # Target Hexo post: its existing front matter is kept and the body after
    # the front matter is replaced.
    "target_md": r"source\_posts\构建五级流水线——lab6.md",
    # Source image directory: when empty, the directory containing the source
    # Markdown file is used.
    "source_image_dir": "",
    # Target image directory: when empty, the folder next to the target
    # Markdown file that shares its name (without suffix) is used.
    "target_image_dir": "",
}


@dataclass(frozen=True)
class ImageCopy:
    """Immutable record describing one image that should be copied."""

    # File the image is read from.
    source: Path
    # File the image is copied to.
    target: Path
    # Text recorded by the collector that produced this entry: the rewritten
    # Markdown image for Markdown matches, the rewritten URL for HTML matches.
    replacement: str


def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with ``source_md``, ``target_md``, ``source_image_dir`` and
        ``target_image_dir`` (each ``None`` when not given) and the boolean
        ``dry_run``.
    """
    cli = argparse.ArgumentParser(
        description="Copy source Markdown body into a Hexo post and move local images into the post asset folder."
    )

    # Plain string options, registered in the order they appear in --help.
    path_options = (
        ("--source-md", "Source Markdown file. Defaults to CONFIG['source_md']."),
        ("--target-md", "Target Hexo post Markdown file. Defaults to CONFIG['target_md']."),
        (
            "--source-image-dir",
            "Directory used to resolve relative image paths. Defaults to CONFIG['source_image_dir'].",
        ),
        (
            "--target-image-dir",
            "Directory where images will be copied. Defaults to CONFIG['target_image_dir'].",
        ),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, help=help_text)

    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print the planned work without writing the target Markdown or copying images.",
    )
    return cli.parse_args()


def configured_value(args: argparse.Namespace, name: str) -> str:
    """Return the setting *name*, preferring the command line over CONFIG.

    Only a missing option (``None``) falls back to ``CONFIG[name]``; an
    explicitly passed empty string is returned as is.
    """
    cli_value = getattr(args, name)
    if cli_value is None:
        return CONFIG[name]
    return cli_value


def default_target_image_dir(target_md: Path) -> Path:
    """Return the default image folder for *target_md*.

    This is the target path with its final suffix removed, i.e. a sibling
    folder that shares the post's name.
    """
    asset_folder = target_md.with_suffix("")
    return asset_folder


def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text and return its content.

    The ``utf-8-sig`` codec is used so that a leading byte-order mark (often
    added by Windows editors) is dropped. Without this the BOM stays attached
    to the first line and the ``---`` front matter check on that line fails.
    Files without a BOM decode exactly as with plain UTF-8.
    """
    return path.read_text(encoding="utf-8-sig")


def write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8.

    ``newline=""`` turns off newline translation, so line endings in
    *content* reach the file unchanged.
    """
    with path.open("w", encoding="utf-8", newline="") as handle:
        handle.write(content)


def split_front_matter(content: str) -> tuple[str, str]:
    """Split *content* into ``(front_matter, body)``.

    The front matter runs from the opening ``---`` line through the first
    later ``---`` line, both included; the body is everything after it.

    Raises:
        ValueError: if the first line is not ``---`` or no closing ``---``
            line exists.
    """
    lines = content.splitlines(keepends=True)
    opens_with_fence = bool(lines) and lines[0].strip() == "---"
    if not opens_with_fence:
        raise ValueError("Target Markdown must start with YAML front matter delimited by ---")

    for closing, line in enumerate(lines[1:], start=1):
        if line.strip() != "---":
            continue
        split_at = closing + 1
        return "".join(lines[:split_at]), "".join(lines[split_at:])

    raise ValueError("Target Markdown front matter is missing its closing ---")


def strip_source_front_matter(content: str) -> str:
    """Return *content* without a leading ``---`` ... ``---`` front matter block.

    The text is returned unchanged when it does not start with a ``---`` line
    or when no closing ``---`` line follows.
    """
    lines = content.splitlines(keepends=True)
    if lines and lines[0].strip() == "---":
        closing = next(
            (pos for pos, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
            None,
        )
        if closing is not None:
            return "".join(lines[closing + 1 :])

    return content


def is_external_or_special_url(image_url: str) -> bool:
    """Tell whether *image_url* should be left alone instead of being copied.

    Returns ``True`` for in-page anchors (``#...``) and for URLs with a scheme
    other than ``file`` (``http``, ``https``, ``data``, ...). Returns ``False``
    for plain paths and ``file:`` URLs.

    A single-letter "scheme" is a Windows drive letter (``C:\\img\\a.png`` or
    ``C:/img/a.png``) that ``urlparse`` mistakes for a URL scheme, so such
    values are treated as local paths.
    """
    if image_url.startswith("#"):
        return True

    scheme = urlparse(image_url).scheme.lower()
    if len(scheme) == 1:
        # Drive letter, not a real URL scheme.
        return False

    return scheme not in ("", "file")


def strip_markdown_link_title(raw_target: str) -> str:
    """Return the path part of a Markdown link target, without its title.

    The target is trimmed, then cut at the first occurrence of a space
    followed by a single or double quote (the start of an optional title).
    Targets without such a title are returned trimmed.
    """
    target = raw_target.strip()

    # Positions where a quoted title could begin; -1 means "not present".
    title_starts = [pos for pos in (target.find(' "'), target.find(" '")) if pos >= 0]
    if not target or not title_starts:
        return target

    return target[: min(title_starts)].strip()


def resolve_image_path(image_url: str, source_image_dir: Path) -> Path | None:
    """Turn an image reference into a filesystem path.

    Args:
        image_url: Raw link target as written in the document; an optional
            Markdown title is removed first.
        source_image_dir: Base directory for relative references.

    Returns:
        The percent-decoded path, joined onto *source_image_dir* when it is
        not absolute, or ``None`` for external/special URLs and empty paths.
    """
    clean_url = strip_markdown_link_title(image_url)
    if is_external_or_special_url(clean_url):
        return None

    parsed = urlparse(clean_url)
    if parsed.scheme.lower() == "file":
        # file: URLs carry the location in their path component.
        raw_path = unquote(parsed.path)
    else:
        raw_path = unquote(clean_url)
    if not raw_path:
        return None

    candidate = Path(raw_path)
    return candidate if candidate.is_absolute() else source_image_dir / candidate


def is_probable_image(path: Path) -> bool:
    """Return whether *path* has a known image suffix, ignoring case."""
    extension = path.suffix.lower()
    return extension in IMAGE_EXTENSIONS


def posix_relative_path(path: Path, start: Path) -> str:
    """Return *path* relative to *start*, written with forward slashes.

    Both arguments are resolved to absolute paths before the relative path is
    computed.
    """
    absolute_path = path.resolve()
    absolute_start = start.resolve()
    relative = Path(os.path.relpath(absolute_path, absolute_start))
    return relative.as_posix()


def make_markdown_image_url(copied_image: Path, target_md: Path) -> str:
    """Return the URL to write into the post for *copied_image*.

    An image that sits directly in the post's default image folder is
    referenced by bare file name. Any other location is referenced by its
    forward-slash path relative to the directory containing *target_md*.
    """
    asset_folder = default_target_image_dir(target_md).resolve()
    in_asset_folder = copied_image.parent.resolve() == asset_folder
    if not in_asset_folder:
        return posix_relative_path(copied_image, target_md.parent)

    return copied_image.name


def collect_markdown_images(
    body: str,
    source_image_dir: Path,
    target_image_dir: Path,
    target_md: Path,
) -> tuple[str, list[ImageCopy]]:
    """Rewrite local Markdown images in *body* and list the copies needed.

    Every ``![alt](target)`` whose target resolves to a local file with an
    image suffix is rewritten to point at the copy in *target_image_dir*.
    External URLs and non-image targets are left untouched.

    An optional title (``![alt](path "title")``) is carried over to the
    rewritten image instead of being dropped.

    Args:
        body: Markdown text to scan.
        source_image_dir: Base directory for relative image references.
        target_image_dir: Directory the images will be copied into.
        target_md: Target post, used to build the new image URLs.

    Returns:
        ``(new_body, copies)`` with one ``ImageCopy`` per rewritten image, in
        document order.
    """
    copies: list[ImageCopy] = []

    def replace_match(match: re.Match[str]) -> str:
        alt_text, raw_target = match.groups()
        source_path = resolve_image_path(raw_target, source_image_dir)
        if source_path is None or not is_probable_image(source_path):
            return match.group(0)

        target_path = target_image_dir / source_path.name
        image_url = make_markdown_image_url(target_path, target_md)

        # Whatever follows the path inside the parentheses is the quoted
        # title; the path part is always a prefix of the trimmed target.
        trimmed_target = raw_target.strip()
        path_part = strip_markdown_link_title(trimmed_target)
        title = trimmed_target[len(path_part):].strip()
        destination = f"{image_url} {title}" if title else image_url

        replacement = f"![{alt_text}]({destination})"
        copies.append(ImageCopy(source_path, target_path, replacement))
        return replacement

    return MARKDOWN_IMAGE_RE.sub(replace_match, body), copies


def collect_html_images(
    body: str,
    source_image_dir: Path,
    target_image_dir: Path,
    target_md: Path,
) -> tuple[str, list[ImageCopy]]:
    """Rewrite local HTML image tags in *body* and list the copies needed.

    Every image tag whose ``src`` resolves to a local file with an image
    suffix gets its ``src`` replaced by the URL of the copy in
    *target_image_dir*. External URLs and non-image sources are left as is.

    The new URL is built with ``make_markdown_image_url``, the same helper
    the Markdown collector uses, so both kinds of reference to one copied
    file get the same URL (a bare file name inside the post's default image
    folder, a relative path otherwise).

    Args:
        body: Text to scan.
        source_image_dir: Base directory for relative image references.
        target_image_dir: Directory the images will be copied into.
        target_md: Target post, used to build the new image URLs.

    Returns:
        ``(new_body, copies)`` with one ``ImageCopy`` per rewritten tag, in
        document order. ``ImageCopy.replacement`` holds the new ``src`` value.
    """
    copies: list[ImageCopy] = []

    def replace_match(match: re.Match[str]) -> str:
        prefix, raw_url, suffix = match.groups()
        source_path = resolve_image_path(raw_url, source_image_dir)
        if source_path is None or not is_probable_image(source_path):
            return match.group(0)

        target_path = target_image_dir / source_path.name
        replacement_url = make_markdown_image_url(target_path, target_md)
        copies.append(ImageCopy(source_path, target_path, replacement_url))
        return f"{prefix}{replacement_url}{suffix}"

    return HTML_IMAGE_RE.sub(replace_match, body), copies


def copy_images(copies: list[ImageCopy], dry_run: bool) -> None:
    """Copy every planned image once, or print the plan when *dry_run* is set.

    The whole plan is validated before anything is copied or printed, so a
    problem never leaves a half-populated image folder behind.

    Args:
        copies: Planned copies; entries that repeat a target with the same
            source are copied only once.
        dry_run: When true, print ``copy: <source> -> <target>`` lines
            instead of touching the filesystem.

    Raises:
        FileNotFoundError: if any source image does not exist.
        ValueError: if two different source files map to the same target
            (same file name from different folders). Copying only one of
            them would silently show the wrong picture for the other.
    """
    # target -> source, in first-seen order.
    planned: dict[Path, Path] = {}
    for item in copies:
        if not item.source.exists():
            raise FileNotFoundError(f"Image not found: {item.source}")

        first_source = planned.setdefault(item.target, item.source)
        if first_source != item.source and first_source.resolve() != item.source.resolve():
            raise ValueError(
                f"Image name conflict: {first_source} and {item.source} "
                f"would both be copied to {item.target}"
            )

    for target, source in planned.items():
        if dry_run:
            print(f"copy: {source} -> {target}")
        else:
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source, target)


def build_post_content(front_matter: str, source_body: str) -> str:
    """Join front matter and body into the final post text.

    Trailing whitespace of the front matter and leading whitespace of the
    body are removed, and the two parts are separated by one newline.
    """
    header = front_matter.rstrip()
    body = source_body.lstrip()
    return f"{header}\n{body}"


def main() -> None:
    """Import the source Markdown into the target Hexo post.

    Steps:
        1. Resolve settings from the command line, falling back to CONFIG.
        2. Keep the target's front matter, take the source's body (without
           its own front matter) and rewrite local image references.
        3. With ``--dry-run``, print the plan and stop. Otherwise copy the
           images and only then overwrite the target post.

    Images are copied before the post is written so that a missing image
    aborts the run while the existing post is still untouched.

    Raises:
        ValueError: if the source or target path is empty, or the target has
            no valid front matter.
        FileNotFoundError: if a referenced local image does not exist.
    """
    args = parse_args()
    source_md_text = configured_value(args, "source_md")
    target_md_text = configured_value(args, "target_md")
    source_image_dir_text = configured_value(args, "source_image_dir")
    target_image_dir_text = configured_value(args, "target_image_dir")

    if not source_md_text or not target_md_text:
        raise ValueError("CONFIG['source_md'] and CONFIG['target_md'] must not be empty.")

    source_md = Path(source_md_text)
    target_md = Path(target_md_text)
    # Empty directory settings fall back to locations derived from the files.
    source_image_dir = Path(source_image_dir_text) if source_image_dir_text else source_md.parent
    target_image_dir = Path(target_image_dir_text) if target_image_dir_text else default_target_image_dir(target_md)

    target_front_matter, _ = split_front_matter(read_text(target_md))
    source_body = strip_source_front_matter(read_text(source_md))
    source_body, markdown_copies = collect_markdown_images(
        source_body,
        source_image_dir,
        target_image_dir,
        target_md,
    )
    source_body, html_copies = collect_html_images(
        source_body,
        source_image_dir,
        target_image_dir,
        target_md,
    )
    new_content = build_post_content(target_front_matter, source_body)
    copies = markdown_copies + html_copies

    if args.dry_run:
        print(f"target markdown: {target_md}")
        print(f"target image dir: {target_image_dir}")
        print(f"images found: {len(copies)}")
        copy_images(copies, dry_run=True)
        return

    target_image_dir.mkdir(parents=True, exist_ok=True)
    # Copy first: if an image is missing this raises before the post changes.
    copy_images(copies, dry_run=False)
    write_text(target_md, new_content)


if __name__ == "__main__":
    main()

将其他位置的md文档转移到博客文件夹中
https://travellingsheep.github.io/2026/06/09/blog/将其他位置的md文档转移到博客文件夹中/
作者
trs62
发布于
2026年6月9日
许可协议