docs(sxsy80): 迁移文档、清理脚本与 wa_merchandise 提取工具

补充数据迁移说明与 cleanup SQL;增加从 dump 提取保留 id 与生成 INSERT 的脚本及产物。

Made-with: Cursor
This commit is contained in:
apple
2026-04-27 12:13:19 +08:00
parent 901bf6f500
commit 5c4450c417
7 changed files with 272 additions and 6 deletions

View File

@@ -20,7 +20,7 @@ datasource:
清空wa_order表中数据
- wa_merchandise
只保留“created_at >= 2026-04-24”并且seller_id或buyer_id在用户id数据范围的寄售商品删除其余数据
从源数据dump文件中提取“created_at >= 2026-04-22”并且seller_id或buyer_id在用户id数据范围的寄售商品删除其余数据
(当前库表字段为 `user_id` 表示卖家,实现时按 `user_id` 与日期条件过滤。)
- wa_selfbonus_log
@@ -43,6 +43,11 @@ datasource:
## 执行脚本
- **`wa_merchandise` 批量 INSERT从 dump 筛条件后生成)**
- 生成结果:`docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql`(当前 **18** 行,与 dump 中 `INSERT INTO wa_merchandise` 一致)。
- 重新生成:`python3 docs/sql/generate_wa_merchandise_insert_from_dump.py /path/to/ccd-yangtangyoupin_*.sql`(不传参时默认使用仓库上级 `integral-shop/db/ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql`)。
- 导入前注意主键冲突:若目标库已有相同 `id`,需先删改或改用 `REPLACE INTO` / 调整自增策略。
- SQL`docs/sql/com-sxsy80-data-cleanup.sql`
- 本机 Homebrew `mysql` 9 客户端不支持 `mysql_native_password`,可用 `pip install pymysql` 后执行:
@@ -51,7 +56,9 @@ export YTYP_DB_PASSWORD='(见上文 datasource.password'
python3 docs/sql/run_com_sxsy80_cleanup.py
```
- 已于 **2026-04-26** 对远程库执行并成功 `COMMIT``wa_merchandise` 删除 2114 行`wa_selfbonus_log` 1592`wa_sharebonus_log` 1399`wa_coupon_log` 171`eb_user_integral_record` 1613`eb_user` 80`wa_users` 80`wa_order` / `wa_withdraw` / `eb_store_order``TRUNCATE`)。
- 已于 **2026-04-26** 对远程库执行并成功 `COMMIT`首轮:`wa_merchandise` 按 2026-04-24 条件删除 2114 行`wa_selfbonus_log` 1592`wa_sharebonus_log` 1399`wa_coupon_log` 171`eb_user_integral_record` 1613`eb_user` 80`wa_users` 80`wa_order` / `wa_withdraw` / `eb_store_order``TRUNCATE`)。
- **2026-04-26 二次**:按文档将 `wa_merchandise` 日期阈值改为 **2026-04-22** 重新执行(见 `docs/sql/com-sxsy80-wa_merchandise-only.sql`),仅影响该表;**删除 0 行**(上轮已按 04-24 清理,现存行均满足「>= 04-22 且卖家在名单」;若 04-2204-23 且卖家在名单的数据曾被误删,需从备份恢复库后再用 04-22 规则全量重跑)。
- **2026-04-26 三次**:按「从源 dump 提取」规则:从 `ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql` 解析 `created_at >= 2026-04-22``user_id` 在名单内的 **18**`id`,执行 `DELETE ... WHERE id NOT IN (...)`;保留 id 列表见 `docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt`。**删除 0 行**(当前库中 `wa_merchandise` 已仅含上述 id 子集,与 dump 规则一致)。
## 相关文件

View File

@@ -13,11 +13,10 @@ TRUNCATE TABLE `wa_order`;
TRUNCATE TABLE `wa_withdraw`;
TRUNCATE TABLE `eb_store_order`;
-- 寄售商品:仅保留 created_at >= 2026-04-24 且卖家 user_id 在保留名单内
-- 寄售商品:与迁移文档一致——从源 dump 解析 id 列表(见 wa_merchandise_keep_ids_from_dump_sxsy80.txt删库中不在集合内的行
DELETE FROM `wa_merchandise`
WHERE NOT (
`created_at` >= '2026-04-24 00:00:00'
AND `user_id` IN (92566,92801,92839,93004,92637,92965,93093,93096,93116,92787,93121,93129,92884,93007,93020,93094,93099,93110,92638)
WHERE `id` NOT IN (
163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259
);
-- 日志类:仅保留名单内 user_id

View File

@@ -0,0 +1,12 @@
-- Only touches wa_merchandise (kept consistent with com-sxsy80-data-imgration.md).
-- Deletes every row whose id is NOT in the keep-list parsed from the source dump
-- (see wa_merchandise_keep_ids_from_dump_sxsy80.txt for how the list was produced).
SET NAMES utf8mb4;
START TRANSACTION;
DELETE FROM `wa_merchandise`
WHERE `id` NOT IN (
163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259
);
COMMIT;

View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python3
"""Extract the wa_merchandise ids to keep from a MySQL dump (consistent with com-sxsy80-data-imgration.md)."""
from __future__ import annotations
import re
import sys
from pathlib import Path

# Seller user_ids whose merchandise rows must be retained.
KEEP_USERS = {
    92566, 92801, 92839, 93004, 92637, 92965, 93093, 93096, 93116, 92787, 93121, 93129,
    92884, 93007, 93020, 93094, 93099, 93110, 92638,
}
# Rows created at or after this timestamp are eligible. A plain string compare
# is safe because both sides use the fixed 'YYYY-MM-DD HH:MM:SS' format.
CUTOFF = "2026-04-22 00:00:00"
# Tuple layout in the dump:
# id, old_id, user_id, title, image, price, is_show, status, created_at, updated_at
# String fields accept BOTH MySQL escaping styles — backslash escapes (\' \\)
# and doubled quotes ('') — so a title containing a quote does not silently
# drop the row. Price allows an optional leading minus sign.
ROW_RE = re.compile(
    r"\((\d+),(\d+),(\d+),"
    r"'(?:[^'\\]|\\.|'')*','(?:[^'\\]|\\.|'')*',"
    r"-?[\d.]+,\d+,\d+,"
    r"'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})','(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})'\)"
)
def main() -> int:
    """Find the wa_merchandise INSERT in the dump, filter rows, write the keep-id list."""
    default_dump = (
        Path(__file__).resolve().parents[2].parent
        / "db"
        / "ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql"
    )
    dump_path = Path(sys.argv[1]) if len(sys.argv) > 1 else default_dump
    if not dump_path.is_file():
        print("dump not found:", dump_path, file=sys.stderr)
        return 1
    # The dump stores the whole table as one INSERT statement on a single line.
    insert_line = None
    with dump_path.open("r", encoding="utf-8", errors="replace") as fh:
        for raw in fh:
            if "INSERT INTO `wa_merchandise`" in raw and "VALUES" in raw:
                insert_line = raw
                break
    if not insert_line:
        print("no wa_merchandise INSERT", file=sys.stderr)
        return 1
    # group(1)=id, group(3)=user_id, group(4)=created_at (see ROW_RE layout).
    keep_ids = {
        int(match.group(1))
        for match in ROW_RE.finditer(insert_line)
        if match.group(4) >= CUTOFF and int(match.group(3)) in KEEP_USERS
    }
    target = Path(__file__).resolve().parent / "wa_merchandise_keep_ids_from_dump_sxsy80.txt"
    text = (
        "# 从 dump 解析条件created_at >= 2026-04-22 且 user_id 在名单\n"
        "# 重新生成python3 docs/sql/extract_wa_merchandise_keep_ids_from_dump.py /path/to/dump.sql\n\n"
        + ",".join(str(i) for i in sorted(keep_ids))
        + "\n"
    )
    target.write_text(text, encoding="utf-8")
    print("wrote", target, "count=", len(keep_ids))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Filter the dump's INSERT INTO wa_merchandise rows down to:
created_at >= 2026-04-22 00:00:00 and user_id within the retained-user range,
then emit a bulk INSERT SQL file (consistent with docs/com-sxsy80-data-imgration.md).

Usage:
    python3 docs/sql/generate_wa_merchandise_insert_from_dump.py [path/to/dump.sql]

Default dump: ../../db/ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql (relative to the repo root).
"""
from __future__ import annotations
import sys
from pathlib import Path
# Seller user_ids whose merchandise rows are kept.
KEEP_USERS = {
    92566, 92801, 92839, 93004, 92637, 92965, 93093, 93096, 93116, 92787, 93121, 93129,
    92884, 93007, 93020, 93094, 93099, 93110, 92638,
}
# Rows created at/after this timestamp are kept; plain string comparison works
# because the dump always uses the fixed 'YYYY-MM-DD HH:MM:SS' format.
CUTOFF = "2026-04-22 00:00:00"
def split_top_level_tuples(values_blob: str) -> list[str]:
    r"""Split the VALUES blob of a MySQL INSERT into its top-level "(...)" tuples.

    Tracks single-quoted strings so commas and parentheses inside titles do
    not confuse the scanner. Both MySQL string-escaping styles are honoured
    inside quotes: backslash escapes (\' and \\, the mysqldump default) and
    doubled quotes (''). Previously a \' was treated as the string terminator,
    which corrupted tuple boundaries for titles containing quotes.

    Returns the tuples verbatim, including their outer parentheses.
    """
    out: list[str] = []
    i = 0
    n = len(values_blob)
    while i < n:
        if values_blob[i] != "(":
            i += 1
            continue
        depth = 0
        in_quote = False
        start = i
        j = i
        while j < n:
            c = values_blob[j]
            if in_quote:
                if c == "\\":
                    # Backslash escape: skip the escaped character as well.
                    j += 2
                    continue
                if c == "'":
                    if j + 1 < n and values_blob[j + 1] == "'":
                        # Doubled quote is an escaped quote, not a terminator.
                        j += 2
                        continue
                    in_quote = False
                j += 1
                continue
            if c == "'":
                in_quote = True
                j += 1
                continue
            if c == "(":
                depth += 1
            elif c == ")":
                depth -= 1
                if depth == 0:
                    out.append(values_blob[start : j + 1])
                    j += 1
                    break
            j += 1
        i = j
    return out
def split_mysql_fields(inner: str) -> list[str]:
    r"""Split one tuple body (without the outer parentheses) into raw fields.

    Quoted fields keep their surrounding quotes and are returned verbatim.
    Both MySQL escaping styles are honoured inside quotes — backslash escapes
    (\' and \\) and doubled quotes ('') — so a comma after an escaped quote
    no longer splits a field in two. Unquoted fields are stripped.
    """
    out: list[str] = []
    cur: list[str] = []
    i = 0
    n = len(inner)
    while i < n:
        c = inner[i]
        if c == "'":
            cur.append(c)
            i += 1
            while i < n:
                c = inner[i]
                if c == "\\" and i + 1 < n:
                    # Keep the backslash escape verbatim and stay in-quote.
                    cur.append(c)
                    cur.append(inner[i + 1])
                    i += 2
                    continue
                cur.append(c)
                if c == "'":
                    if i + 1 < n and inner[i + 1] == "'":
                        cur.append(inner[i + 1])
                        i += 2
                        continue
                    i += 1
                    break
                i += 1
            continue
        if c == ",":
            out.append("".join(cur).strip())
            cur = []
            i += 1
            continue
        cur.append(c)
        i += 1
    if cur:
        out.append("".join(cur).strip())
    return out
def parse_created_at(fields: list[str]) -> str:
    """Return the created_at value (field index 8), unquoting if needed.

    Field order in the dump tuple: id, old_id, user_id, title, image, price,
    is_show, status, created_at, updated_at.
    """
    value = fields[8].strip()
    is_quoted = value.startswith("'") and value.endswith("'")
    if not is_quoted:
        return value
    # Strip the surrounding quotes and undo MySQL doubled-quote escaping.
    return value[1:-1].replace("''", "'")
def parse_user_id(fields: list[str]) -> int:
    """Return the seller user_id (field index 2) as an int."""
    raw_uid = fields[2]
    return int(raw_uid.strip())
def main() -> int:
    """Entry point: locate the dump's INSERT, filter its rows, write the SQL file."""
    repo_root = Path(__file__).resolve().parents[2]
    default_dump = repo_root.parent / "db" / "ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql"
    dump_path = Path(sys.argv[1]).resolve() if len(sys.argv) > 1 else default_dump
    if not dump_path.is_file():
        print("dump not found:", dump_path, file=sys.stderr)
        return 1
    # The dump stores the whole table as a single one-line INSERT statement.
    insert_stmt = None
    with dump_path.open("r", encoding="utf-8", errors="replace") as fh:
        for raw in fh:
            if "INSERT INTO `wa_merchandise`" in raw and "VALUES" in raw:
                insert_stmt = raw
                break
    if not insert_stmt:
        print("no INSERT wa_merchandise", file=sys.stderr)
        return 1
    # Isolate the VALUES blob and drop the trailing statement terminator.
    blob = insert_stmt[insert_stmt.index("VALUES") + len("VALUES"):].strip()
    if blob.endswith(";"):
        blob = blob[:-1].strip()
    kept: list[str] = []
    for tup in split_top_level_tuples(blob):
        fields = split_mysql_fields(tup.strip()[1:-1])
        if len(fields) < 10:
            print("skip malformed tuple:", tup[:80], file=sys.stderr)
            continue
        created_at = parse_created_at(fields)
        seller = parse_user_id(fields)
        if created_at >= CUTOFF and seller in KEEP_USERS:
            kept.append(tup.strip())
    if not kept:
        print("no rows matched", file=sys.stderr)
        return 1
    out_sql = repo_root / "docs" / "sql" / "wa_merchandise_insert_from_dump_sxsy80.sql"
    header = f"""-- 由 generate_wa_merchandise_insert_from_dump.py 生成
-- 源: {dump_path.name}
-- 条件: created_at >= {CUTOFF} 且 user_id卖家在数据范围{len(kept)} 行)
-- 执行前请确认目标库;若存在主键冲突可先处理或改用 INSERT IGNORE / REPLACE
"""
    statement = "INSERT INTO `wa_merchandise` VALUES\n" + ",\n".join(kept) + ";\n"
    out_sql.write_text(header + statement, encoding="utf-8")
    print("wrote", out_sql, "rows=", len(kept))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,24 @@
-- 由 generate_wa_merchandise_insert_from_dump.py 生成
-- 源: ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql
-- 条件: created_at >= 2026-04-22 00:00:00 且 user_id卖家在数据范围共 18 行)
-- 执行前请确认目标库;若存在主键冲突可先处理或改用 INSERT IGNORE / REPLACE
INSERT INTO `wa_merchandise` VALUES
(163212,169703,93004,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',22532.37,1,0,'2026-04-22 14:50:16','2026-04-23 10:00:01'),
(163213,169715,92801,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',35655.95,1,0,'2026-04-22 14:50:52','2026-04-23 10:01:13'),
(163214,169743,92566,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',29788.66,1,0,'2026-04-22 14:51:06','2026-04-23 10:00:00'),
(163223,169735,93007,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',33613.59,1,0,'2026-04-22 15:00:30','2026-04-23 10:01:40'),
(163224,169726,93116,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',34621.99,1,0,'2026-04-22 15:04:45','2026-04-23 10:00:13'),
(163225,169692,92638,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',24784.54,1,0,'2026-04-22 15:05:51','2026-04-23 10:04:15'),
(163226,169708,92787,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23081.44,1,0,'2026-04-22 15:05:54','2026-04-23 10:00:13'),
(163231,169728,93096,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',26916.09,1,0,'2026-04-22 15:10:21','2026-04-23 10:04:22'),
(163234,169707,92637,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',24404.25,1,0,'2026-04-22 15:22:39','2026-04-23 10:01:00'),
(163235,169693,93121,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',33031.21,1,0,'2026-04-22 15:29:57','2026-04-23 10:00:00'),
(163237,169724,92965,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 15:34:45','2026-04-23 10:02:03'),
(163239,169698,92884,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23904.59,1,0,'2026-04-22 15:34:57','2026-04-23 10:03:01'),
(163244,169700,93093,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 16:46:21','2026-04-23 10:01:55'),
(163245,169699,93129,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23904.59,1,0,'2026-04-22 16:53:43','2026-04-23 10:00:22'),
(163246,169720,92839,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23081.44,1,0,'2026-04-22 16:57:44','2026-04-23 10:01:35'),
(163254,169741,93020,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 17:25:34','2026-04-23 10:00:02'),
(163255,169756,93094,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',40227.74,1,0,'2026-04-22 17:28:19','2026-04-23 10:01:00'),
(163259,169704,93099,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',22939.85,1,0,'2026-04-22 17:43:38','2026-04-23 10:00:02');

View File

@@ -0,0 +1,4 @@
# 从 dump 解析条件created_at >= 2026-04-22 且 user_id 在名单
# 重新生成python3 docs/sql/extract_wa_merchandise_keep_ids_from_dump.py /path/to/dump.sql
163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259