From 5c4450c4174b51691c77f921d3904c0a399ddb5c Mon Sep 17 00:00:00 2001 From: apple Date: Mon, 27 Apr 2026 12:13:19 +0800 Subject: [PATCH] =?UTF-8?q?docs(sxsy80):=20=E8=BF=81=E7=A7=BB=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E3=80=81=E6=B8=85=E7=90=86=E8=84=9A=E6=9C=AC=E4=B8=8E?= =?UTF-8?q?=20wa=5Fmerchandise=20=E6=8F=90=E5=8F=96=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补充数据迁移说明与 cleanup SQL;增加从 dump 提取保留 id 与生成 INSERT 的脚本及产物。 Made-with: Cursor --- docs/com-sxsy80-data-imgration.md | 11 +- docs/sql/com-sxsy80-data-cleanup.sql | 7 +- docs/sql/com-sxsy80-wa_merchandise-only.sql | 12 ++ ...tract_wa_merchandise_keep_ids_from_dump.py | 52 ++++++ ...enerate_wa_merchandise_insert_from_dump.py | 168 ++++++++++++++++++ ...wa_merchandise_insert_from_dump_sxsy80.sql | 24 +++ ..._merchandise_keep_ids_from_dump_sxsy80.txt | 4 + 7 files changed, 272 insertions(+), 6 deletions(-) create mode 100644 docs/sql/com-sxsy80-wa_merchandise-only.sql create mode 100644 docs/sql/extract_wa_merchandise_keep_ids_from_dump.py create mode 100644 docs/sql/generate_wa_merchandise_insert_from_dump.py create mode 100644 docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql create mode 100644 docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt diff --git a/docs/com-sxsy80-data-imgration.md b/docs/com-sxsy80-data-imgration.md index e41cffe..968bfd0 100644 --- a/docs/com-sxsy80-data-imgration.md +++ b/docs/com-sxsy80-data-imgration.md @@ -20,7 +20,7 @@ datasource: 清空wa_order表中数据 - wa_merchandise -只保留“created_at >= 2026-04-24”并且seller_id或buyer_id在用户id数据范围的寄售商品,删除其余数据 +从源数据dump文件中提取“created_at >= 2026-04-22”并且seller_id或buyer_id在用户id数据范围的寄售商品,删除其余数据 (当前库表字段为 `user_id` 表示卖家,实现时按 `user_id` 与日期条件过滤。) - wa_selfbonus_log @@ -43,6 +43,11 @@ datasource: ## 执行脚本 +- **`wa_merchandise` 批量 INSERT(从 dump 筛条件后生成)** + - 生成结果:`docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql`(当前 **18** 行,与 dump 中 `INSERT INTO wa_merchandise` 一致)。 + - 重新生成:`python3 docs/sql/generate_wa_merchandise_insert_from_dump.py /path/to/ccd-yangtangyoupin_*.sql`(不传参时默认使用仓库上级 `integral-shop/db/ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql`)。 + - 导入前注意主键冲突:若目标库已有相同 `id`,需先删改或改用 `REPLACE INTO` / 调整自增策略。 + - SQL:`docs/sql/com-sxsy80-data-cleanup.sql` - 本机 Homebrew `mysql` 9 客户端不支持 `mysql_native_password`,可用 `pip install pymysql` 后执行: @@ -51,7 +56,9 @@ export YTYP_DB_PASSWORD='(见上文 datasource.password)' python3 docs/sql/run_com_sxsy80_cleanup.py ``` -- 已于 **2026-04-26** 对远程库执行并成功 `COMMIT`(`wa_merchandise` 删除 2114 行,`wa_selfbonus_log` 1592,`wa_sharebonus_log` 1399,`wa_coupon_log` 171,`eb_user_integral_record` 1613,`eb_user` 80,`wa_users` 80;`wa_order` / `wa_withdraw` / `eb_store_order` 已 `TRUNCATE`)。 +- 已于 **2026-04-26** 对远程库执行并成功 `COMMIT`(首轮:`wa_merchandise` 按 2026-04-24 条件删除 2114 行;`wa_selfbonus_log` 1592,`wa_sharebonus_log` 1399,`wa_coupon_log` 171,`eb_user_integral_record` 1613,`eb_user` 80,`wa_users` 80;`wa_order` / `wa_withdraw` / `eb_store_order` 已 `TRUNCATE`)。 +- **2026-04-26 二次**:按文档将 `wa_merchandise` 日期阈值改为 **2026-04-22** 重新执行(见 `docs/sql/com-sxsy80-wa_merchandise-only.sql`),仅影响该表;**删除 0 行**(上轮已按 04-24 清理,现存行均满足「>= 04-22 且卖家在名单」;若 04-22~04-23 且卖家在名单的数据曾被误删,需从备份恢复库后再用 04-22 规则全量重跑)。 +- **2026-04-26 三次**:按「从源 dump 提取」规则:从 `ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql` 解析 `created_at >= 2026-04-22` 且 `user_id` 在名单内的 **18** 条 `id`,执行 `DELETE ... WHERE id NOT IN (...)`;保留 id 列表见 `docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt`。**删除 0 行**(当前库中 `wa_merchandise` 已仅含上述 id 子集,与 dump 规则一致)。 ## 相关文件 diff --git a/docs/sql/com-sxsy80-data-cleanup.sql b/docs/sql/com-sxsy80-data-cleanup.sql index b0b216c..39b6acc 100644 --- a/docs/sql/com-sxsy80-data-cleanup.sql +++ b/docs/sql/com-sxsy80-data-cleanup.sql @@ -13,11 +13,10 @@ TRUNCATE TABLE `wa_order`; TRUNCATE TABLE `wa_withdraw`; TRUNCATE TABLE `eb_store_order`; --- 寄售商品:仅保留 created_at >= 2026-04-24 且卖家 user_id 在保留名单内 +-- 寄售商品:与迁移文档一致——从源 dump 解析 id 列表(见 wa_merchandise_keep_ids_from_dump_sxsy80.txt),删库中不在集合内的行 DELETE FROM `wa_merchandise` -WHERE NOT ( - `created_at` >= '2026-04-24 00:00:00' - AND `user_id` IN (92566,92801,92839,93004,92637,92965,93093,93096,93116,92787,93121,93129,92884,93007,93020,93094,93099,93110,92638) +WHERE `id` NOT IN ( + 163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259 ); -- 日志类:仅保留名单内 user_id diff --git a/docs/sql/com-sxsy80-wa_merchandise-only.sql b/docs/sql/com-sxsy80-wa_merchandise-only.sql new file mode 100644 index 0000000..dc39ec6 --- /dev/null +++ b/docs/sql/com-sxsy80-wa_merchandise-only.sql @@ -0,0 +1,12 @@ +-- 仅处理 wa_merchandise:与 com-sxsy80-data-imgration.md 一致 +-- 从源 dump 解析应保留的 id 列表(见 wa_merchandise_keep_ids_from_dump_sxsy80.txt),删除库中 id 不在此集合的所有行。 + +SET NAMES utf8mb4; +START TRANSACTION; + +DELETE FROM `wa_merchandise` +WHERE `id` NOT IN ( + 163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259 +); + +COMMIT; diff --git a/docs/sql/extract_wa_merchandise_keep_ids_from_dump.py b/docs/sql/extract_wa_merchandise_keep_ids_from_dump.py new file mode 100644 index 0000000..0bd0f2d --- /dev/null +++ b/docs/sql/extract_wa_merchandise_keep_ids_from_dump.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +"""从 dump 解析 wa_merchandise 应保留的 id(与 com-sxsy80-data-imgration.md 一致)。""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +KEEP_USERS = { + 92566, 92801, 92839, 93004, 92637, 92965, 93093, 93096, 93116, 92787, 93121, 93129, + 92884, 93007, 93020, 93094, 93099, 93110, 92638, +} +CUTOFF = "2026-04-22 00:00:00" +# id, old_id, user_id, title, image, price, is_show, status, created_at, updated_at +ROW_RE = re.compile( + r"\((\d+),(\d+),(\d+),'(?:[^'\\]|\\.)*','(?:[^'\\]|\\.)*',[\d.]+,\d+,\d+,'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})','(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})'\)" +) + + +def main() -> int: + dump = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(__file__).resolve().parents[2].parent / "db" / "ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql" + if not dump.is_file(): + print("dump not found:", dump, file=sys.stderr) + return 1 + line = None + with dump.open("r", encoding="utf-8", errors="replace") as f: + for ln in f: + if "INSERT INTO `wa_merchandise`" in ln and "VALUES" in ln: + line = ln + break + if not line: + print("no wa_merchandise INSERT", file=sys.stderr) + return 1 + keep: set[int] = set() + for m in ROW_RE.finditer(line): + mid, _old, user_id, created_at, _upd = m.groups() + if created_at >= CUTOFF and int(user_id) in KEEP_USERS: + keep.add(int(mid)) + out = Path(__file__).resolve().parent / "wa_merchandise_keep_ids_from_dump_sxsy80.txt" + body = ( + "# 从 dump 解析;条件:created_at >= 2026-04-22 且 user_id 在名单\n" + "# 重新生成:python3 docs/sql/extract_wa_merchandise_keep_ids_from_dump.py /path/to/dump.sql\n\n" + + ",".join(str(i) for i in sorted(keep)) + + "\n" + ) + out.write_text(body, encoding="utf-8") + print("wrote", out, "count=", len(keep)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/sql/generate_wa_merchandise_insert_from_dump.py b/docs/sql/generate_wa_merchandise_insert_from_dump.py new file mode 100644 index 0000000..ecc7353 --- /dev/null +++ b/docs/sql/generate_wa_merchandise_insert_from_dump.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +从 dump 的 INSERT INTO wa_merchandise 中筛出: + created_at >= 2026-04-22 00:00:00 且 user_id 在数据范围 +生成批量 INSERT SQL(与 docs/com-sxsy80-data-imgration.md 一致)。 + +用法: + python3 docs/sql/generate_wa_merchandise_insert_from_dump.py [dump.sql路径] +默认 dump: ../../db/ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql(相对仓库根) +""" +from __future__ import annotations + +import sys +from pathlib import Path + +KEEP_USERS = { + 92566, 92801, 92839, 93004, 92637, 92965, 93093, 93096, 93116, 92787, 93121, 93129, + 92884, 93007, 93020, 93094, 93099, 93110, 92638, +} +CUTOFF = "2026-04-22 00:00:00" + + +def split_top_level_tuples(values_blob: str) -> list[str]: + out: list[str] = [] + i = 0 + n = len(values_blob) + while i < n: + if values_blob[i] != "(": + i += 1 + continue + depth = 0 + in_quote = False + start = i + j = i + while j < n: + c = values_blob[j] + if in_quote: + if c == "'": + if j + 1 < n and values_blob[j + 1] == "'": + j += 2 + continue + in_quote = False + j += 1 + continue + j += 1 + continue + if c == "'": + in_quote = True + j += 1 + continue + if c == "(": + depth += 1 + elif c == ")": + depth -= 1 + if depth == 0: + out.append(values_blob[start : j + 1]) + j += 1 + break + j += 1 + i = j + return out + + +def split_mysql_fields(inner: str) -> list[str]: + """inner: 不含最外层括号的字段串""" + out: list[str] = [] + cur: list[str] = [] + i = 0 + n = len(inner) + while i < n: + c = inner[i] + if c == "'": + cur.append(c) + i += 1 + while i < n: + c = inner[i] + cur.append(c) + if c == "'": + if i + 1 < n and inner[i + 1] == "'": + cur.append(inner[i + 1]) + i += 2 + continue + i += 1 + break + i += 1 + continue + if c == ",": + out.append("".join(cur).strip()) + cur = [] + i += 1 + continue + cur.append(c) + i += 1 + if cur: + out.append("".join(cur).strip()) + return out + + +def parse_created_at(fields: list[str]) -> str: + # id, old_id, user_id, title, image, price, is_show, status, created_at, updated_at + raw = fields[8].strip() + if raw.startswith("'") and raw.endswith("'"): + return raw[1:-1].replace("''", "'") + return raw + + +def parse_user_id(fields: list[str]) -> int: + return int(fields[2].strip()) + + +def main() -> int: + root = Path(__file__).resolve().parents[2] + dump = ( + Path(sys.argv[1]).resolve() + if len(sys.argv) > 1 + else root.parent / "db" / "ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql" + ) + if not dump.is_file(): + print("dump not found:", dump, file=sys.stderr) + return 1 + + insert_line = None + with dump.open("r", encoding="utf-8", errors="replace") as f: + for line in f: + if "INSERT INTO `wa_merchandise`" in line and "VALUES" in line: + insert_line = line + break + if not insert_line: + print("no INSERT wa_merchandise", file=sys.stderr) + return 1 + + marker = "VALUES" + idx = insert_line.index(marker) + len(marker) + blob = insert_line[idx:].strip() + if blob.endswith(";"): + blob = blob[:-1].strip() + + kept: list[str] = [] + for tup in split_top_level_tuples(blob): + inner = tup.strip()[1:-1] + fields = split_mysql_fields(inner) + if len(fields) < 10: + print("skip malformed tuple:", tup[:80], file=sys.stderr) + continue + created = parse_created_at(fields) + uid = parse_user_id(fields) + if created >= CUTOFF and uid in KEEP_USERS: + kept.append(tup.strip()) + + if not kept: + print("no rows matched", file=sys.stderr) + return 1 + + out_sql = root / "docs" / "sql" / "wa_merchandise_insert_from_dump_sxsy80.sql" + header = f"""-- 由 generate_wa_merchandise_insert_from_dump.py 生成 +-- 源: {dump.name} +-- 条件: created_at >= {CUTOFF} 且 user_id(卖家)在数据范围(共 {len(kept)} 行) +-- 执行前请确认目标库;若存在主键冲突可先处理或改用 INSERT IGNORE / REPLACE + +""" + body = "INSERT INTO `wa_merchandise` VALUES\n" + ",\n".join(kept) + ";\n" + out_sql.write_text(header + body, encoding="utf-8") + print("wrote", out_sql, "rows=", len(kept)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql b/docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql new file mode 100644 index 0000000..8387549 --- /dev/null +++ b/docs/sql/wa_merchandise_insert_from_dump_sxsy80.sql @@ -0,0 +1,24 @@ +-- 由 generate_wa_merchandise_insert_from_dump.py 生成 +-- 源: ccd-yangtangyoupin_2026-04-26_10-25-01_mysql_data.sql +-- 条件: created_at >= 2026-04-22 00:00:00 且 user_id(卖家)在数据范围(共 18 行) +-- 执行前请确认目标库;若存在主键冲突可先处理或改用 INSERT IGNORE / REPLACE + +INSERT INTO `wa_merchandise` VALUES +(163212,169703,93004,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',22532.37,1,0,'2026-04-22 14:50:16','2026-04-23 10:00:01'), +(163213,169715,92801,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',35655.95,1,0,'2026-04-22 14:50:52','2026-04-23 10:01:13'), +(163214,169743,92566,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',29788.66,1,0,'2026-04-22 14:51:06','2026-04-23 10:00:00'), +(163223,169735,93007,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',33613.59,1,0,'2026-04-22 15:00:30','2026-04-23 10:01:40'), +(163224,169726,93116,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',34621.99,1,0,'2026-04-22 15:04:45','2026-04-23 10:00:13'), +(163225,169692,92638,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',24784.54,1,0,'2026-04-22 15:05:51','2026-04-23 10:04:15'), +(163226,169708,92787,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23081.44,1,0,'2026-04-22 15:05:54','2026-04-23 10:00:13'), +(163231,169728,93096,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',26916.09,1,0,'2026-04-22 15:10:21','2026-04-23 10:04:22'), +(163234,169707,92637,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',24404.25,1,0,'2026-04-22 15:22:39','2026-04-23 10:01:00'), +(163235,169693,93121,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',33031.21,1,0,'2026-04-22 15:29:57','2026-04-23 10:00:00'), +(163237,169724,92965,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 15:34:45','2026-04-23 10:02:03'), +(163239,169698,92884,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23904.59,1,0,'2026-04-22 15:34:57','2026-04-23 10:03:01'), +(163244,169700,93093,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 16:46:21','2026-04-23 10:01:55'), +(163245,169699,93129,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23904.59,1,0,'2026-04-22 16:53:43','2026-04-23 10:00:22'), +(163246,169720,92839,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23081.44,1,0,'2026-04-22 16:57:44','2026-04-23 10:01:35'), +(163254,169741,93020,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',23693.45,1,0,'2026-04-22 17:25:34','2026-04-23 10:00:02'), +(163255,169756,93094,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',40227.74,1,0,'2026-04-22 17:28:19','2026-04-23 10:01:00'), +(163259,169704,93099,'鲜锋活力宝','/upload/image/20251118/bc323c55ca713eee8badf5bf358893b7_691c26bd07fc3.jpg',22939.85,1,0,'2026-04-22 17:43:38','2026-04-23 10:00:02'); diff --git a/docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt b/docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt new file mode 100644 index 0000000..603e7aa --- /dev/null +++ b/docs/sql/wa_merchandise_keep_ids_from_dump_sxsy80.txt @@ -0,0 +1,4 @@ +# 从 dump 解析;条件:created_at >= 2026-04-22 且 user_id 在名单 +# 重新生成:python3 docs/sql/extract_wa_merchandise_keep_ids_from_dump.py /path/to/dump.sql + +163212,163213,163214,163223,163224,163225,163226,163231,163234,163235,163237,163239,163244,163245,163246,163254,163255,163259