Initial commit: MSH System

- msh_single_uniapp: Vue 2 + UniApp 前端(微信小程序/H5/App/支付宝小程序)
- msh_crmeb_22: Spring Boot 2.2 后端(C端API/管理端/业务逻辑)
- models-integration: AI服务集成(Coze/KieAI/腾讯ASR)
- docs: 产品文档与设计稿
This commit is contained in:
35
msh_crmeb_22/scraper/analyze_detail.py
Normal file
35
msh_crmeb_22/scraper/analyze_detail.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Detail page to fetch; the URL was constructed from the relative link found.
url = 'http://www.ishen365.com/index.php/article/29/show/36'

# Headers sent with the request: a single desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}
|
||||
|
||||
# Debug log lives next to this script, regardless of the current working dir.
log_file = os.path.join(os.path.dirname(__file__), 'debug.log')


def log(msg: str) -> None:
    """Append *msg* to the debug log file and echo it to stdout.

    Args:
        msg: The line to record; a trailing newline is added in the file.
    """
    # Explicit UTF-8 so non-ASCII messages do not depend on the platform's
    # default encoding (which may be unable to encode them).
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(msg + '\n')
    print(msg)
|
||||
|
||||
# Fetch the detail page, parse it, and save a prettified copy next to this
# script. Any failure along the way is logged rather than re-raised.
try:
    log(f"Fetching {url}...")
    response = requests.get(url, headers=headers, timeout=10)
    # Force UTF-8 when decoding response.text instead of the detected charset.
    response.encoding = 'utf-8'
    # The status code is only logged, not checked, so non-200 responses are
    # parsed and saved as well.
    log(f"Status Code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Save the HTML
    output_path = os.path.join(os.path.dirname(__file__), 'detail_content.html')
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())

    log(f"Saved HTML to {output_path}")

except Exception as e:
    # Top-level boundary of the script: report the error via the debug log.
    log(f"Error: {e}")
|
||||
Reference in New Issue
Block a user