36 lines
1.0 KiB
Python
36 lines
1.0 KiB
Python
|
|
import requests
|
||
|
|
from bs4 import BeautifulSoup
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
|
||
|
|
# Absolute URL of the page to fetch. It is hard-coded here; per the original
# note it was derived from a relative link found earlier.
url = 'http://www.ishen365.com/index.php/article/29/show/36'

# Request headers sent with the fetch: only a desktop-browser User-Agent.
# The string is split across lines for readability; adjacent literals are
# concatenated by the parser, so the value is unchanged.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.114 Safari/537.36'
    ),
}

# Debug log lives next to this script, independent of the working directory.
log_file = os.path.join(os.path.dirname(__file__), 'debug.log')


def log(msg):
    """Append *msg* to the debug log file and echo it to stdout.

    Args:
        msg: Text to record. A trailing newline is added in the file.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    # Explicit UTF-8: messages may contain non-ASCII text, and the platform
    # default encoding is not guaranteed to be able to represent it.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(msg + '\n')
    print(msg)


# Fetch the page at `url`, parse it, and save a pretty-printed copy next to
# this script. Any failure is logged instead of propagating, since this is the
# script's top-level boundary.
try:
    log(f"Fetching {url}...")
    response = requests.get(url, headers=headers, timeout=10)
    # Decode the body as UTF-8 regardless of what requests inferred.
    response.encoding = 'utf-8'
    log(f"Status Code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Save the HTML
    output_path = os.path.join(os.path.dirname(__file__), 'detail_content.html')
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())

    log(f"Saved HTML to {output_path}")

except Exception as e:
    # Covers network, parsing and file errors alike; only the message is
    # recorded (no traceback).
    log(f"Error: {e}")