# msh-system/msh_crmeb_22/scraper/analyze_site.py

import requests
from bs4 import BeautifulSoup
import os

# Page whose markup is captured for offline inspection.
url = 'http://www.ishen365.com/index.php/swcfb'

# Present a desktop Chrome User-Agent string with the request.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
headers = {'User-Agent': user_agent}

try:
    print(f"Fetching {url}...")
    # Give up if the server has not responded within 10 seconds.
    response = requests.get(url, headers=headers, timeout=10)
    # Decode the body as UTF-8 regardless of the charset requests detected.
    response.encoding = 'utf-8'
    # The status code is only reported; non-200 responses are still saved.
    print(f"Status Code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Store the snapshot next to this script. prettify() re-serializes the
    # parsed tree, so the file holds a formatted rendering of the page
    # rather than the raw response body.
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, 'site_content.html')
    with open(output_path, mode='w', encoding='utf-8') as html_file:
        formatted_markup = soup.prettify()
        html_file.write(formatted_markup)

    print(f"Saved HTML to {output_path}")

except Exception as exc:
    # Any failure in the fetch, parse, or write steps is printed to stdout
    # instead of propagating.
    print(f"Error: {exc}")