on
ai 주식투자
- Get link
- X
- Other Apps
Python script that creates a GUI-based web scraper using Tkinter, designed for Windows, with features like repetitive scraping, Excel saving, a scrollable results area, clipboard copying, and a reasonably large window. A detailed explanation is provided along with the code.
import pygame
import requests
from bs4 import BeautifulSoup
import openpyxl
import tkinter as tk
from tkinter import scrolledtext, messagebox
import pyperclip
import threading
class WebScraperApp:
    """Tkinter-based GUI web scraper.

    Scrapes a user-supplied URL one or more times (page title and/or all
    <p> paragraph text), shows the results in a scrollable text area, and
    supports copying the results to the clipboard and exporting them to an
    Excel workbook (scraped_data.xlsx).
    """

    def __init__(self, master):
        """Build the widget tree on *master* (a tk.Tk root window)."""
        self.master = master
        master.title("Web Scraper")
        master.geometry("1000x800")  # reasonably large window

        # --- GUI elements ---
        self.url_label = tk.Label(master, text="URL:")
        self.url_label.pack()
        self.url_entry = tk.Entry(master, width=80)
        self.url_entry.pack()

        # BUG FIX: the original Checkbuttons had no bound variable, and the
        # scraper tested cget("state") == "normal" -- that is the widget's
        # enabled/disabled state (always "normal"), not whether the box is
        # checked, so both options were effectively always on.  Bind
        # BooleanVars instead (default True preserves the old effective
        # behavior of scraping both title and content).
        self.title_var = tk.BooleanVar(value=True)
        self.title_check = tk.Checkbutton(
            master, text="Scrape Title", variable=self.title_var
        )
        self.title_check.pack()
        self.content_var = tk.BooleanVar(value=True)
        self.content_check = tk.Checkbutton(
            master, text="Scrape Content", variable=self.content_var
        )
        self.content_check.pack()

        self.scrape_button = tk.Button(master, text="Scrape", command=self.start_scraping)
        self.scrape_button.pack()

        self.repeat_label = tk.Label(master, text="Repeat Count:")
        self.repeat_label.pack()
        self.repeat_entry = tk.Entry(master, width=10)
        self.repeat_entry.insert(0, "1")  # default repeat count
        self.repeat_entry.pack()

        self.text_area = scrolledtext.ScrolledText(master, wrap=tk.WORD, width=90, height=30)
        self.text_area.pack()

        self.copy_button = tk.Button(master, text="Copy to Clipboard", command=self.copy_to_clipboard)
        self.copy_button.pack()
        self.save_button = tk.Button(master, text="Save to Excel", command=self.save_to_excel)
        self.save_button.pack()

        self.status_label = tk.Label(master, text="Ready")
        self.status_label.pack()

    def start_scraping(self):
        """Validate the URL and repeat count, then scrape on a worker thread."""
        url = self.url_entry.get()
        if not url:
            messagebox.showerror("Error", "Please enter a URL.")
            return
        # BUG FIX: int() on free-form entry text raised an uncaught
        # ValueError; report it to the user instead of crashing the callback.
        try:
            repeat_count = int(self.repeat_entry.get())
        except ValueError:
            messagebox.showerror("Error", "Repeat count must be an integer.")
            return
        # Scrape on a separate thread to prevent GUI freezing; daemon=True so
        # a hung request cannot keep the process alive after the window closes.
        threading.Thread(
            target=self.run_scraping, args=(url, repeat_count), daemon=True
        ).start()

    def run_scraping(self, url, repeat_count):
        """Fetch *url* *repeat_count* times, appending results to the text area.

        NOTE(review): this runs on a worker thread but updates Tk widgets
        directly; Tkinter is not guaranteed thread-safe, so consider routing
        widget updates through master.after().  Left as in the original apart
        from the checkbox and timeout fixes.
        """
        self.status_label.config(text="Scraping...")
        self.text_area.delete("1.0", tk.END)  # clear previous content
        for _ in range(repeat_count):
            try:
                # Timeout added so a stalled server cannot hang the thread forever.
                response = requests.get(url, timeout=30)
                response.raise_for_status()  # raise HTTPError for 4xx/5xx responses
                soup = BeautifulSoup(response.content, 'html.parser')
                if self.title_var.get():  # title checkbox actually checked
                    title = soup.title.text.strip() if soup.title else "No Title Found"
                    self.text_area.insert(tk.END, f"Title: {title}\n")
                if self.content_var.get():  # content checkbox actually checked
                    # Example: extract all paragraphs
                    for p in soup.find_all('p'):
                        self.text_area.insert(tk.END, p.text.strip() + "\n")
                self.text_area.insert(tk.END, "\n")  # separator between scrapes
            except requests.exceptions.RequestException as e:
                self.text_area.insert(tk.END, f"Error during scraping: {e}\n")
            except Exception as e:
                self.text_area.insert(tk.END, f"An unexpected error occurred: {e}\n")
        self.status_label.config(text="Scraping complete.")

    def copy_to_clipboard(self):
        """Copy the full text-area contents to the system clipboard."""
        try:
            text = self.text_area.get("1.0", tk.END)
            pyperclip.copy(text)
            messagebox.showinfo("Success", "Text copied to clipboard!")
        except Exception as e:
            messagebox.showerror("Error", f"Error copying to clipboard: {e}")

    def save_to_excel(self):
        """Write the text-area contents, one line per row, to scraped_data.xlsx."""
        try:
            text = self.text_area.get("1.0", tk.END)
            workbook = openpyxl.Workbook()
            sheet = workbook.active
            sheet.append(["Scraped Data"])  # header row
            for line in text.splitlines():
                sheet.append([line])
            workbook.save("scraped_data.xlsx")
            messagebox.showinfo("Success", "Data saved to scraped_data.xlsx")
        except Exception as e:
            messagebox.showerror("Error", f"Error saving to Excel: {e}")
# Third-party dependencies: pip install requests beautifulsoup4 openpyxl pyperclip
# (tkinter ships with CPython; pygame, mentioned in the original post, is
# never actually used by this script.)
if __name__ == "__main__":
    root = tk.Tk()
    app = WebScraperApp(root)
    root.mainloop()
Run with: python web_scraper.py
Comments
Post a Comment