Skip to content

Commit 8d30730

Browse files
committed
Filesize checks
1 parent a447f4a commit 8d30730

2 files changed

Lines changed: 4 additions & 2 deletions

File tree

.github/workflows/links.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ jobs:
235 235
236 236
- name: Notify Slack for large images
237 237
if: always() && steps.image_sizes.outcome == 'failure' && github.event_name == 'schedule' && github.run_attempt == '1'
238 +
# if: always() && steps.image_sizes.outcome == 'failure' && github.event_name == 'pull_request' && github.run_attempt == '1'
238 239
uses: slackapi/slack-github-action@v2.1.1
239 240
with:
240 241
webhook-type: incoming-webhook

utils/check_image_sizes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
13 13
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
14 14

15 15

16 -
def check_image_sizes(download_dir, website, threshold_mb=0.5, max_workers=64):
16 +
def check_image_sizes(download_dir, website, threshold_kb=500, max_workers=64):
17 17
"""Check image sizes in downloaded HTML files and report large images."""
18 18
print(f"Scanning {download_dir} for images...")
19 19
unique_images = defaultdict(set)
@@ -105,8 +105,9 @@ def get_size(url):
105 105
print("\nTop 10 Largest Images:")
106 106
top_10 = df.head(10).copy()
107 107
top_10["Size (KB)"] = top_10["Size (KB)"].round(1)
108 +
top_10["Example Page"] = top_10["URL"].apply(lambda url: list(unique_images[url])[0])
108 109
top_10["URL"] = top_10["URL"].apply(lambda x: x if len(x) <= 120 else x[:60] + "..." + x[-57:])
109 -
print(top_10.to_string(index=False))
110 +
print(top_10[["URL", "Pages", "Size (KB)", "Format", "Example Page"]].to_string(index=False))
110 111

111 112
# Check for large images above threshold
112 113
large_images = [

0 commit comments

Comments
 (0)