@@ -58,38 +58,23 @@ SQL >
5858 WHERE snapshotId = (SELECT max(snapshotId) FROM leaderboards_copy_ds)
5959 GROUP BY slug
6060
NODE project_insights_copy_period_metrics
DESCRIPTION >
    Calculate metrics for last 365 days and previous 365 days in a single scan

SQL >
    -- last365DaysStart splits the scanned 730-day window into two halves:
    -- rows at or after it belong to the "last" period, rows before it to the
    -- "previous" period (whose lower bound is enforced by the WHERE clause).
    WITH now() - INTERVAL 365 DAY AS last365DaysStart
    SELECT
        segmentId,
        -- Last 365 days
        countIf(type = 'star' AND timestamp >= last365DaysStart) AS starsLast365Days,
        countIf(type = 'fork' AND timestamp >= last365DaysStart) AS forksLast365Days,
        uniqIf(
            memberId,
            memberId != '' AND timestamp >= last365DaysStart
        ) AS activeContributorsLast365Days,
        uniqIf(
            organizationId,
            organizationId != '' AND timestamp >= last365DaysStart
        ) AS activeOrganizationsLast365Days,
        -- Previous 365 days (365-730 days ago)
        countIf(type = 'star' AND timestamp < last365DaysStart) AS starsPrevious365Days,
        countIf(type = 'fork' AND timestamp < last365DaysStart) AS forksPrevious365Days,
        uniqIf(
            memberId,
            memberId != '' AND timestamp < last365DaysStart
        ) AS activeContributorsPrevious365Days,
        uniqIf(
            organizationId,
            organizationId != '' AND timestamp < last365DaysStart
        ) AS activeOrganizationsPrevious365Days
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now()
    GROUP BY segmentId
9479
9580NODE project_insights_copy_project_results
@@ -120,19 +105,18 @@ SQL >
120105 dep.developmentPercentage AS developmentHealthScore,
121106 dep.securityPercentage AS securityHealthScore,
122107 base.firstCommit AS firstCommit,
123- l365 .starsLast365Days AS starsLast365Days,
124- l365 .forksLast365Days AS forksLast365Days,
125- l365 .activeContributorsLast365Days AS activeContributorsLast365Days,
126- l365 .activeOrganizationsLast365Days AS activeOrganizationsLast365Days,
127- p365 .starsPrevious365Days AS starsPrevious365Days,
128- p365 .forksPrevious365Days AS forksPrevious365Days,
129- p365 .activeContributorsPrevious365Days AS activeContributorsPrevious365Days,
130- p365 .activeOrganizationsPrevious365Days AS activeOrganizationsPrevious365Days
108+ pm .starsLast365Days AS starsLast365Days,
109+ pm .forksLast365Days AS forksLast365Days,
110+ pm .activeContributorsLast365Days AS activeContributorsLast365Days,
111+ pm .activeOrganizationsLast365Days AS activeOrganizationsLast365Days,
112+ pm .starsPrevious365Days AS starsPrevious365Days,
113+ pm .forksPrevious365Days AS forksPrevious365Days,
114+ pm .activeContributorsPrevious365Days AS activeContributorsPrevious365Days,
115+ pm .activeOrganizationsPrevious365Days AS activeOrganizationsPrevious365Days
131116 FROM project_insights_copy_base_projects AS base
132117 LEFT JOIN project_insights_copy_dependency_metrics AS dep ON base.slug = dep.slug
133118 LEFT JOIN project_insights_copy_achievements AS ach ON base.slug = ach.slug
134- LEFT JOIN project_insights_copy_last_365_days_metrics AS l365 USING (segmentId)
135- LEFT JOIN project_insights_copy_previous_365_days_metrics AS p365 USING (segmentId)
119+ LEFT JOIN project_insights_copy_period_metrics AS pm USING (segmentId)
136120
137121NODE project_insights_copy_repo_base
138122DESCRIPTION >
@@ -154,38 +138,23 @@ SQL >
154138 FROM repositories_populated_ds AS rp
155139 JOIN repositories r FINAL ON r.id = rp.id
156140
NODE project_insights_copy_repo_period_metrics
DESCRIPTION >
    Calculate repository-level metrics for last 365 days and previous 365 days in a single scan

SQL >
    -- last365DaysStart splits the scanned 730-day window into two halves:
    -- rows at or after it belong to the "last" period, rows before it to the
    -- "previous" period (whose lower bound is enforced by the WHERE clause).
    WITH now() - INTERVAL 365 DAY AS last365DaysStart
    SELECT
        channel,
        -- Last 365 days
        countIf(type = 'star' AND timestamp >= last365DaysStart) AS starsLast365Days,
        countIf(type = 'fork' AND timestamp >= last365DaysStart) AS forksLast365Days,
        uniqIf(
            memberId,
            memberId != '' AND timestamp >= last365DaysStart
        ) AS activeContributorsLast365Days,
        uniqIf(
            organizationId,
            organizationId != '' AND timestamp >= last365DaysStart
        ) AS activeOrganizationsLast365Days,
        -- Previous 365 days (365-730 days ago)
        countIf(type = 'star' AND timestamp < last365DaysStart) AS starsPrevious365Days,
        countIf(type = 'fork' AND timestamp < last365DaysStart) AS forksPrevious365Days,
        uniqIf(
            memberId,
            memberId != '' AND timestamp < last365DaysStart
        ) AS activeContributorsPrevious365Days,
        uniqIf(
            organizationId,
            organizationId != '' AND timestamp < last365DaysStart
        ) AS activeOrganizationsPrevious365Days
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now()
    GROUP BY channel
190159
191160NODE project_insights_copy_repo_results
@@ -205,28 +174,28 @@ SQL >
205174 base.contributorCount AS contributorCount,
206175 base.organizationCount AS organizationCount,
207176 base.softwareValue AS softwareValue,
208- toUInt64( 0) AS contributorDependencyCount,
209- toFloat64( 0) AS contributorDependencyPercentage,
210- toUInt64( 0) AS organizationDependencyCount,
211- toFloat64( 0) AS organizationDependencyPercentage,
177+ COALESCE(hs.contributorDependencyCount, 0) AS contributorDependencyCount,
178+ COALESCE(hs.contributorDependencyPercentage, 0) AS contributorDependencyPercentage,
179+ COALESCE(hs.organizationDependencyCount, 0) AS organizationDependencyCount,
180+ COALESCE(hs.organizationDependencyPercentage, 0) AS organizationDependencyPercentage,
212181 CAST([] AS Array(Tuple(String, UInt64, UInt64))) AS achievements,
213- CAST(NULL AS Nullable(Float64)) AS healthScore,
214- CAST(NULL AS Nullable(Float64)) AS contributorHealthScore,
215- CAST(NULL AS Nullable(Float64)) AS popularityHealthScore,
216- CAST(NULL AS Nullable(Float64)) AS developmentHealthScore,
217- CAST(NULL AS Nullable(Float64)) AS securityHealthScore,
182+ hs.overallScore AS healthScore,
183+ hs.contributorPercentage AS contributorHealthScore,
184+ hs.popularityPercentage AS popularityHealthScore,
185+ hs.developmentPercentage AS developmentHealthScore,
186+ hs.securityPercentage AS securityHealthScore,
218187 base.firstCommit AS firstCommit,
219- COALESCE(l365 .starsLast365Days, 0) AS starsLast365Days,
220- COALESCE(l365 .forksLast365Days, 0) AS forksLast365Days,
221- COALESCE(l365 .activeContributorsLast365Days, 0) AS activeContributorsLast365Days,
222- COALESCE(l365 .activeOrganizationsLast365Days, 0) AS activeOrganizationsLast365Days,
223- COALESCE(p365 .starsPrevious365Days, 0) AS starsPrevious365Days,
224- COALESCE(p365 .forksPrevious365Days, 0) AS forksPrevious365Days,
225- COALESCE(p365 .activeContributorsPrevious365Days, 0) AS activeContributorsPrevious365Days,
226- COALESCE(p365 .activeOrganizationsPrevious365Days, 0) AS activeOrganizationsPrevious365Days
188+ COALESCE(rm .starsLast365Days, 0) AS starsLast365Days,
189+ COALESCE(rm .forksLast365Days, 0) AS forksLast365Days,
190+ COALESCE(rm .activeContributorsLast365Days, 0) AS activeContributorsLast365Days,
191+ COALESCE(rm .activeOrganizationsLast365Days, 0) AS activeOrganizationsLast365Days,
192+ COALESCE(rm .starsPrevious365Days, 0) AS starsPrevious365Days,
193+ COALESCE(rm .forksPrevious365Days, 0) AS forksPrevious365Days,
194+ COALESCE(rm .activeContributorsPrevious365Days, 0) AS activeContributorsPrevious365Days,
195+ COALESCE(rm .activeOrganizationsPrevious365Days, 0) AS activeOrganizationsPrevious365Days
227196 FROM project_insights_copy_repo_base AS base
228- LEFT JOIN project_insights_copy_repo_last_365_days_metrics AS l365 ON base.repoUrl = l365 .channel
229- LEFT JOIN project_insights_copy_repo_previous_365_days_metrics AS p365 ON base.repoUrl = p365 .channel
197+ LEFT JOIN repo_health_score_copy_ds AS hs ON base.repoUrl = hs .channel
198+ LEFT JOIN project_insights_copy_repo_period_metrics AS rm ON base.repoUrl = rm .channel
230199
231200NODE project_insights_copy_results
232201DESCRIPTION >
0 commit comments