Skip to content

Commit 2c35f99

Browse files
committed
Perf: atype priority filters/ordering
1 parent d125950 commit 2c35f99

4 files changed

Lines changed: 113 additions & 168 deletions

File tree

colocus/api/filters.py

Lines changed: 59 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,30 @@ class BaseColocResultFilter(FilterSet):
6262
def __init__(self, *args, **kwargs):
6363
super().__init__(*args, **kwargs)
6464

65+
self.SIGNAL_ANNOTATIONS = {
66+
"primary_signal_trait": ("signal1__analysis__trait__uuid", "signal2__analysis__trait__uuid"),
67+
"secondary_signal_trait": ("signal2__analysis__trait__uuid", "signal1__analysis__trait__uuid"),
68+
"primary_signal_chrom": ("signal1__lead_variant__chrom", "signal2__lead_variant__chrom"),
69+
"secondary_signal_chrom": ("signal2__lead_variant__chrom", "signal1__lead_variant__chrom"),
70+
"primary_signal_pos": ("signal1__lead_variant__pos", "signal2__lead_variant__pos"),
71+
"secondary_signal_pos": ("signal2__lead_variant__pos", "signal1__lead_variant__pos"),
72+
"primary_signal_logp": ("signal1__neg_log_p", "signal2__neg_log_p"),
73+
"secondary_signal_logp": ("signal2__neg_log_p", "signal1__neg_log_p"),
74+
"primary_signal_tissue": ("signal1__analysis__tissue", "signal2__analysis__tissue"),
75+
"secondary_signal_tissue": ("signal2__analysis__tissue", "signal1__analysis__tissue"),
76+
"primary_signal_cell_type": ("signal1__analysis__cell_type", "signal2__analysis__cell_type"),
77+
"secondary_signal_cell_type": ("signal2__analysis__cell_type", "signal1__analysis__cell_type"),
78+
"primary_signal_study": ("signal1__analysis__study__uuid", "signal2__analysis__study__uuid"),
79+
"secondary_signal_study": ("signal2__analysis__study__uuid", "signal1__analysis__study__uuid"),
80+
"primary_signal_gene_ens_id": ("signal1__analysis__trait__gene__ens_id", "signal2__analysis__trait__gene__ens_id"),
81+
"secondary_signal_gene_ens_id": ("signal2__analysis__trait__gene__ens_id", "signal1__analysis__trait__gene__ens_id"),
82+
"primary_signal_gene_symbol": ("signal1__analysis__trait__gene__symbol", "signal2__analysis__trait__gene__symbol"),
83+
"secondary_signal_gene_symbol": ("signal2__analysis__trait__gene__symbol", "signal1__analysis__trait__gene__symbol"),
84+
"primary_signal_exon_ens_id": ("signal1__analysis__trait__exon__ens_id", "signal2__analysis__trait__exon__ens_id"),
85+
"secondary_signal_exon_ens_id": ("signal2__analysis__trait__exon__ens_id", "signal1__analysis__trait__exon__ens_id"),
86+
}
87+
88+
6589
# Dynamically create min_logp_{analysis_type} filters
6690
for _, analysis_type_name in ANALYSIS_TYPES:
6791
filter_name = f"min_logp_{analysis_type_name.lower()}" # filter name convention requires min_logp_*
@@ -78,6 +102,28 @@ def __init__(self, *args, **kwargs):
78102
),
79103
)
80104

105+
def _get_requested_ordering_fields(self):
    """Return the annotation names needed by the requested ordering.

    Reads the comma-separated ordering parameter from ``self.data`` (stored
    under the key ``self.order_by_field``), removes surrounding whitespace
    and any leading ``-`` (descending marker) from each term, and converts
    public ``signal1_*`` / ``signal2_*`` names into the internal
    ``primary_signal_*`` / ``secondary_signal_*`` annotation names.

    Only names that are keys of ``self.SIGNAL_ANNOTATIONS`` are returned, so
    the set of recognised ordering fields cannot drift from the annotations
    that can actually be built.

    Returns:
        set[str]: internal annotation names referenced by the ordering
        parameter; empty if no ordering was requested or nothing matched.
    """
    ordering_param = self.data.get(self.order_by_field, "")
    if not ordering_param:
        return set()

    # Public prefix -> internal annotation prefix.
    prefix_map = (
        ("signal1_", "primary_signal_"),
        ("signal2_", "secondary_signal_"),
    )

    requested = set()
    for raw_term in ordering_param.split(","):
        # Strip whitespace first so that " -signal1_pos" loses its dash too.
        field = raw_term.strip().lstrip("-").strip()
        for public_prefix, internal_prefix in prefix_map:
            if field.startswith(public_prefix):
                internal = internal_prefix + field[len(public_prefix):]
                # Ignore names that have no corresponding annotation.
                if internal in self.SIGNAL_ANNOTATIONS:
                    requested.add(internal)
                break
    return requested
126+
81127
def filter_queryset(self, queryset):
82128
# Add some fields that are useful for filtering/sorting but not stored directly in the DB
83129
# Dynamically create logp_max_over_{analysis_type} for each analysis type
@@ -115,106 +161,19 @@ def filter_queryset(self, queryset):
115161
# Add conditional annotations for ordering
116162
# These are necessary because on a per-row basis, signals may be swapped depending on user preference
117163
# (e.g. analysis_priority), so we need to create consistent "primary" and "secondary" signal fields
118-
queryset = queryset.annotate(
119-
primary_signal_trait=Case(
120-
When(no_signal_swap=True, then=F("signal1__analysis__trait__uuid")),
121-
default=F("signal2__analysis__trait__uuid"),
122-
),
123-
secondary_signal_trait=Case(
124-
When(no_signal_swap=True, then=F("signal2__analysis__trait__uuid")),
125-
default=F("signal1__analysis__trait__uuid"),
126-
),
127-
primary_signal_chrom=Case(
128-
When(no_signal_swap=True, then=F("signal1__lead_variant__chrom")),
129-
default=F("signal2__lead_variant__chrom"),
130-
),
131-
secondary_signal_chrom=Case(
132-
When(no_signal_swap=True, then=F("signal2__lead_variant__chrom")),
133-
default=F("signal1__lead_variant__chrom"),
134-
),
135-
primary_signal_pos=Case(
136-
When(no_signal_swap=True, then=F("signal1__lead_variant__pos")),
137-
default=F("signal2__lead_variant__pos"),
138-
),
139-
secondary_signal_pos=Case(
140-
When(no_signal_swap=True, then=F("signal2__lead_variant__pos")),
141-
default=F("signal1__lead_variant__pos"),
142-
),
143-
primary_signal_logp=Case(
144-
When(no_signal_swap=True, then=F("signal1__neg_log_p")),
145-
default=F("signal2__neg_log_p"),
146-
),
147-
secondary_signal_logp=Case(
148-
When(no_signal_swap=True, then=F("signal2__neg_log_p")),
149-
default=F("signal1__neg_log_p"),
150-
),
151-
primary_signal_tissue=Case(
152-
When(no_signal_swap=True, then=F("signal1__analysis__tissue")),
153-
default=F("signal2__analysis__tissue"),
154-
),
155-
secondary_signal_tissue=Case(
156-
When(no_signal_swap=True, then=F("signal2__analysis__tissue")),
157-
default=F("signal1__analysis__tissue"),
158-
),
159-
primary_signal_cell_type=Case(
160-
When(no_signal_swap=True, then=F("signal1__analysis__cell_type")),
161-
default=F("signal2__analysis__cell_type"),
162-
),
163-
secondary_signal_cell_type=Case(
164-
When(no_signal_swap=True, then=F("signal2__analysis__cell_type")),
165-
default=F("signal1__analysis__cell_type"),
166-
),
167-
primary_signal_study=Case(
168-
When(no_signal_swap=True, then=F("signal1__analysis__study__uuid")),
169-
default=F("signal2__analysis__study__uuid"),
170-
),
171-
secondary_signal_study=Case(
172-
When(no_signal_swap=True, then=F("signal2__analysis__study__uuid")),
173-
default=F("signal1__analysis__study__uuid"),
174-
),
175-
primary_signal_gene_ens_id=Case(
176-
When(
177-
no_signal_swap=True,
178-
then=F("signal1__analysis__trait__gene__ens_id"),
179-
),
180-
default=F("signal2__analysis__trait__gene__ens_id"),
181-
),
182-
secondary_signal_gene_ens_id=Case(
183-
When(
184-
no_signal_swap=True,
185-
then=F("signal2__analysis__trait__gene__ens_id"),
186-
),
187-
default=F("signal1__analysis__trait__gene__ens_id"),
188-
),
189-
primary_signal_gene_symbol=Case(
190-
When(
191-
no_signal_swap=True,
192-
then=F("signal1__analysis__trait__gene__symbol"),
193-
),
194-
default=F("signal2__analysis__trait__gene__symbol"),
195-
),
196-
secondary_signal_gene_symbol=Case(
197-
When(
198-
no_signal_swap=True,
199-
then=F("signal2__analysis__trait__gene__symbol"),
200-
),
201-
default=F("signal1__analysis__trait__gene__symbol"),
202-
),
203-
primary_signal_exon_ens_id=Case(
204-
When(
205-
no_signal_swap=True,
206-
then=F("signal1__analysis__trait__exon__ens_id"),
207-
),
208-
default=F("signal2__analysis__trait__exon__ens_id"),
209-
),
210-
secondary_signal_exon_ens_id=Case(
211-
When(
212-
no_signal_swap=True,
213-
then=F("signal2__analysis__trait__exon__ens_id"),
214-
),
215-
default=F("signal1__analysis__trait__exon__ens_id"),
216-
),
217-
)
164+
# Only annotate the fields that are actually needed for ordering
165+
requested_fields = self._get_requested_ordering_fields()
166+
167+
annotations = {}
168+
for field_name, (swap_false_path, swap_true_path) in self.SIGNAL_ANNOTATIONS.items():
169+
if field_name in requested_fields:
170+
annotations[field_name] = Case(
171+
When(no_signal_swap=True, then=F(swap_false_path)),
172+
default=F(swap_true_path),
173+
)
174+
175+
if annotations:
176+
queryset = queryset.annotate(**annotations)
218177

219178
return super().filter_queryset(queryset)
220179

colocus/api/views.py

Lines changed: 26 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -94,83 +94,41 @@ def annotate_prioritized_signals(queryset, analysis_type_priority=None):
9494
Annotated queryset with 'no_signal_swap' boolean field
9595
"""
9696
if analysis_type_priority:
97-
# Only first two analysis types are used to designate slots 1/2; the rest are ignored
98-
order_list = analysis_type_priority.split(",")[0:2]
99-
100-
# Create CASE statements for order1 and order2
101-
order1_whens = [
102-
When(signal1__analysis__analysis_type=atype, then=Value(idx))
103-
for idx, atype in enumerate(order_list)
104-
]
105-
106-
order2_whens = [
107-
When(signal2__analysis__analysis_type=atype, then=Value(idx))
108-
for idx, atype in enumerate(order_list)
109-
]
110-
111-
queryset = queryset.annotate(
112-
order1=Case(
113-
*order1_whens,
114-
default=Value(None, output_field=IntegerField()),
115-
output_field=IntegerField(),
116-
),
117-
order2=Case(
118-
*order2_whens,
119-
default=Value(None, output_field=IntegerField()),
120-
output_field=IntegerField(),
121-
),
122-
)
97+
order_list = [t.strip() for t in analysis_type_priority.split(",")[0:2]]
12398

12499
if len(order_list) == 0:
125-
pass # nothing to do in this case
126-
elif len(order_list) == 1:
127-
queryset = queryset.filter(Q(order1=0) | Q(order2=0))
128-
elif len(order_list) == 2:
129-
queryset = queryset.filter(
130-
((Q(order1=0) & Q(order2=1)) | (Q(order1=1) & Q(order2=0)))
131-
)
132-
else:
133-
# Raise exception
134-
raise drf_exceptions.ValidationError(
135-
"analysis_type_priority should contain <=2 analysis types"
136-
)
100+
return queryset.annotate(no_signal_swap=Value(True, output_field=BooleanField()))
101+
102+
# Simplified: signal1 should be the first priority type
103+
# no_signal_swap=True means signal1 is already the preferred type
104+
first_priority = order_list[0]
137105

138106
queryset = queryset.annotate(
139107
no_signal_swap=Case(
140-
When(Q(order1__isnull=True) & Q(order2__isnull=True), then=Value(True)),
141-
When(
142-
Q(order1__isnull=False) & Q(order2__isnull=True),
143-
then=Case(
144-
When(order1=0, then=Value(True)),
145-
When(order1=1, then=Value(False)),
146-
default=Value(True),
147-
output_field=BooleanField(),
148-
),
149-
),
150-
When(
151-
Q(order1__isnull=True) & Q(order2__isnull=False),
152-
then=Case(
153-
When(order2=0, then=Value(False)),
154-
When(order2=1, then=Value(True)),
155-
default=Value(True),
156-
output_field=BooleanField(),
157-
),
158-
),
159-
When(
160-
Q(order1__isnull=False) & Q(order2__isnull=False),
161-
then=Case(
162-
When(order1=0, then=Value(True)),
163-
When(order1=1, then=Value(False)),
164-
When(order2=0, then=Value(False)),
165-
When(order2=1, then=Value(True)),
166-
default=Value(True),
167-
output_field=BooleanField(),
168-
),
169-
),
108+
# If signal1 is the first priority type, don't swap
109+
When(signal1__analysis__analysis_type=first_priority, then=Value(True)),
110+
# If signal2 is the first priority type, swap
111+
When(signal2__analysis__analysis_type=first_priority, then=Value(False)),
112+
# Otherwise, don't swap
170113
default=Value(True),
171114
output_field=BooleanField(),
172115
)
173116
)
117+
118+
# Apply filtering based on priority
119+
if len(order_list) == 1:
120+
queryset = queryset.filter(
121+
Q(signal1__analysis__analysis_type=first_priority) |
122+
Q(signal2__analysis__analysis_type=first_priority)
123+
)
124+
elif len(order_list) == 2:
125+
second_priority = order_list[1]
126+
queryset = queryset.filter(
127+
(Q(signal1__analysis__analysis_type=first_priority) &
128+
Q(signal2__analysis__analysis_type=second_priority)) |
129+
(Q(signal1__analysis__analysis_type=second_priority) &
130+
Q(signal2__analysis__analysis_type=first_priority))
131+
)
174132
else:
175133
queryset = queryset.annotate(
176134
no_signal_swap=Value(True, output_field=BooleanField())
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Generated by Django 4.2.9 on 2025-12-03 22:04
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Alter ``MarginalAnalysis.analysis_type`` so the column is indexed.

    The field stays a ``TextField`` with the same choices and help text;
    the alteration sets ``db_index=True`` on it.
    """

    # This migration is applied after migration 0008 of the "core" app.
    dependencies = [
        ("core", "0008_metabolite_methylprobe_alter_dataset_analysis_type_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="marginalanalysis",
            name="analysis_type",
            field=models.TextField(
                choices=[
                    ("GWAS", "GWAS"),
                    ("eQTL", "eQTL"),
                    ("mQTL", "mQTL"),
                    ("metabQTL", "metabQTL"),
                    ("pQTL", "pQTL"),
                ],
                # Index the column in the database.
                db_index=True,
                help_text="Type of association analysis - GWAS, eQTL, pQTL, ATAC-seq, methylation, etc.",
            ),
        ),
    ]

colocus/core/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ class MarginalAnalysis(models.Model):
542542
analysis_type = models.TextField(
543543
choices=constants.ANALYSIS_TYPES,
544544
help_text="Type of association analysis - GWAS, eQTL, pQTL, ATAC-seq, methylation, etc.",
545+
db_index=True
545546
)
546547

547548
genome_build = models.TextField(

0 commit comments

Comments
 (0)