Skip to content

Commit 02a3b68

Browse files
committed
Vector filters (#1162)
added - Solarium\Core\Query\Helper::knn() - Solarium\Core\Query\Helper::knnTextToVector() - Solarium\Core\Query\Helper::vectorSimilarity()
1 parent a9a4fb1 commit 02a3b68

3 files changed

Lines changed: 186 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77
## [Unreleased]
88
### Added
99
- CBOR formatted update requests
10+
- Solarium\Core\Query\Helper::knn()
11+
- Solarium\Core\Query\Helper::knnTextToVector()
12+
- Solarium\Core\Query\Helper::vectorSimilarity()
1013

1114
### Changed
1215

src/Core/Query/Helper.php

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,11 @@ public function qparser(string $name, array $params = [], bool $dereferenced = f
346346
foreach ($params as $key => $value) {
347347
if (!$dereferenced || $forceKeys || \is_int($key)) {
348348
if (\is_array($value)) {
349+
if ('preFilter' === $key) {
350+
// preFilter is a special case, it needs to be split into multiple params
351+
$output .= ' '.$key.'='.implode(' '.$key.'=', $value);
352+
continue;
353+
}
349354
$value = implode(',', $value);
350355
} elseif (\is_bool($value)) {
351356
$value = $value ? 'true' : 'false';
@@ -564,4 +569,138 @@ protected function renderPlaceHolder(array $matches): string
564569

565570
return $value;
566571
}
572+
573+
/**
 * Render a knn filter.
 *
 * The knn (k-nearest neighbors) query parser matches the k-nearest documents
 * to the target vector. The local params are built from the shared vector
 * parameters plus the optional topK, rendered through qparser(), and followed
 * by the target vector formatted as a bracketed float list.
 *
 * @param string            $field       value of the "f" local param
 * @param float[]           $vector      target vector appended after the local params
 * @param int|null          $topK        added as "topK" only when not null
 * @param array|string|null $preFilter   added as "preFilter" only when not null
 * @param array|string|null $includeTags added as "includeTags" only when not null
 * @param array|string|null $excludeTags added as "excludeTags" only when not null
 *
 * @return string
 */
public function knn(string $field, array $vector, ?int $topK = null, array|string|null $preFilter = null, array|string|null $includeTags = null, array|string|null $excludeTags = null): string
{
    $localParams = $this->getCommonVectorParams($field, $preFilter, $includeTags, $excludeTags);

    // topK is optional; when omitted the key is left out entirely.
    if (null !== $topK) {
        $localParams['topK'] = $topK;
    }

    $parserPrefix = $this->qparser('knn', $localParams);
    $targetVector = $this->getFloatList($vector);

    return $parserPrefix.$targetVector;
}
600+
601+
/**
 * Render a knn_text_to_vector filter.
 *
 * The knn_text_to_vector query parser encodes a textual query to a vector
 * using a dedicated Large Language Model (fine-tuned for the task of encoding
 * text to vector for sentence similarity) and matches the k-nearest neighbour
 * documents to that query vector.
 *
 * The textual query is appended verbatim after the rendered local params.
 *
 * @param string            $model       value of the "model" local param
 * @param string            $field       value of the "f" local param
 * @param string            $query       text appended after the local params
 * @param int|null          $topK        added as "topK" only when not null
 * @param array|string|null $preFilter   added as "preFilter" only when not null
 * @param array|string|null $includeTags added as "includeTags" only when not null
 * @param array|string|null $excludeTags added as "excludeTags" only when not null
 *
 * @return string
 */
public function knnTextToVector(string $model, string $field, string $query, ?int $topK = null, array|string|null $preFilter = null, array|string|null $includeTags = null, array|string|null $excludeTags = null): string
{
    // The common params never contain a "model" key, so the union simply
    // appends it after them.
    $localParams = $this->getCommonVectorParams($field, $preFilter, $includeTags, $excludeTags) + ['model' => $model];

    if (null !== $topK) {
        $localParams['topK'] = $topK;
    }

    $parserPrefix = $this->qparser('knn_text_to_vector', $localParams);

    return $parserPrefix.$query;
}
632+
633+
/**
 * Render a vectorSimilarity filter.
 *
 * The vectorSimilarity query parser matches documents whose similarity with
 * the target vector is above a minimum threshold. Both thresholds are always
 * included in the local params; the target vector is appended as a bracketed
 * float list.
 *
 * @param string            $field       value of the "f" local param
 * @param float[]           $vector      target vector appended after the local params
 * @param float             $minReturn   value of the "minReturn" local param
 * @param string            $minTraverse value of the "minTraverse" local param, '-Infinity' by default
 * @param array|string|null $preFilter   added as "preFilter" only when not null
 * @param array|string|null $includeTags added as "includeTags" only when not null
 * @param array|string|null $excludeTags added as "excludeTags" only when not null
 *
 * @return string
 */
public function vectorSimilarity(string $field, array $vector, float $minReturn, string $minTraverse = '-Infinity', array|string|null $preFilter = null, array|string|null $includeTags = null, array|string|null $excludeTags = null): string
{
    $thresholds = [
        'minReturn' => $minReturn,
        'minTraverse' => $minTraverse,
    ];

    // The common params never contain the threshold keys, so the union
    // appends them in the order given above.
    $localParams = $this->getCommonVectorParams($field, $preFilter, $includeTags, $excludeTags) + $thresholds;

    $parserPrefix = $this->qparser('vectorSimilarity', $localParams);
    $targetVector = $this->getFloatList($vector);

    return $parserPrefix.$targetVector;
}
660+
661+
/**
 * Get the parameters shared by the knn and vector similarity filters.
 *
 * The result always starts with the "f" key holding the field. The optional
 * "preFilter", "includeTags" and "excludeTags" keys follow in that order,
 * each one present only when the matching argument is not null. Values are
 * stored as given (string or array), including empty strings and arrays.
 *
 * @param string            $field
 * @param array|string|null $preFilter
 * @param array|string|null $includeTags
 * @param array|string|null $excludeTags
 *
 * @return array
 */
protected function getCommonVectorParams(string $field, array|string|null $preFilter = null, array|string|null $includeTags = null, array|string|null $excludeTags = null): array
{
    $optional = [
        'preFilter' => $preFilter,
        'includeTags' => $includeTags,
        'excludeTags' => $excludeTags,
    ];

    // Drop only the nulls; an explicit callback is required because the
    // default array_filter() behaviour would also discard '' and [].
    $provided = array_filter($optional, static fn ($value): bool => null !== $value);

    return ['f' => $field] + $provided;
}
688+
689+
/**
 * Get a float list as a string.
 *
 * Each value is rendered on its own and the results are joined with ", "
 * inside square brackets. Values that equal their integer cast are written
 * with exactly one decimal place (e.g. "2.0"); all other values use PHP's
 * default string conversion. An empty input yields "[]".
 *
 * @param float[] $values
 *
 * @return string
 */
protected function getFloatList(array $values): string
{
    $rendered = [];

    foreach ($values as $number) {
        // Loose comparison so that both ints and whole-number floats take
        // the fixed one-decimal format.
        $isWhole = $number == (int) $number;

        $rendered[] = $isWhole
            ? number_format($number, 1, '.', '')
            : (string) $number;
    }

    return '['.implode(', ', $rendered).']';
}
567706
}

tests/Core/Query/HelperTest.php

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,4 +762,48 @@ protected function mockFormatDateOutput($timestamp): string
762762

763763
return strstr($date->format(\DateTime::ISO8601), '+', true).'Z';
764764
}
765+
766+
/**
 * knn() renders the local params followed by the vector as a float list.
 */
public function testKnn(): void
{
    $vector = [1.0, 2.0, 3.0, 4.0];

    // Only field, vector and topK.
    $minimal = $this->helper->knn('vector', $vector, 10);
    $this->assertSame(
        '{!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]',
        $minimal
    );

    // Scalar preFilter and tags.
    $withScalars = $this->helper->knn('vector', $vector, 10, 'category:AAA', 'tagA', 'tagB');
    $this->assertSame(
        '{!knn f=vector preFilter=category:AAA includeTags=tagA excludeTags=tagB topK=10}[1.0, 2.0, 3.0, 4.0]',
        $withScalars
    );

    // Array preFilter is repeated per entry, array tags are comma-joined.
    $withArrays = $this->helper->knn('vector', $vector, 10, ['category:AAA', 'inStock:true'], ['tagA', 'tagB']);
    $this->assertSame(
        '{!knn f=vector preFilter=category:AAA preFilter=inStock:true includeTags=tagA,tagB topK=10}[1.0, 2.0, 3.0, 4.0]',
        $withArrays
    );
}
783+
784+
/**
 * knnTextToVector() renders the local params followed by the raw query text.
 */
public function testKnnTextToVector(): void
{
    $model = 'a-model';
    $text = 'hello world query';

    // Without a preFilter.
    $minimal = $this->helper->knnTextToVector($model, 'vector', $text, 10);
    $this->assertSame(
        '{!knn_text_to_vector f=vector model=a-model topK=10}hello world query',
        $minimal
    );

    // With a scalar preFilter, which is rendered before the model.
    $filtered = $this->helper->knnTextToVector($model, 'vector', $text, 10, 'category:AAA');
    $this->assertSame(
        '{!knn_text_to_vector f=vector preFilter=category:AAA model=a-model topK=10}hello world query',
        $filtered
    );
}
796+
797+
/**
 * vectorSimilarity() renders both thresholds and the vector as a float list.
 */
public function testVectorSimilarity(): void
{
    $vector = [1.0, 2.0, 3.0, 4.0];

    // minTraverse falls back to its default.
    $withDefault = $this->helper->vectorSimilarity('vector', $vector, 0.7);
    $this->assertSame(
        '{!vectorSimilarity f=vector minReturn=0.7 minTraverse=-Infinity}[1.0, 2.0, 3.0, 4.0]',
        $withDefault
    );

    // Explicit minTraverse plus a scalar preFilter.
    $filtered = $this->helper->vectorSimilarity('vector', $vector, 0.7, '-Infinity', 'category:AAA');
    $this->assertSame(
        '{!vectorSimilarity f=vector preFilter=category:AAA minReturn=0.7 minTraverse=-Infinity}[1.0, 2.0, 3.0, 4.0]',
        $filtered
    );
}
765809
}

0 commit comments

Comments
 (0)