alisql version: 8.0.44-alisql-dev
Hi, I understand that ANN indexes may sacrifice accuracy for performance. However, in the following example, setting M=200 causes the query to return vectors near the end of the dataset, which are almost the farthest possible results from the query vector.
This seems different from a typical approximation error. Is this expected behavior, or does it indicate a bug in the vector index implementation?
-- Reset: remove the test tables and the loader procedure if a previous run left them behind,
-- so the script can be executed repeatedly.
DROP TABLE IF EXISTS
    tab_vidx_m3_1d_060,
    tab_vidx_m6_1d_060,
    tab_vidx_m200_1d_060;
DROP PROCEDURE IF EXISTS proc_insert_m_1d_060;
-- Test table with a 1-dimensional vector column; vector index vi is declared with
-- DISTANCE=EUCLIDEAN and M=3.
CREATE TABLE tab_vidx_m3_1d_060 (
    id INT PRIMARY KEY,
    v  VECTOR(1),
    VECTOR INDEX vi (v) DISTANCE=EUCLIDEAN M=3
);
-- Test table with a 1-dimensional vector column; vector index vi is declared with
-- DISTANCE=EUCLIDEAN and M=6.
CREATE TABLE tab_vidx_m6_1d_060 (
    id INT PRIMARY KEY,
    v  VECTOR(1),
    VECTOR INDEX vi (v) DISTANCE=EUCLIDEAN M=6
);
-- Test table with a 1-dimensional vector column; vector index vi is declared with
-- DISTANCE=EUCLIDEAN and M=200.
CREATE TABLE tab_vidx_m200_1d_060 (
    id INT PRIMARY KEY,
    v  VECTOR(1),
    VECTOR INDEX vi (v) DISTANCE=EUCLIDEAN M=200
);
DELIMITER //
-- proc_insert_m_1d_060: loads the same data set into each of the three test tables.
-- For every i from 1 to 10000 it inserts one row per table with id = i and the
-- 1-dimensional vector [i], built from text via VEC_FromText.
-- Takes no parameters and produces no result set.
CREATE PROCEDURE proc_insert_m_1d_060()
BEGIN
    DECLARE i INT DEFAULT 1;
    WHILE i <= 10000 DO
        -- Explicit column lists keep the inserts independent of table column order.
        INSERT INTO tab_vidx_m3_1d_060 (id, v)
        VALUES (i, VEC_FromText(CONCAT('[', i, ']')));
        INSERT INTO tab_vidx_m6_1d_060 (id, v)
        VALUES (i, VEC_FromText(CONCAT('[', i, ']')));
        INSERT INTO tab_vidx_m200_1d_060 (id, v)
        VALUES (i, VEC_FromText(CONCAT('[', i, ']')));
        SET i = i + 1;
    END WHILE;
END //
DELIMITER ;
-- Populate the three test tables by running the loader procedure.
CALL proc_insert_m_1d_060;
-- The loader is only needed once; remove it after the data is in place.
DROP PROCEDURE IF EXISTS proc_insert_m_1d_060;
-- M=3 table: first 10 ids ordered by VEC_DISTANCE to the query vector [1],
-- forcing the vector index vi.
SELECT
    id
FROM tab_vidx_m3_1d_060 FORCE INDEX (vi)
ORDER BY VEC_DISTANCE(v, VEC_FromText('[1]'))
LIMIT 10;
-- M=6 table: first 10 ids ordered by VEC_DISTANCE to the query vector [1],
-- forcing the vector index vi.
SELECT
    id
FROM tab_vidx_m6_1d_060 FORCE INDEX (vi)
ORDER BY VEC_DISTANCE(v, VEC_FromText('[1]'))
LIMIT 10;
-- M=200 table: first 10 ids ordered by VEC_DISTANCE to the query vector [1],
-- forcing the vector index vi.
SELECT
    id
FROM tab_vidx_m200_1d_060 FORCE INDEX (vi)
ORDER BY VEC_DISTANCE(v, VEC_FromText('[1]'))
LIMIT 10;
alisql version: 8.0.44-alisql-dev
Hi, I understand that ANN indexes may sacrifice accuracy for performance. However, in the following example, setting M=200 causes the query to return vectors near the end of the dataset, which are almost the farthest possible results from the query vector.
This seems different from a typical approximation error. Is this expected behavior, or does it indicate a bug in the vector index implementation?