conda create -n fastv python=3.10
conda activate fastv
cd src
bash setup.sh

We provide a script (./src/FastV/inference/visualization.sh) to reproduce the visualization result of each LLaVA model layer for a given image and prompt.
bash ./src/FastV/inference/visualization.sh

or

python ./src/FastV/inference/plot_inefficient_attention_massive.py \
--model-path "PATH-to-HF-LLaVA1.5-Checkpoints" \
--image-path "./src/LLaVA/images/llava_logo.png" \
--prompt "Describe the image in details."\
--output-path "./output_example"

It will produce a JSON file containing the massive activation weights.

python plt_massive.py
@article{zhang2026drives,
title={What drives attention sinks? A study of massive activations and rotational positional encoding in large vision--language models},
author={Zhang, Xiaofeng and Zhu, Yuanchao and Gu, Chaochen and Cao, Jiawei and Cheng, Hao and Wu, Kaijie},
journal={Information Processing \& Management},
volume={63},
number={2},
pages={104431},
year={2026},
publisher={Elsevier}
}