mirror of
https://github.com/ultralytics/ultralytics
synced 2026-04-29 11:29:16 +00:00
Clone reference repos (EUPE, RADIO, UNIC, DUNE), install pinned deps (transformers==5.5.0, webdataset==0.2.111), download teacher weights, and verify all 7 teacher variants load correctly. Also ignore .codex/ and .agents/.
73 lines · 2.6 KiB · Bash · Executable file
#!/bin/bash
# Setup for YOLO universal encoder distillation (repos + deps + teacher weights)
# Run this first. Dataset download is separate (get_datacomp12m.sh).
#
# Teachers load via native Python (not TorchScript .ts) because EUPE/DINOv3 RoPE
# position encoding hardcodes device in torch.arange, making traced graphs non-portable.

# Fail fast: abort on any error, and on failures inside pipelines too
# (plain `set -e` does not catch a failing left-hand side of a pipe).
set -eo pipefail

# Base directory holding the ultralytics checkout and sibling reference repos.
# Override with DEV_DIR=... to run on another machine; default preserves the
# original hardcoded layout.
DEV_DIR="${DEV_DIR:-/home/fatih/dev}"

cd "${DEV_DIR}/ultralytics"

echo "=== Step 1: Clone reference repos ==="
# Idempotent: skip the clone when the target directory already exists.
[ -d "${DEV_DIR}/eupe" ]  || git clone https://github.com/facebookresearch/eupe "${DEV_DIR}/eupe"
[ -d "${DEV_DIR}/RADIO" ] || git clone https://github.com/NVlabs/RADIO "${DEV_DIR}/RADIO"
[ -d "${DEV_DIR}/unic" ]  || git clone https://github.com/naver/unic "${DEV_DIR}/unic"
[ -d "${DEV_DIR}/dune" ]  || git clone https://github.com/naver/dune "${DEV_DIR}/dune"

echo "=== Step 2: Install dependencies ==="
source .venv/bin/activate
# Enable nounset only after sourcing activate: some venv activate scripts
# reference unset variables (e.g. PS1) and would trip `set -u`.
set -u
# transformers/webdataset are pinned for reproducibility; img2dataset and
# huggingface_hub are needed by the dataset and weight-download steps.
uv pip install \
    "transformers==5.5.0" \
    "webdataset==0.2.111" \
    "img2dataset" \
    "huggingface_hub>=1.8.0"

echo "=== Step 3: Download EUPE weights (3 variants) ==="
python -c "
from huggingface_hub import hf_hub_download
for repo, fname in [
    ('facebook/EUPE-ViT-B', 'EUPE-ViT-B.pt'),
    ('facebook/EUPE-ViT-S', 'EUPE-ViT-S.pt'),
    ('facebook/EUPE-ConvNeXt-B', 'EUPE-ConvNeXt-B.pt'),
]:
    path = hf_hub_download(repo, fname)
    print(f' {repo} -> {path}')
"

echo "=== Step 4: Pre-download DINOv3 + SigLIP2 weights ==="
# Instantiating the models populates the local HF cache so the verification
# step (and later training) never hits the network.
python -c "
from transformers import AutoModel, SiglipVisionModel

for name in [
    'facebook/dinov3-vitb16-pretrain-lvd1689m',
    'facebook/dinov3-vitl16-pretrain-lvd1689m',
    'facebook/dinov3-convnext-base-pretrain-lvd1689m',
]:
    AutoModel.from_pretrained(name)
    print(f' {name} OK')

SiglipVisionModel.from_pretrained('google/siglip2-giant-opt-patch16-384')
print(' google/siglip2-giant-opt-patch16-384 OK')
"

echo "=== Step 5: Verify all teachers ==="
# Smoke-test: build each teacher on CPU and run one dummy forward pass.
python -c "
import torch
from ultralytics.nn.teacher_model import build_teacher_model, TEACHER_REGISTRY

# Skip vit7b (26GB, slow to load) and sam3:l (1024px, needs lots of RAM)
for variant in ['eupe:vitb16', 'eupe:vits16', 'eupe:convnextb',
                'dinov3:vitb16', 'dinov3:vitl16', 'dinov3:convnextb',
                'siglip2:g']:
    reg = TEACHER_REGISTRY[variant]
    teacher = build_teacher_model(variant, torch.device('cpu'))
    dummy = torch.randn(1, 3, reg['imgsz'], reg['imgsz'])
    with torch.no_grad():
        out = teacher.encode(dummy)
    cls_shape = out.cls.shape if out.cls is not None else None
    print(f' {variant}: cls={cls_shape}, patches={out.patches.shape}')
    del teacher
print('All teachers verified OK')
"

echo "Setup complete! Run get_datacomp12m.sh next for dataset."