mirror of
https://github.com/ollama/ollama
synced 2026-04-23 08:45:14 +00:00
convert: support fp8 safetensors import
Decode HF F8_E4M3 safetensors with block scale companions into GGUF-supported tensor types, and record which output tensors came from FP8 source weights. Use that source-precision metadata during create quantization: default FP8-sourced GGUFs to Q8_0, keep non-FP8 tensors at their original precision for Q8_0, and promote non-FP8 quantizable tensors to Q8_0 for Q4_K requests.
This commit is contained in:
parent
22d6c817f8
commit
799c0e9cc5
|
|
@ -387,6 +387,10 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
|
|||
}
|
||||
|
||||
func writeFile(f *os.File, kv KV, ts []*ggml.Tensor) error {
|
||||
for k, v := range sourceTensorKV(ts) {
|
||||
kv[k] = v
|
||||
}
|
||||
|
||||
for i := range ts {
|
||||
ts[i].Shape = slices.Clone(ts[i].Shape)
|
||||
slices.Reverse(ts[i].Shape)
|
||||
|
|
|
|||
|
|
@ -5,10 +5,12 @@ import (
|
|||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"maps"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
|
|
@ -23,6 +25,11 @@ type safetensorMetadata struct {
|
|||
}
|
||||
|
||||
func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
|
||||
fp8Block, err := safetensorsFP8BlockSize(fsys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ts []Tensor
|
||||
for _, p := range ps {
|
||||
f, err := fsys.Open(p)
|
||||
|
|
@ -50,24 +57,47 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
|
|||
|
||||
names := make(map[string]struct{}, len(keys))
|
||||
|
||||
fp8Scales, err := collectSafetensorsFP8Scales(n, headers)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, key := range keys {
|
||||
if value := headers[key]; value.Type != "" {
|
||||
if _, ok := fp8Scales.consumed[key]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// Scalar tensors (e.g. clipped linear min/max) are 0-dim in safetensors.
|
||||
// Promote them to 1-dim so they can be stored in GGUF.
|
||||
if len(value.Shape) == 0 {
|
||||
value.Shape = []uint64{1}
|
||||
}
|
||||
|
||||
var scale *safetensorScale
|
||||
if value.Type == "F8_E4M3" {
|
||||
if !fp8Block.ok {
|
||||
return nil, fmt.Errorf("missing fp8 block size metadata for tensor %q", key)
|
||||
}
|
||||
scale = fp8Scales.byWeight[key]
|
||||
if scale == nil {
|
||||
return nil, fmt.Errorf("missing fp8 scale companion for tensor %q", key)
|
||||
}
|
||||
}
|
||||
|
||||
ggufName := replacer.Replace(key)
|
||||
if _, ok := names[ggufName]; ok {
|
||||
return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName)
|
||||
}
|
||||
names[ggufName] = struct{}{}
|
||||
ts = append(ts, safetensor{
|
||||
fs: fsys,
|
||||
path: p,
|
||||
dtype: value.Type,
|
||||
offset: safetensorsPad(n, value.Offsets[0]),
|
||||
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
|
||||
fs: fsys,
|
||||
path: p,
|
||||
dtype: value.Type,
|
||||
offset: safetensorsPad(n, value.Offsets[0]),
|
||||
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
|
||||
scale: scale,
|
||||
fp8Block: fp8Block,
|
||||
tensorBase: &tensorBase{
|
||||
name: ggufName,
|
||||
shape: value.Shape,
|
||||
|
|
@ -85,12 +115,22 @@ func safetensorsPad(n, offset int64) int64 {
|
|||
return 8 + n + offset
|
||||
}
|
||||
|
||||
type safetensor struct {
|
||||
fs fs.FS
|
||||
path string
|
||||
type safetensorScale struct {
|
||||
name string
|
||||
dtype string
|
||||
shape []uint64
|
||||
offset int64
|
||||
size int64
|
||||
}
|
||||
|
||||
type safetensor struct {
|
||||
fs fs.FS
|
||||
path string
|
||||
dtype string
|
||||
offset int64
|
||||
size int64
|
||||
scale *safetensorScale
|
||||
fp8Block safetensorFP8BlockSize
|
||||
*tensorBase
|
||||
}
|
||||
|
||||
|
|
@ -104,17 +144,26 @@ func (st safetensor) Kind() uint32 {
|
|||
kind != tensorKindFP32 {
|
||||
kind = tensorKindBF16
|
||||
}
|
||||
if st.dtype == "F8_E4M3" && kind != tensorKindFP32 {
|
||||
kind = tensorKindBF16
|
||||
}
|
||||
|
||||
return kind
|
||||
}
|
||||
|
||||
func (st safetensor) SourceDType() string {
|
||||
return st.dtype
|
||||
}
|
||||
|
||||
func (st safetensor) Clone() Tensor {
|
||||
return &safetensor{
|
||||
fs: st.fs,
|
||||
path: st.path,
|
||||
dtype: st.dtype,
|
||||
offset: st.offset,
|
||||
size: st.size,
|
||||
fs: st.fs,
|
||||
path: st.path,
|
||||
dtype: st.dtype,
|
||||
offset: st.offset,
|
||||
size: st.size,
|
||||
scale: st.scale.Clone(),
|
||||
fp8Block: st.fp8Block,
|
||||
tensorBase: &tensorBase{
|
||||
name: st.name,
|
||||
repacker: st.repacker,
|
||||
|
|
@ -123,6 +172,19 @@ func (st safetensor) Clone() Tensor {
|
|||
}
|
||||
}
|
||||
|
||||
func (ss *safetensorScale) Clone() *safetensorScale {
|
||||
if ss == nil {
|
||||
return nil
|
||||
}
|
||||
return &safetensorScale{
|
||||
name: ss.name,
|
||||
dtype: ss.dtype,
|
||||
shape: slices.Clone(ss.shape),
|
||||
offset: ss.offset,
|
||||
size: ss.size,
|
||||
}
|
||||
}
|
||||
|
||||
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
||||
f, err := st.fs.Open(st.path)
|
||||
if err != nil {
|
||||
|
|
@ -180,6 +242,16 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
|||
}
|
||||
|
||||
f32s = bfloat16.DecodeFloat32(u8s)
|
||||
case "F8_E4M3":
|
||||
u8s := make([]uint8, st.size)
|
||||
if err = binary.Read(br, binary.LittleEndian, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s, err = st.decodeFP8E4M3(u8s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
|
||||
}
|
||||
|
|
@ -208,3 +280,334 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
|||
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
type safetensorsFP8Scales struct {
|
||||
byWeight map[string]*safetensorScale
|
||||
consumed map[string]struct{}
|
||||
}
|
||||
|
||||
func collectSafetensorsFP8Scales(n int64, headers map[string]safetensorMetadata) (safetensorsFP8Scales, error) {
|
||||
scales := safetensorsFP8Scales{
|
||||
byWeight: make(map[string]*safetensorScale),
|
||||
consumed: make(map[string]struct{}),
|
||||
}
|
||||
|
||||
for key, value := range headers {
|
||||
if value.Type != "F8_E4M3" {
|
||||
continue
|
||||
}
|
||||
|
||||
scaleKey, scaleValue, ok, err := safetensorsFP8Scale(key, headers)
|
||||
if err != nil {
|
||||
return safetensorsFP8Scales{}, err
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := scales.consumed[scaleKey]; ok {
|
||||
return safetensorsFP8Scales{}, fmt.Errorf("fp8 scale companion %q is used by multiple tensors", scaleKey)
|
||||
}
|
||||
|
||||
scales.byWeight[key] = &safetensorScale{
|
||||
name: scaleKey,
|
||||
dtype: scaleValue.Type,
|
||||
shape: slices.Clone(scaleValue.Shape),
|
||||
offset: safetensorsPad(n, scaleValue.Offsets[0]),
|
||||
size: safetensorsPad(n, scaleValue.Offsets[1]) - safetensorsPad(n, scaleValue.Offsets[0]),
|
||||
}
|
||||
scales.consumed[scaleKey] = struct{}{}
|
||||
}
|
||||
|
||||
return scales, nil
|
||||
}
|
||||
|
||||
func safetensorsFP8Scale(key string, headers map[string]safetensorMetadata) (string, safetensorMetadata, bool, error) {
|
||||
candidates := safetensorsFP8ScaleCandidates(key)
|
||||
|
||||
var scaleKey string
|
||||
var scaleValue safetensorMetadata
|
||||
if strings.HasSuffix(key, ".weight") {
|
||||
// Keep support for compressed-tensors exports that place the scale name
|
||||
// between the module path and weight suffix.
|
||||
base := strings.TrimSuffix(key, ".weight")
|
||||
candidates = appendUnique(candidates, base+".weight_scale")
|
||||
candidates = appendUnique(candidates, base+".weight_scale_inv")
|
||||
}
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if value, ok := headers[candidate]; ok && value.Type != "" {
|
||||
if scaleKey != "" {
|
||||
return "", safetensorMetadata{}, false, fmt.Errorf("multiple fp8 scale companions for tensor %q: %q and %q", key, scaleKey, candidate)
|
||||
}
|
||||
scaleKey = candidate
|
||||
scaleValue = value
|
||||
}
|
||||
}
|
||||
if scaleKey == "" {
|
||||
return "", safetensorMetadata{}, false, nil
|
||||
}
|
||||
|
||||
return scaleKey, scaleValue, true, nil
|
||||
}
|
||||
|
||||
// safetensorsFP8ScaleCandidates lists, in lookup order, the header keys under
// which a scale companion for the tensor key may be stored: key followed by
// "_scale", "_scale_inv", ".scale" and ".scale_inv".
func safetensorsFP8ScaleCandidates(key string) []string {
	var names []string
	for _, suffix := range [...]string{"_scale", "_scale_inv", ".scale", ".scale_inv"} {
		names = appendUnique(names, key+suffix)
	}
	return names
}

// appendUnique appends value to values unless it is already present, and
// returns the resulting slice.
func appendUnique(values []string, value string) []string {
	if slices.Index(values, value) >= 0 {
		return values
	}
	return append(values, value)
}
|
||||
|
||||
// safetensorFP8BlockSize is the tile size, in elements, over which FP8 weights
// share a single scale value.
type safetensorFP8BlockSize struct {
	// rows is the block height in weight rows.
	rows int
	// cols is the block width in weight columns.
	cols int
	// ok is true when a block size is present; the zero value means "none".
	ok bool
}
|
||||
|
||||
// safetensorsSourceQuantization holds the subset of a config.json quantization
// section that is decoded when looking for the FP8 block size.
type safetensorsSourceQuantization struct {
	// QuantMethod is the "quant_method" value of the section.
	QuantMethod string `json:"quant_method"`
	// Format is the section-level "format" value.
	Format string `json:"format"`
	// WeightBlockSize is the "weight_block_size" list of the section.
	WeightBlockSize []int `json:"weight_block_size"`
	// ConfigGroups holds the per-group settings under "config_groups", keyed
	// by group name.
	ConfigGroups map[string]struct {
		// Format is the group-level "format" value.
		Format string `json:"format"`
		// Weights describes how the group's weights are quantized.
		Weights struct {
			BlockStructure []int  `json:"block_structure"`
			NumBits        int    `json:"num_bits"`
			Type           string `json:"type"`
		} `json:"weights"`
	} `json:"config_groups"`
}
|
||||
|
||||
type safetensorsModelConfig struct {
|
||||
Quantization safetensorsSourceQuantization `json:"quantization"`
|
||||
QuantizationConfig safetensorsSourceQuantization `json:"quantization_config"`
|
||||
CompressionConfig safetensorsSourceQuantization `json:"compression_config"`
|
||||
TextConfig struct {
|
||||
Quantization safetensorsSourceQuantization `json:"quantization"`
|
||||
QuantizationConfig safetensorsSourceQuantization `json:"quantization_config"`
|
||||
CompressionConfig safetensorsSourceQuantization `json:"compression_config"`
|
||||
} `json:"text_config"`
|
||||
}
|
||||
|
||||
func safetensorsFP8BlockSize(fsys fs.FS) (safetensorFP8BlockSize, error) {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return safetensorFP8BlockSize{}, nil
|
||||
}
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
bts = sanitizeNonFiniteJSON(bts)
|
||||
|
||||
var cfg safetensorsModelConfig
|
||||
if err := json.Unmarshal(bts, &cfg); err != nil {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("parse config.json fp8 metadata: %w", err)
|
||||
}
|
||||
|
||||
var blocks []safetensorFP8BlockSize
|
||||
for _, q := range []safetensorsSourceQuantization{
|
||||
cfg.Quantization,
|
||||
cfg.QuantizationConfig,
|
||||
cfg.CompressionConfig,
|
||||
cfg.TextConfig.Quantization,
|
||||
cfg.TextConfig.QuantizationConfig,
|
||||
cfg.TextConfig.CompressionConfig,
|
||||
} {
|
||||
if strings.EqualFold(q.QuantMethod, "fp8") && len(q.WeightBlockSize) == 2 {
|
||||
block, err := newSafetensorFP8BlockSize(q.WeightBlockSize[0], q.WeightBlockSize[1])
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
|
||||
if !strings.EqualFold(q.QuantMethod, "compressed-tensors") && !strings.EqualFold(q.Format, "float-quantized") {
|
||||
continue
|
||||
}
|
||||
for _, group := range q.ConfigGroups {
|
||||
if !strings.EqualFold(group.Format, "float-quantized") ||
|
||||
group.Weights.NumBits != 8 ||
|
||||
!strings.EqualFold(group.Weights.Type, "float") ||
|
||||
len(group.Weights.BlockStructure) != 2 {
|
||||
continue
|
||||
}
|
||||
block, err := newSafetensorFP8BlockSize(group.Weights.BlockStructure[0], group.Weights.BlockStructure[1])
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
}
|
||||
|
||||
if len(blocks) == 0 {
|
||||
return safetensorFP8BlockSize{}, nil
|
||||
}
|
||||
|
||||
block := blocks[0]
|
||||
for _, other := range blocks[1:] {
|
||||
if other.rows != block.rows || other.cols != block.cols {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("multiple fp8 block sizes in config.json: %dx%d and %dx%d", block.rows, block.cols, other.rows, other.cols)
|
||||
}
|
||||
}
|
||||
return block, nil
|
||||
}
|
||||
|
||||
func newSafetensorFP8BlockSize(rows, cols int) (safetensorFP8BlockSize, error) {
|
||||
if rows <= 0 || cols <= 0 {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("invalid fp8 block size %dx%d", rows, cols)
|
||||
}
|
||||
return safetensorFP8BlockSize{rows: rows, cols: cols, ok: true}, nil
|
||||
}
|
||||
|
||||
func (st safetensor) decodeFP8E4M3(data []byte) ([]float32, error) {
|
||||
if st.scale == nil {
|
||||
return nil, fmt.Errorf("missing fp8 scale companion for tensor %q", st.name)
|
||||
}
|
||||
if !st.fp8Block.ok {
|
||||
return nil, fmt.Errorf("missing fp8 block size metadata for tensor %q", st.name)
|
||||
}
|
||||
if len(st.shape) != 2 {
|
||||
return nil, fmt.Errorf("expected 2D fp8 tensor %q, got shape %v", st.name, st.shape)
|
||||
}
|
||||
|
||||
rows, cols := int(st.shape[0]), int(st.shape[1])
|
||||
if rows < 0 || cols < 0 || rows*cols != len(data) {
|
||||
return nil, fmt.Errorf("fp8 tensor %q shape %v does not match %d bytes", st.name, st.shape, len(data))
|
||||
}
|
||||
|
||||
scale, err := st.readScale()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(st.scale.shape) != 2 {
|
||||
return nil, fmt.Errorf("expected 2D fp8 scale tensor %q, got shape %v", st.scale.name, st.scale.shape)
|
||||
}
|
||||
|
||||
blockRows := st.fp8Block.rows
|
||||
blockCols := st.fp8Block.cols
|
||||
scaleRows, scaleCols := int(st.scale.shape[0]), int(st.scale.shape[1])
|
||||
expectedRows := (rows + blockRows - 1) / blockRows
|
||||
expectedCols := (cols + blockCols - 1) / blockCols
|
||||
if scaleRows != expectedRows || scaleCols != expectedCols {
|
||||
return nil, fmt.Errorf("unexpected fp8 scale shape %v for tensor %q shape %v; want [%d %d]", st.scale.shape, st.name, st.shape, expectedRows, expectedCols)
|
||||
}
|
||||
if len(scale) != scaleRows*scaleCols {
|
||||
return nil, fmt.Errorf("fp8 scale tensor %q shape %v does not match decoded length %d", st.scale.name, st.scale.shape, len(scale))
|
||||
}
|
||||
|
||||
f32s := make([]float32, len(data))
|
||||
for r := range rows {
|
||||
scaleRow := r / blockRows
|
||||
rowOffset := r * cols
|
||||
for c := range cols {
|
||||
f32s[rowOffset+c] = decodeFloat8E4M3FN(data[rowOffset+c]) * scale[scaleRow*scaleCols+c/blockCols]
|
||||
}
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (st safetensor) readScale() ([]float32, error) {
|
||||
r, err := st.sectionReader(st.scale.offset, st.scale.size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read fp8 scale tensor %q: %w", st.scale.name, err)
|
||||
}
|
||||
if closer, ok := r.(io.Closer); ok {
|
||||
defer closer.Close()
|
||||
}
|
||||
br := bufio.NewReaderSize(r, min(32<<10, int(st.scale.size)))
|
||||
|
||||
switch st.scale.dtype {
|
||||
case "F32":
|
||||
f32s := make([]float32, st.scale.size/4)
|
||||
if err := binary.Read(br, binary.LittleEndian, f32s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f32s, nil
|
||||
case "F16":
|
||||
u16s := make([]uint16, st.scale.size/2)
|
||||
if err := binary.Read(br, binary.LittleEndian, u16s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f32s := make([]float32, len(u16s))
|
||||
for i := range u16s {
|
||||
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||
}
|
||||
return f32s, nil
|
||||
case "BF16":
|
||||
u8s := make([]uint8, st.scale.size)
|
||||
if err := binary.Read(br, binary.LittleEndian, u8s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return bfloat16.DecodeFloat32(u8s), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported fp8 scale dtype %q for tensor %q", st.scale.dtype, st.scale.name)
|
||||
}
|
||||
}
|
||||
|
||||
func (st safetensor) sectionReader(offset, size int64) (io.Reader, error) {
|
||||
f, err := st.fs.Open(st.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if readerAt, ok := f.(io.ReaderAt); ok {
|
||||
return &readCloserReader{
|
||||
Reader: io.NewSectionReader(readerAt, offset, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
if seeker, ok := f.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(offset, io.SeekStart); err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
return &readCloserReader{
|
||||
Reader: io.LimitReader(f, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
if _, err := io.CopyN(io.Discard, f, offset); err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
return &readCloserReader{
|
||||
Reader: io.LimitReader(f, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// readCloserReader pairs a Reader with an unrelated Closer, so that a wrapped
// reader can still close the resource it reads from.
type readCloserReader struct {
	io.Reader
	io.Closer
}
|
||||
|
||||
// decodeFloat8E4M3FN converts one 8-bit float (1 sign bit, 4 exponent bits
// with bias 7, 3 mantissa bits) to float32.
//
// A zero exponent field encodes zero and subnormals (mantissa/8 * 2^-6), the
// pattern with all exponent and mantissa bits set decodes to NaN, and every
// other pattern is a normal number (1 + mantissa/8) * 2^(exponent-7).
func decodeFloat8E4M3FN(v byte) float32 {
	exponent := int(v>>3) & 0x0f
	fraction := int(v & 0x07)

	var magnitude float32
	switch {
	case exponent == 0x0f && fraction == 0x07:
		return float32(math.NaN())
	case exponent == 0:
		// Covers zero as well: a zero fraction yields a zero magnitude.
		magnitude = float32(math.Ldexp(float64(fraction)/8, -6))
	default:
		magnitude = float32(math.Ldexp(1+float64(fraction)/8, exponent-7))
	}

	if v&0x80 != 0 {
		// Negating keeps the sign on zero, so 0x80 decodes to -0.
		return -magnitude
	}
	return magnitude
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@ package convert
|
|||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
|
|
@ -231,6 +233,222 @@ func TestSafetensors(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestSafetensorWriteToFP8E4M3(t *testing.T) {
|
||||
root, err := os.OpenRoot(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer root.Close()
|
||||
|
||||
path := filepath.Base(t.Name())
|
||||
f, err := root.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// E4M3FN encodings for 1.0, 2.0, 0.5, and -1.0.
|
||||
if _, err := f.Write([]byte{0x38, 0x40, 0x30, 0xb8}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := f.Write(bfloat16.EncodeFloat32([]float32{2})); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
st := safetensor{
|
||||
fs: root.FS(),
|
||||
path: path,
|
||||
dtype: "F8_E4M3",
|
||||
offset: 0,
|
||||
size: 4,
|
||||
fp8Block: safetensorFP8BlockSize{rows: 128, cols: 128, ok: true},
|
||||
scale: &safetensorScale{
|
||||
name: "linear.weight_scale",
|
||||
dtype: "BF16",
|
||||
shape: []uint64{1, 1},
|
||||
offset: 4,
|
||||
size: 2,
|
||||
},
|
||||
tensorBase: &tensorBase{
|
||||
name: "linear.weight",
|
||||
shape: []uint64{2, 2},
|
||||
},
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := st.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
want := bfloat16.EncodeFloat32([]float32{2, 4, 1, -2})
|
||||
if diff := cmp.Diff(want, b.Bytes()); diff != "" {
|
||||
t.Errorf("safetensor.WriteTo() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafetensorWriteToFP8E4M3UsesConfiguredBlockSize(t *testing.T) {
|
||||
root, err := os.OpenRoot(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer root.Close()
|
||||
|
||||
path := filepath.Base(t.Name())
|
||||
f, err := root.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := f.Write(bytes.Repeat([]byte{0x38}, 12)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := f.Write(bfloat16.EncodeFloat32([]float32{1, 2, 3, 4})); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
st := safetensor{
|
||||
fs: root.FS(),
|
||||
path: path,
|
||||
dtype: "F8_E4M3",
|
||||
offset: 0,
|
||||
size: 12,
|
||||
fp8Block: safetensorFP8BlockSize{rows: 2, cols: 3, ok: true},
|
||||
scale: &safetensorScale{
|
||||
name: "linear.weight_scale",
|
||||
dtype: "BF16",
|
||||
shape: []uint64{2, 2},
|
||||
offset: 12,
|
||||
size: 8,
|
||||
},
|
||||
tensorBase: &tensorBase{
|
||||
name: "linear.weight",
|
||||
shape: []uint64{3, 4},
|
||||
},
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := st.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
want := bfloat16.EncodeFloat32([]float32{
|
||||
1, 1, 1, 2,
|
||||
1, 1, 1, 2,
|
||||
3, 3, 3, 4,
|
||||
})
|
||||
if diff := cmp.Diff(want, b.Bytes()); diff != "" {
|
||||
t.Errorf("safetensor.WriteTo() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsConsumesFP8ScaleCompanion(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
writeFP8BlockConfig(t, tempDir, 128, 128)
|
||||
|
||||
tensors, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(tensors) != 1 {
|
||||
t.Fatalf("expected one tensor, got %d", len(tensors))
|
||||
}
|
||||
if got := tensors[0].Name(); got != "linear.weight" {
|
||||
t.Fatalf("unexpected tensor name %q", got)
|
||||
}
|
||||
if got := tensors[0].Kind(); got != tensorKindBF16 {
|
||||
t.Fatalf("unexpected fp8 converted kind %d, want %d", got, tensorKindBF16)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsRejectsFP8WithoutBlockMetadata(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
|
||||
_, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err == nil || !strings.Contains(err.Error(), "missing fp8 block size metadata") {
|
||||
t.Fatalf("expected missing fp8 block size metadata error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsRejectsAmbiguousFP8ScaleCompanion(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
"linear.weight.scale": {
|
||||
Offsets: []int{6, 8},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
writeFP8BlockConfig(t, tempDir, 128, 128)
|
||||
|
||||
_, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err == nil || !strings.Contains(err.Error(), "multiple fp8 scale companions") {
|
||||
t.Fatalf("expected ambiguous fp8 scale companion error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func writeFP8BlockConfig(t *testing.T, dir string, rows, cols int) {
|
||||
t.Helper()
|
||||
|
||||
config := fmt.Sprintf(`{
|
||||
"architectures": ["GenericForCausalLM"],
|
||||
"compression_config": {
|
||||
"format": "float-quantized",
|
||||
"config_groups": {
|
||||
"group_0": {
|
||||
"format": "float-quantized",
|
||||
"weights": {
|
||||
"type": "float",
|
||||
"num_bits": 8,
|
||||
"block_structure": [%d, %d]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`, rows, cols)
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(config), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafetensorKind(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import (
|
|||
"errors"
|
||||
"io"
|
||||
"iter"
|
||||
"maps"
|
||||
"path"
|
||||
"slices"
|
||||
"strconv"
|
||||
|
|
@ -153,3 +154,54 @@ func (g mergeGroup) WriteTo(w io.Writer) (int64, error) {
|
|||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func sourceTensorKV(ts []*ggml.Tensor) KV {
|
||||
sourceFP8 := make(map[string]struct{})
|
||||
for _, t := range ts {
|
||||
if writerSourceDType(t.WriterTo) == "F8_E4M3" {
|
||||
sourceFP8[t.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
if len(sourceFP8) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return KV{
|
||||
"source_quantization": "hf_fp8",
|
||||
"source_fp8_tensors": slices.Sorted(maps.Keys(sourceFP8)),
|
||||
}
|
||||
}
|
||||
|
||||
// sourceDTypeTensor is implemented by tensors that can report the dtype they
// had in the source checkpoint.
type sourceDTypeTensor interface {
	// SourceDType returns the source dtype string, for example "F8_E4M3".
	SourceDType() string
}
|
||||
|
||||
func writerSourceDType(w io.WriterTo) string {
|
||||
switch w := w.(type) {
|
||||
case sourceDTypeTensor:
|
||||
return w.SourceDType()
|
||||
case mergeGroup:
|
||||
if len(w) == 0 {
|
||||
return ""
|
||||
}
|
||||
dtype := sourceDType(w[0])
|
||||
if dtype == "" {
|
||||
return ""
|
||||
}
|
||||
for _, t := range w[1:] {
|
||||
if sourceDType(t) != dtype {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return dtype
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func sourceDType(t Tensor) string {
|
||||
if t, ok := t.(sourceDTypeTensor); ok {
|
||||
return t.SourceDType()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ type fakeTensor struct {
|
|||
shape []uint64
|
||||
data []float32
|
||||
|
||||
repacker Repacker
|
||||
sourceDType string
|
||||
repacker Repacker
|
||||
}
|
||||
|
||||
func (f fakeTensor) Name() string {
|
||||
|
|
@ -36,16 +37,21 @@ func (f fakeTensor) Kind() uint32 {
|
|||
return 0
|
||||
}
|
||||
|
||||
func (f fakeTensor) SourceDType() string {
|
||||
return f.sourceDType
|
||||
}
|
||||
|
||||
func (f *fakeTensor) SetRepacker(fn Repacker) {
|
||||
f.repacker = fn
|
||||
}
|
||||
|
||||
func (f fakeTensor) Clone() Tensor {
|
||||
return &fakeTensor{
|
||||
name: f.name,
|
||||
shape: slices.Clone(f.shape),
|
||||
data: slices.Clone(f.data),
|
||||
repacker: f.repacker,
|
||||
name: f.name,
|
||||
shape: slices.Clone(f.shape),
|
||||
data: slices.Clone(f.data),
|
||||
sourceDType: f.sourceDType,
|
||||
repacker: f.repacker,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -995,3 +1001,43 @@ func TestMergeOrder(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceTensorKVRecordsFP8OutputTensors(t *testing.T) {
|
||||
fp8 := &fakeTensor{name: "linear.weight", shape: []uint64{2, 2}, sourceDType: "F8_E4M3"}
|
||||
bf16 := &fakeTensor{name: "other.weight", shape: []uint64{2, 2}, sourceDType: "BF16"}
|
||||
|
||||
kv := sourceTensorKV([]*ggml.Tensor{
|
||||
{Name: "blk.0.linear.weight", WriterTo: fp8},
|
||||
{Name: "blk.0.other.weight", WriterTo: bf16},
|
||||
})
|
||||
|
||||
if got := kv["source_quantization"]; got != "hf_fp8" {
|
||||
t.Fatalf("source_quantization = %v, want hf_fp8", got)
|
||||
}
|
||||
got, ok := kv["source_fp8_tensors"].([]string)
|
||||
if !ok {
|
||||
t.Fatalf("source_fp8_tensors = %#v, want []string", kv["source_fp8_tensors"])
|
||||
}
|
||||
if diff := cmp.Diff([]string{"blk.0.linear.weight"}, got); diff != "" {
|
||||
t.Fatalf("source_fp8_tensors mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceTensorKVRecordsMergedFP8OutputTensors(t *testing.T) {
|
||||
fp8A := &fakeTensor{name: "expert.0.weight", shape: []uint64{2, 2}, sourceDType: "F8_E4M3"}
|
||||
fp8B := &fakeTensor{name: "expert.1.weight", shape: []uint64{2, 2}, sourceDType: "F8_E4M3"}
|
||||
bf16 := &fakeTensor{name: "expert.2.weight", shape: []uint64{2, 2}, sourceDType: "BF16"}
|
||||
|
||||
kv := sourceTensorKV([]*ggml.Tensor{
|
||||
{Name: "ffn_exps.weight", WriterTo: mergeGroup{fp8A, fp8B}},
|
||||
{Name: "mixed_exps.weight", WriterTo: mergeGroup{fp8A, bf16}},
|
||||
})
|
||||
|
||||
got, ok := kv["source_fp8_tensors"].([]string)
|
||||
if !ok {
|
||||
t.Fatalf("source_fp8_tensors = %#v, want []string", kv["source_fp8_tensors"])
|
||||
}
|
||||
if diff := cmp.Diff([]string{"ffn_exps.weight"}, got); diff != "" {
|
||||
t.Fatalf("source_fp8_tensors mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -494,15 +494,18 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
|
|||
for _, layer := range baseLayers {
|
||||
if layer.GGML != nil {
|
||||
quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
|
||||
ft := layer.GGML.KV().FileType()
|
||||
if quantType == "" && hasSourceFP8Tensors(layer.GGML.KV()) && layer.GGML.Name() == "gguf" && layer.MediaType == "application/vnd.ollama.image.model" && slices.Contains([]string{"F16", "BF16", "F32"}, ft.String()) {
|
||||
quantType = "Q8_0"
|
||||
}
|
||||
if quantType != "" && layer.GGML.Name() == "gguf" && layer.MediaType == "application/vnd.ollama.image.model" {
|
||||
want, err := ggml.ParseFileType(quantType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ft := layer.GGML.KV().FileType()
|
||||
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
||||
return errors.New("quantization is only supported for F16 and F32 models")
|
||||
if !slices.Contains([]string{"F16", "BF16", "F32"}, ft.String()) {
|
||||
return errors.New("quantization is only supported for F16, BF16 and F32 models")
|
||||
} else if ft != want {
|
||||
layer, err = quantizeLayer(layer, quantType, fn)
|
||||
if err != nil {
|
||||
|
|
@ -606,6 +609,10 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
|
|||
return nil
|
||||
}
|
||||
|
||||
func hasSourceFP8Tensors(kv ggml.KV) bool {
|
||||
return kv.String("source_quantization") == "hf_fp8" && len(kv.Strings("source_fp8_tensors")) > 0
|
||||
}
|
||||
|
||||
func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) {
|
||||
ft := layer.GGML.KV().FileType()
|
||||
var doneBytes atomic.Uint64
|
||||
|
|
|
|||
|
|
@ -51,11 +51,14 @@ func (q quantizer) WriteTo(w io.Writer) (int64, error) {
|
|||
}
|
||||
|
||||
type quantizeState struct {
|
||||
nAttnV int // Number of attn_*v* weight tensors
|
||||
nFfnDown int // Number of ffn_down tensors
|
||||
iAttnV int // Running counter of number of attn_v tensors that have been processed
|
||||
iFfnDown int // Running counter of number of ffn_down tensors that have been processed
|
||||
hasOutput bool // used to figure out if a model shares tok_embd with the output weight
|
||||
nAttnV int // Number of attn_*v* weight tensors
|
||||
nFfnDown int // Number of ffn_down tensors
|
||||
iAttnV int // Running counter of number of attn_v tensors that have been processed
|
||||
iFfnDown int // Running counter of number of ffn_down tensors that have been processed
|
||||
hasOutput bool // used to figure out if a model shares tok_embd with the output weight
|
||||
preserveSourceFP8ToQ8 bool
|
||||
preserveSourceQ4 bool
|
||||
sourceFP8Tensors map[string]struct{}
|
||||
}
|
||||
|
||||
func useMoreBits(iLayer, nLayers int) bool {
|
||||
|
|
@ -120,10 +123,10 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
|
|||
newType = fsggml.TensorTypeQ6_K
|
||||
}
|
||||
} else if strings.Contains(name, "attn_v.weight") {
|
||||
if (ftype == fsggml.FileTypeQ4_K_M) &&
|
||||
if newType != fsggml.TensorTypeQ8_0 && (ftype == fsggml.FileTypeQ4_K_M) &&
|
||||
useMoreBits(qs.iAttnV, qs.nAttnV) {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
|
||||
} else if newType != fsggml.TensorTypeQ8_0 && ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
|
||||
|
|
@ -164,21 +167,21 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
|
|||
qs.iFfnDown++
|
||||
}
|
||||
n_layer := qs.nFfnDown
|
||||
if ftype == fsggml.FileTypeQ4_K_M {
|
||||
if newType != fsggml.TensorTypeQ8_0 && ftype == fsggml.FileTypeQ4_K_M {
|
||||
if useMoreBits(iLayer, n_layer) {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
|
||||
} else if newType != fsggml.TensorTypeQ8_0 && ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
} else if strings.Contains(name, "attn_output.weight") {
|
||||
if nExperts == 8 {
|
||||
if newType != fsggml.TensorTypeQ8_0 && nExperts == 8 {
|
||||
if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
}
|
||||
} else if strings.Contains(name, "attn_qkv.weight") {
|
||||
if ftype == fsggml.FileTypeQ4_K_M {
|
||||
if newType != fsggml.TensorTypeQ8_0 && ftype == fsggml.FileTypeQ4_K_M {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
}
|
||||
|
|
@ -218,7 +221,12 @@ func quantize(in, out *os.File, orig *fsggml.GGML, newFileType fsggml.FileType,
|
|||
kv := maps.Clone(orig.KV())
|
||||
kv["general.file_type"] = newFileType
|
||||
// kv["general.quantization_version"] = ggml.QuantizationVersion()
|
||||
qs := &quantizeState{}
|
||||
qs := &quantizeState{
|
||||
sourceFP8Tensors: sourceFP8TensorSet(kv),
|
||||
}
|
||||
hasSourceFP8 := hasSourceFP8Tensors(kv)
|
||||
qs.preserveSourceFP8ToQ8 = hasSourceFP8 && newFileType == fsggml.FileTypeQ8_0
|
||||
qs.preserveSourceQ4 = hasSourceFP8 && slices.Contains([]fsggml.FileType{fsggml.FileTypeQ4_K_M, fsggml.FileTypeQ4_K_S}, newFileType)
|
||||
// Build up the quantize state so newType can adjust types
|
||||
layerCount := 0
|
||||
for k, l := range orig.Tensors().GroupLayers() {
|
||||
|
|
@ -304,6 +312,12 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
|
|||
|
||||
newType := fsggml.TensorType(t.Kind)
|
||||
if quantize {
|
||||
if qs.preserveSourceFP8ToQ8 {
|
||||
if _, ok := qs.sourceFP8Tensors[name]; !ok {
|
||||
return newType
|
||||
}
|
||||
}
|
||||
|
||||
if slices.Contains([]string{"qwen3next", "qwen35", "qwen35moe"}, kv.Architecture()) && (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ4_K_S) {
|
||||
if qt, ok := qwen3LinearAttnQuantType(name); ok {
|
||||
return qt
|
||||
|
|
@ -311,6 +325,11 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
|
|||
}
|
||||
|
||||
// get more optimal quantization type based on the tensor shape, layer, etc.
|
||||
if qs.preserveSourceQ4 {
|
||||
if _, ok := qs.sourceFP8Tensors[name]; !ok {
|
||||
defaultType = fsggml.TensorTypeQ8_0
|
||||
}
|
||||
}
|
||||
newType = getTensorNewType(kv, qs, defaultType, t.Name, t.Shape, ftype)
|
||||
if newType != defaultType {
|
||||
slog.Debug("tensor quantization adjusted for better quality", "name", t.Name, "requested", defaultType, "quantization", newType)
|
||||
|
|
@ -318,3 +337,16 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
|
|||
}
|
||||
return newType
|
||||
}
|
||||
|
||||
func sourceFP8TensorSet(kv fsggml.KV) map[string]struct{} {
|
||||
names := kv.Strings("source_fp8_tensors")
|
||||
if len(names) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make(map[string]struct{}, len(names))
|
||||
for _, name := range names {
|
||||
out[name] = struct{}{}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
|
|
|||
|
|
@ -308,6 +308,95 @@ func TestQuantizeModel(t *testing.T) {
|
|||
"output.weight": fsggml.TensorTypeQ8_0,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "source_fp8_q8_preserves_bf16_tensors",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "test",
|
||||
"source_quantization": "hf_fp8",
|
||||
"source_fp8_tensors": []string{"blk.1.ffn_down_exps.weight"},
|
||||
},
|
||||
tensors: []*fsggml.Tensor{
|
||||
{
|
||||
Name: "blk.1.ffn_down_exps.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.attn_q.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
},
|
||||
newType: "Q8_0",
|
||||
expectedTensorTypes: map[string]fsggml.TensorType{
|
||||
"blk.1.ffn_down_exps.weight": fsggml.TensorTypeQ8_0,
|
||||
"blk.1.attn_q.weight": fsggml.TensorTypeBF16,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "source_fp8_q4_promotes_bf16_tensors_to_q8",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "test",
|
||||
"source_quantization": "hf_fp8",
|
||||
"source_fp8_tensors": []string{
|
||||
"blk.1.ffn_gate_exps.weight",
|
||||
"blk.1.ffn_down_exps.weight",
|
||||
},
|
||||
},
|
||||
tensors: []*fsggml.Tensor{
|
||||
{
|
||||
Name: "blk.1.ffn_gate_exps.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.ffn_down_exps.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.attn_q.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.attn_v.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.ffn_down.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.attn_q_norm.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "blk.1.ffn_gate_inp.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
{
|
||||
Name: "output.weight", Kind: uint32(fsggml.TensorTypeBF16),
|
||||
Offset: uint64(0), Shape: []uint64{256, 1},
|
||||
WriterTo: bytes.NewReader(quantBytes[fsggml.TensorTypeBF16]),
|
||||
},
|
||||
},
|
||||
newType: "Q4_K_M",
|
||||
expectedTensorTypes: map[string]fsggml.TensorType{
|
||||
"blk.1.ffn_gate_exps.weight": fsggml.TensorTypeQ4_K,
|
||||
"blk.1.ffn_down_exps.weight": fsggml.TensorTypeQ6_K,
|
||||
"blk.1.attn_q.weight": fsggml.TensorTypeQ8_0,
|
||||
"blk.1.attn_v.weight": fsggml.TensorTypeQ8_0,
|
||||
"blk.1.ffn_down.weight": fsggml.TensorTypeQ8_0,
|
||||
"blk.1.attn_q_norm.weight": fsggml.TensorTypeBF16,
|
||||
"blk.1.ffn_gate_inp.weight": fsggml.TensorTypeBF16,
|
||||
"output.weight": fsggml.TensorTypeQ8_0,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "f32_short_data",
|
||||
kv: map[string]any{
|
||||
|
|
|
|||
Loading…
Reference in a new issue