diff --git a/model/parsers/gemma4.go b/model/parsers/gemma4.go index 38a46f6f2..ea0336e7a 100644 --- a/model/parsers/gemma4.go +++ b/model/parsers/gemma4.go @@ -32,7 +32,6 @@ const ( var ( gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`) - gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`) ) type Gemma4Parser struct { @@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string { return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00" }) - text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`) + text = quoteGemma4BareKeys(text) for i, value := range quotedStrings { escaped, _ := json.Marshal(value) @@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string { return text } +func quoteGemma4BareKeys(s string) string { + var sb strings.Builder + sb.Grow(len(s) + 16) + + for i := 0; i < len(s); { + if s[i] == '"' { + if end := gemma4JSONQuotedStringEnd(s, i); end != -1 { + sb.WriteString(s[i:end]) + i = end + continue + } + } + + if s[i] != '{' && s[i] != ',' { + sb.WriteByte(s[i]) + i++ + continue + } + + sb.WriteByte(s[i]) + i++ + + spaceStart := i + i = gemma4SkipSpace(s, i) + sb.WriteString(s[spaceStart:i]) + + keyEnd := gemma4BareKeyEnd(s, i) + if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' { + sb.WriteByte('"') + sb.WriteString(s[i:keyEnd]) + sb.WriteByte('"') + sb.WriteByte(':') + i = keyEnd + 1 + continue + } + } + + return sb.String() +} + +func gemma4BareKeyEnd(s string, start int) int { + i := start + for i < len(s) { + r, size := utf8.DecodeRuneInString(s[i:]) + if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) { + break + } + i += size + } + return i +} + // repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails. // For example, if the model emits an unclosed gemma string as the last value, // we can repair it by closing it with the gemma string delimiter. diff --git a/model/parsers/gemma4_test.go b/model/parsers/gemma4_test.go index 92f15edc3..112867364 100644 --- a/model/parsers/gemma4_test.go +++ b/model/parsers/gemma4_test.go @@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) { }, }, }, + { + name: "tool_call_exec_with_embedded_quoted_path", + input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}`, + expectedToolCalls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Name: "exec", + Arguments: testArgs(map[string]any{ + "command": `ls -F "vault/"`, + }), + }, + }, + }, + }, + { + name: "tool_call_exec_with_embedded_quoted_url", + input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}`, + expectedToolCalls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Name: "exec", + Arguments: testArgs(map[string]any{ + "command": `fetch "https://ollama.com/library/gemma4" --extract`, + }), + }, + }, + }, + }, { name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes", input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`, @@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) { }, }, }, + { + name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes", + input: `<|tool_call>call:edit{edits:[ + {newText:<|"|># Gemma4 + openclaude speed optimization guide + +Use "nvfp4" only after validating tool calls. +<|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide + +Use the default quantization. +<|"|>}, + {newText:<|"|>## 14. Methods tried but not adopted + +### 1. Model quantization +Do not enable "mxfp8" by default. +<|"|>,oldText:<|"|>## 14. Methods tried but not adopted +<|"|>} +]}`, + expectedToolCalls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Name: "edit", + Arguments: testArgs(map[string]any{ + "edits": []any{ + map[string]any{ + "newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n", + "oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n", + }, + map[string]any{ + "newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n", + "oldText": "## 14. Methods tried but not adopted\n", + }, + }, + }), + }, + }, + }, + }, + { + name: "tool_call_edit_with_array_of_objects_issue_comment_spacing", + input: `<|tool_call>call:edit{edits:[ + {newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>, + oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>}, + {newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>, + oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>} +]}`, + expectedToolCalls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Name: "edit", + Arguments: testArgs(map[string]any{ + "edits": []any{ + map[string]any{ + "newText": "# Gemma4 + openclaude speed optimization guide...", + "oldText": "# Gemma4 + openclaude speed optimization guide...", + }, + map[string]any{ + "newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...", + "oldText": "## 14. Methods tried but not adopted\\n", + }, + }, + }), + }, + }, + }, + }, { name: "tool_call_with_windows_path_single_backslashes", input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}`, @@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) { input: `{config:{enabled:true,name:<|"|>test<|"|>}}`, expected: `{"config":{"enabled":true,"name":"test"}}`, }, + { + name: "nested_object_with_space_after_object_open_and_before_bare_key", + input: `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`, + expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`, + }, + { + name: "nested_object_with_space_before_bare_key", + input: `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`, + expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`, + }, + { + name: "nested_object_with_newline_before_bare_key", + input: `{edits:[ + {newText:<|"|>a<|"|>, + oldText:<|"|>b<|"|>} +]}`, + expected: `{"edits":[ + {"newText":"a", + "oldText":"b"} +]}`, + }, { name: "array_value", input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`, @@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) { input: `{q:"say \"hi\" and \"bye\""}`, expected: `{"q":"say \"hi\" and \"bye\""}`, }, + { + name: "raw_quoted_string_containing_key_like_text", + input: `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`, + expected: `{"q":"keep , oldText: literal","note":"ok"}`, + }, + { + name: "raw_quoted_string_with_escaped_quotes_and_key_like_text", + input: `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`, + expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`, + }, { name: "nested_mixed_raw_and_gemma_quoted_values", input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`, @@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test } } -func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) { - _, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil) - if err == nil { - t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support") +func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) { + got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil) + if err != nil { + t.Fatalf("parseGemma4ToolCall returned error: %v", err) + } + + want := api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "foo", + Arguments: testArgs(map[string]any{ + "q": "a,b:c", + }), + }, + } + + if diff := cmp.Diff(want, got, argsComparer); diff != "" { + t.Fatalf("tool call mismatch (-want +got):\n%s", diff) } }