516 lines
16 KiB
TypeScript
516 lines
16 KiB
TypeScript
import { APIError } from '@anthropic-ai/sdk'
|
|
import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
|
|
import isEqual from 'lodash-es/isEqual.js'
|
|
import { getIsNonInteractiveSession } from '../bootstrap/state.js'
|
|
import { isClaudeAISubscriber } from '../utils/auth.js'
|
|
import { getModelBetas } from '../utils/betas.js'
|
|
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
|
|
import { logError } from '../utils/log.js'
|
|
import { getSmallFastModel } from '../utils/model/model.js'
|
|
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
|
|
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
|
|
import { logEvent } from './analytics/index.js'
|
|
import { getAPIMetadata } from './api/claude.js'
|
|
import { getAnthropicClient } from './api/client.js'
|
|
import {
|
|
processRateLimitHeaders,
|
|
shouldProcessRateLimits,
|
|
} from './rateLimitMocking.js'
|
|
|
|
// Re-export message functions from centralized location
|
|
export {
|
|
getRateLimitErrorMessage,
|
|
getRateLimitWarning,
|
|
getUsingOverageText,
|
|
} from './rateLimitMessages.js'
|
|
|
|
/**
 * Quota verdict for a limit. Values of this type are read (by cast, without
 * validation) from the `anthropic-ratelimit-unified-status` and
 * `anthropic-ratelimit-unified-overage-status` response headers.
 */
type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'

/**
 * Identifiers for the individual limits this module distinguishes. Used as the
 * key for display names and as the `rateLimitType` reported on limit state.
 */
type RateLimitType =
  | 'five_hour'
  | 'seven_day'
  | 'seven_day_opus'
  | 'seven_day_sonnet'
  | 'overage'

export type { RateLimitType }
|
|
|
|
/**
 * One (utilization, elapsed-time) pair for the time-relative early warning.
 * A threshold matches when usage is at least `utilization` while no more than
 * `timePct` of the window has elapsed.
 */
type EarlyWarningThreshold = {
  utilization: number // 0-1 scale: trigger warning when usage >= this
  timePct: number // 0-1 scale: trigger warning when time elapsed <= this
}

/** Client-side early-warning settings for a single rate-limit window. */
type EarlyWarningConfig = {
  // Limit type reported on the warning produced from this config
  rateLimitType: RateLimitType
  // Abbreviation interpolated into `anthropic-ratelimit-unified-<abbrev>-*` header names
  claimAbbrev: '5h' | '7d'
  // Window length in seconds; used to derive how much of the window has elapsed
  windowSeconds: number
  // A warning fires when any one of these thresholds matches
  thresholds: EarlyWarningThreshold[]
}
|
|
|
|
// Early warning configurations in priority order (checked first to last).
// Used as fallback when server doesn't send surpassed-threshold header.
// Warns users when they're consuming quota faster than the time window allows:
// each threshold pairs a usage level with the latest point in the window at
// which reaching that level still counts as "too fast".
const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
  {
    rateLimitType: 'five_hour',
    claimAbbrev: '5h',
    windowSeconds: 5 * 60 * 60,
    thresholds: [{ utilization: 0.9, timePct: 0.72 }],
  },
  {
    rateLimitType: 'seven_day',
    claimAbbrev: '7d',
    windowSeconds: 7 * 24 * 60 * 60,
    thresholds: [
      { utilization: 0.75, timePct: 0.6 },
      { utilization: 0.5, timePct: 0.35 },
      { utilization: 0.25, timePct: 0.15 },
    ],
  },
]
|
|
|
|
// Maps claim abbreviations to rate limit types for header-based detection.
// Each key is interpolated into
// `anthropic-ratelimit-unified-<key>-surpassed-threshold`; entries are probed
// in insertion order and the first header found wins, so order is significant.
const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
  '5h': 'five_hour',
  '7d': 'seven_day',
  overage: 'overage',
}
|
|
|
|
// Human-readable label for each rate limit type.
const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
  five_hour: 'session limit',
  seven_day: 'weekly limit',
  seven_day_opus: 'Opus limit',
  seven_day_sonnet: 'Sonnet limit',
  overage: 'extra usage limit',
}

/**
 * Look up the human-readable label for a rate limit type.
 *
 * @param type - Rate limit identifier. Values reach this module via unchecked
 *   casts of header strings, so an identifier outside the table is possible at
 *   runtime.
 * @returns The display label, or `type` itself when the table has no entry.
 */
export function getRateLimitDisplayName(type: RateLimitType): string {
  // `??` rather than `||`: the fallback is for a missing entry, not a falsy label
  return RATE_LIMIT_DISPLAY_NAMES[type] ?? type
}
|
|
|
|
/**
 * Calculate what fraction of a time window has elapsed.
 * Used for time-relative early warning fallback.
 *
 * The window is assumed to end at `resetsAt`, so it started at
 * `resetsAt - windowSeconds`. The result is clamped to [0, 1]: a window that
 * has not started yet yields 0, one that has already reset yields 1.
 *
 * @param resetsAt - Unix epoch timestamp in seconds when the limit resets
 * @param windowSeconds - Duration of the window in seconds
 * @returns fraction (0-1) of the window that has elapsed
 */
function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
  // Date.now() is in milliseconds; the header timestamps are in seconds
  const nowSeconds = Date.now() / 1000
  const windowStart = resetsAt - windowSeconds
  const elapsed = nowSeconds - windowStart
  return Math.max(0, Math.min(1, elapsed / windowSeconds))
}
|
|
|
|
// Reason why overage is disabled/rejected.
// These values come from the API's unified limiter; this module reads them from
// the `anthropic-ratelimit-unified-overage-disabled-reason` response header.
export type OverageDisabledReason =
  | 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier
  | 'org_level_disabled' // Organization doesn't have overage enabled
  | 'org_level_disabled_until' // Organization overage temporarily disabled
  | 'out_of_credits' // Organization has insufficient credits
  | 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled
  | 'member_level_disabled' // Account specifically has overage disabled
  | 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit
  | 'group_zero_credit_limit' // Resolved group limit has a zero credit limit
  | 'member_zero_credit_limit' // Account has a zero credit limit
  | 'org_service_level_disabled' // Org service specifically has overage disabled
  | 'org_service_zero_credit_limit' // Org service has a zero credit limit
  | 'no_limits_configured' // No overage limits configured for account
  | 'unknown' // Unknown reason, should not happen
|
|
|
|
/**
 * Snapshot of the quota state derived from the unified rate-limit response
 * headers. This is the value stored in `currentLimits` and handed to status
 * listeners.
 */
export type ClaudeAILimits = {
  // Effective quota status after early-warning evaluation
  status: QuotaStatus
  // unifiedRateLimitFallbackAvailable is currently used to warn users that set
  // their model to Opus whenever they are about to run out of quota. It does
  // not change the actual model that is used.
  unifiedRateLimitFallbackAvailable: boolean
  // Unix epoch seconds at which the limit resets
  resetsAt?: number
  // Which limit this state refers to
  rateLimitType?: RateLimitType
  // Parsed from the window's `-utilization` header; only populated on
  // early-warning results
  utilization?: number
  // Values of the `anthropic-ratelimit-unified-overage-*` headers
  overageStatus?: QuotaStatus
  overageResetsAt?: number
  overageDisabledReason?: OverageDisabledReason
  // True when the standard limit is rejected but overage is still allowed
  isUsingOverage?: boolean
  // Numeric value of the `-surpassed-threshold` header; only set for
  // header-based early warnings
  surpassedThreshold?: number
}
|
|
|
|
// Most recently emitted quota state; starts out as "allowed" with no overage.
// Reassigned by emitStatusChange(). Exported for testing only.
export let currentLimits: ClaudeAILimits = {
  status: 'allowed',
  unifiedRateLimitFallbackAvailable: false,
  isUsingOverage: false,
}
|
|
|
|
/**
 * Raw per-window utilization from response headers, tracked on every API
 * response (unlike currentLimits.utilization which is only set when a warning
 * threshold fires). Exposed to statusline scripts via getRawUtilization().
 */
type RawWindowUtilization = {
  utilization: number // 0-1 fraction
  resets_at: number // unix epoch seconds
}
type RawUtilization = {
  five_hour?: RawWindowUtilization
  seven_day?: RawWindowUtilization
}
let rawUtilization: RawUtilization = {}

/**
 * Return the raw utilization captured from the most recently processed
 * response headers. The returned object is the module's own state, not a copy.
 */
export function getRawUtilization(): RawUtilization {
  return rawUtilization
}

/**
 * Parse a numeric header value.
 *
 * @returns the number, or undefined when the header is absent, blank, or does
 *   not parse to a finite number
 */
function parseFiniteHeaderNumber(value: string | null): number | undefined {
  // Number('') is 0, so blank values must be rejected before conversion
  if (value === null || value.trim() === '') {
    return undefined
  }
  const parsed = Number(value)
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * Read the 5h and 7d `-utilization` / `-reset` header pairs.
 *
 * A window is included only when both of its headers are present and parse to
 * finite numbers, so NaN never leaks into the values exposed to consumers.
 *
 * @param headers - Response headers to inspect
 * @returns utilization per window; windows with missing or malformed headers
 *   are omitted
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  const result: RawUtilization = {}
  for (const [key, abbrev] of [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const) {
    const utilization = parseFiniteHeaderNumber(
      headers.get(`anthropic-ratelimit-unified-${abbrev}-utilization`),
    )
    const resetsAt = parseFiniteHeaderNumber(
      headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`),
    )
    if (utilization !== undefined && resetsAt !== undefined) {
      result[key] = { utilization, resets_at: resetsAt }
    }
  }
  return result
}
|
|
|
|
/** Callback invoked with the new limits each time a status change is emitted. */
type StatusChangeListener = (limits: ClaudeAILimits) => void

// Registered listeners; callers add to and remove from this set directly.
export const statusListeners: Set<StatusChangeListener> = new Set()
|
|
|
|
/**
 * Publish a new quota state.
 *
 * Stores `limits` as `currentLimits`, synchronously calls every registered
 * status listener with it, then logs a `tengu_claudeai_limits_status_changed`
 * analytics event.
 *
 * @param limits - The quota state to publish
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits
  statusListeners.forEach(listener => listener(limits))
  // resetsAt is in epoch seconds; reported as whole hours from now, or 0 when
  // no reset time is known
  const hoursTillReset = Math.round(
    (limits.resetsAt ? limits.resetsAt - Date.now() / 1000 : 0) / (60 * 60),
  )

  logEvent('tengu_claudeai_limits_status_changed', {
    status:
      limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}
|
|
|
|
/**
 * Send a minimal request (one user message, `max_tokens: 1`) to the small/fast
 * model with retries disabled, and return the raw response so the caller can
 * read its rate-limit headers.
 *
 * Rejects with whatever the client throws; the caller is responsible for
 * handling failures.
 */
async function makeTestQuery() {
  const model = getSmallFastModel()
  const anthropic = await getAnthropicClient({
    maxRetries: 0,
    model,
    source: 'quota_check',
  })
  const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  const betas = getModelBetas(model)
  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return anthropic.beta.messages
    .create({
      model,
      max_tokens: 1,
      messages,
      metadata: getAPIMetadata(),
      // Only send the betas field when there is at least one beta to request
      ...(betas.length > 0 ? { betas } : {}),
    })
    .asResponse()
}
|
|
|
|
/**
 * Proactively refresh the quota state by issuing a minimal test request and
 * feeding its response headers (or its API error) into the limit tracking.
 *
 * Best-effort: this function never rejects. API errors are routed to
 * extractQuotaStatusFromError(); any other failure is logged and ignored.
 */
export async function checkQuotaStatus(): Promise<void> {
  // Skip network requests if nonessential traffic is disabled
  if (isEssentialTrafficOnly()) {
    return
  }

  // Check if we should process rate limits (real subscriber or mock testing)
  if (!shouldProcessRateLimits(isClaudeAISubscriber())) {
    return
  }

  // In non-interactive mode (-p), the real query follows immediately and
  // extractQuotaStatusFromHeaders() will update limits from its response
  // headers (claude.ts), so skip this pre-check API call.
  if (getIsNonInteractiveSession()) {
    return
  }

  try {
    // Make a minimal request to check quota
    const raw = await makeTestQuery()

    // Update limits based on the response
    extractQuotaStatusFromHeaders(raw.headers)
  } catch (error) {
    if (error instanceof APIError) {
      extractQuotaStatusFromError(error)
    } else {
      // Unexpected (non-API) failure: keep the check best-effort, but record
      // it instead of dropping it silently
      logError(error instanceof Error ? error : new Error(String(error)))
    }
  }
}
|
|
|
|
/**
 * Check if early warning should be triggered based on surpassed-threshold header.
 *
 * Probes `anthropic-ratelimit-unified-<claim>-surpassed-threshold` for each
 * claim in EARLY_WARNING_CLAIM_MAP, in map order, and builds the warning from
 * the first claim whose header is present.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned limits
 * @returns ClaudeAILimits with status 'allowed_warning' if a threshold was
 *   surpassed, null otherwise
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  // Check each claim type for surpassed threshold header
  for (const [claimAbbrev, rateLimitType] of Object.entries(
    EARLY_WARNING_CLAIM_MAP,
  )) {
    const surpassedThreshold = headers.get(
      `anthropic-ratelimit-unified-${claimAbbrev}-surpassed-threshold`,
    )

    // If threshold header is present, user has crossed a warning threshold
    if (surpassedThreshold !== null) {
      const utilizationHeader = headers.get(
        `anthropic-ratelimit-unified-${claimAbbrev}-utilization`,
      )
      const resetHeader = headers.get(
        `anthropic-ratelimit-unified-${claimAbbrev}-reset`,
      )

      // Missing or empty companion headers leave the field undefined
      const utilization = utilizationHeader
        ? Number(utilizationHeader)
        : undefined
      const resetsAt = resetHeader ? Number(resetHeader) : undefined

      return {
        status: 'allowed_warning',
        resetsAt,
        rateLimitType,
        utilization,
        unifiedRateLimitFallbackAvailable,
        isUsingOverage: false,
        surpassedThreshold: Number(surpassedThreshold),
      }
    }
  }

  return null
}
|
|
|
|
/**
 * Check if time-relative early warning should be triggered for a rate limit type.
 * Fallback when server doesn't send surpassed-threshold header.
 *
 * Reads the window's `-utilization` and `-reset` headers, works out how much of
 * the window has elapsed, and warns when any configured threshold has its
 * utilization reached at or before its elapsed-time cutoff.
 *
 * @param headers - Response headers to inspect
 * @param config - Window definition and thresholds to evaluate
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned limits
 * @returns ClaudeAILimits with status 'allowed_warning' if thresholds are
 *   exceeded; null otherwise, including when either header is missing
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const { rateLimitType, claimAbbrev, windowSeconds, thresholds } = config

  const utilizationHeader = headers.get(
    `anthropic-ratelimit-unified-${claimAbbrev}-utilization`,
  )
  const resetHeader = headers.get(
    `anthropic-ratelimit-unified-${claimAbbrev}-reset`,
  )

  if (utilizationHeader === null || resetHeader === null) {
    return null
  }

  const utilization = Number(utilizationHeader)
  const resetsAt = Number(resetHeader)
  const timeProgress = computeTimeProgress(resetsAt, windowSeconds)

  // Check if any threshold is exceeded: high usage early in the window
  const shouldWarn = thresholds.some(
    t => utilization >= t.utilization && timeProgress <= t.timePct,
  )

  if (!shouldWarn) {
    return null
  }

  return {
    status: 'allowed_warning',
    resetsAt,
    rateLimitType,
    utilization,
    unifiedRateLimitFallbackAvailable,
    isUsingOverage: false,
  }
}
|
|
|
|
/**
 * Get early warning limits using header-based detection with time-relative fallback.
 * 1. First checks for surpassed-threshold header (new server-side approach)
 * 2. Falls back to time-relative thresholds (client-side calculation),
 *    evaluating EARLY_WARNING_CONFIGS in order and returning the first match
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned limits
 * @returns the first warning found, or null when neither approach triggers
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  // Try header-based detection first (preferred when API sends the header)
  const headerBasedWarning = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )
  if (headerBasedWarning) {
    return headerBasedWarning
  }

  // Fallback: Use time-relative thresholds (client-side calculation)
  // This catches users burning quota faster than sustainable
  for (const config of EARLY_WARNING_CONFIGS) {
    const timeRelativeWarning = getTimeRelativeEarlyWarning(
      headers,
      config,
      unifiedRateLimitFallbackAvailable,
    )
    if (timeRelativeWarning) {
      return timeRelativeWarning
    }
  }

  return null
}
|
|
|
|
/**
 * Derive the quota state from the unified rate-limit response headers.
 *
 * When the header status is 'allowed' or 'allowed_warning', early-warning
 * detection decides the outcome: if a warning is found it is returned as-is
 * (that object carries no overage fields); otherwise the status is reported
 * as plain 'allowed', even if the header said 'allowed_warning'. Any other
 * status is passed through together with the overage details.
 *
 * Header strings are cast to their union types without validation.
 *
 * @param headers - Response headers (already mock-processed by the caller)
 * @returns the quota state implied by the headers
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  // A missing or empty status header is treated as 'allowed'
  const status =
    (headers.get('anthropic-ratelimit-unified-status') as QuotaStatus) ||
    'allowed'
  const resetsAtHeader = headers.get('anthropic-ratelimit-unified-reset')
  const resetsAt = resetsAtHeader ? Number(resetsAtHeader) : undefined
  const unifiedRateLimitFallbackAvailable =
    headers.get('anthropic-ratelimit-unified-fallback') === 'available'

  // Headers for rate limit type and overage support
  const rateLimitType = headers.get(
    'anthropic-ratelimit-unified-representative-claim',
  ) as RateLimitType | null
  const overageStatus = headers.get(
    'anthropic-ratelimit-unified-overage-status',
  ) as QuotaStatus | null
  const overageResetsAtHeader = headers.get(
    'anthropic-ratelimit-unified-overage-reset',
  )
  const overageResetsAt = overageResetsAtHeader
    ? Number(overageResetsAtHeader)
    : undefined

  // Reason why overage is disabled (see OverageDisabledReason for the values)
  const overageDisabledReason = headers.get(
    'anthropic-ratelimit-unified-overage-disabled-reason',
  ) as OverageDisabledReason | null

  // Determine if we're using overage (standard limits rejected but overage allowed)
  const isUsingOverage =
    status === 'rejected' &&
    (overageStatus === 'allowed' || overageStatus === 'allowed_warning')

  // Check for early warning (surpassed-threshold header, then time-relative
  // fallback). Only evaluated while requests are still being allowed.
  let finalStatus: QuotaStatus = status
  if (status === 'allowed' || status === 'allowed_warning') {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
    // No early warning threshold surpassed
    finalStatus = 'allowed'
  }

  return {
    status: finalStatus,
    resetsAt,
    unifiedRateLimitFallbackAvailable,
    // Optional fields are only added when their header produced a truthy value
    ...(rateLimitType && { rateLimitType }),
    ...(overageStatus && { overageStatus }),
    ...(overageResetsAt && { overageResetsAt }),
    ...(overageDisabledReason && { overageDisabledReason }),
    isUsingOverage,
  }
}
|
|
|
|
/**
 * Cache the extra usage disabled reason from API headers.
 *
 * Writes the value of `anthropic-ratelimit-unified-overage-disabled-reason`
 * (or null when the header is absent) to the global config, and only when it
 * differs from the cached value so unchanged responses do not trigger a save.
 *
 * @param headers - Response headers to read the reason from
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // A null reason means extra usage is enabled (no disabled reason header)
  const reason =
    headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null
  const cached = getGlobalConfig().cachedExtraUsageDisabledReason
  if (cached !== reason) {
    saveGlobalConfig(current => ({
      ...current,
      cachedExtraUsageDisabledReason: reason,
    }))
  }
}
|
|
|
|
/**
 * Update the tracked quota state from a successful response's headers.
 *
 * When rate limits should not be processed, any previously tracked state is
 * reset to the default instead. Otherwise the raw utilization, the computed
 * limits, and the cached extra-usage disabled reason are all refreshed, and a
 * status change is emitted only if the computed limits differ from the
 * current ones.
 *
 * @param headers - Headers of the API response
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  // Check if we need to process rate limits
  const isSubscriber = isClaudeAISubscriber()

  if (!shouldProcessRateLimits(isSubscriber)) {
    // If we have any rate limit state, clear it
    rawUtilization = {}
    if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) {
      const defaultLimits: ClaudeAILimits = {
        status: 'allowed',
        unifiedRateLimitFallbackAvailable: false,
        isUsingOverage: false,
      }
      emitStatusChange(defaultLimits)
    }
    return
  }

  // Process headers (applies mocks from /mock-limits command if active)
  const headersToUse = processRateLimitHeaders(headers)
  rawUtilization = extractRawUtilization(headersToUse)
  const newLimits = computeNewLimitsFromHeaders(headersToUse)

  // Cache extra usage status (persists across sessions)
  cacheExtraUsageDisabledReason(headersToUse)

  // Deep comparison so listeners are only notified on an actual change
  if (!isEqual(currentLimits, newLimits)) {
    emitStatusChange(newLimits)
  }
}
|
|
|
|
/**
 * Update the tracked quota state from a failed API request.
 *
 * Only HTTP 429 errors are considered, and only when rate limits should be
 * processed. The new state starts as a copy of the current limits and is
 * recomputed from the error's headers when they are present; in either case
 * the status is forced to 'rejected'. Exceptions raised while processing are
 * logged rather than propagated.
 *
 * @param error - The API error thrown by the client
 */
export function extractQuotaStatusFromError(error: APIError): void {
  if (
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    error.status !== 429
  ) {
    return
  }

  try {
    let newLimits = { ...currentLimits }
    if (error.headers) {
      // Process headers (applies mocks from /mock-limits command if active)
      const headersToUse = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(headersToUse)
      newLimits = computeNewLimitsFromHeaders(headersToUse)

      // Cache extra usage status (persists across sessions)
      cacheExtraUsageDisabledReason(headersToUse)
    }
    // For errors, always set status to rejected even if headers are not present.
    newLimits.status = 'rejected'

    // Deep comparison so listeners are only notified on an actual change
    if (!isEqual(currentLimits, newLimits)) {
      emitStatusChange(newLimits)
    }
  } catch (e) {
    logError(e as Error)
  }
}
|