962 lines
29 KiB
TypeScript
962 lines
29 KiB
TypeScript
|
|
import { createHash, randomUUID, type UUID } from 'crypto'
|
|||
|
|
import { stat } from 'fs/promises'
|
|||
|
|
import { isAbsolute, join, relative, sep } from 'path'
|
|||
|
|
import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
|
|||
|
|
import type {
|
|||
|
|
AttributionSnapshotMessage,
|
|||
|
|
FileAttributionState,
|
|||
|
|
} from '../types/logs.js'
|
|||
|
|
import { getCwd } from './cwd.js'
|
|||
|
|
import { logForDebugging } from './debug.js'
|
|||
|
|
import { execFileNoThrowWithCwd } from './execFileNoThrow.js'
|
|||
|
|
import { getFsImplementation } from './fsOperations.js'
|
|||
|
|
import { isGeneratedFile } from './generatedFiles.js'
|
|||
|
|
import { getRemoteUrlForDir, resolveGitDir } from './git/gitFilesystem.js'
|
|||
|
|
import { findGitRoot, gitExe } from './git.js'
|
|||
|
|
import { logError } from './log.js'
|
|||
|
|
import { getCanonicalName, type ModelName } from './model/model.js'
|
|||
|
|
import { sequential } from './sequential.js'
|
|||
|
|
|
|||
|
|
/**
 * Allowlist of repositories whose commit trailers may carry internal model names.
 *
 * Each repository appears twice: once in SSH form (`github.com:org/repo`) and
 * once in HTTPS form (`github.com/org/repo`).
 *
 * NOTE: This is deliberately a per-repo allowlist and not an org-wide check.
 * The anthropics and anthropic-experimental orgs also contain PUBLIC repos
 * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime), and
 * undercover mode must stay ON in those to prevent codename leaks.
 * Only add repos here that are confirmed PRIVATE.
 */
const INTERNAL_MODEL_REPOS: readonly string[] = [
  'github.com:anthropics/claude-cli-internal',
  'github.com/anthropics/claude-cli-internal',
  'github.com:anthropics/anthropic',
  'github.com/anthropics/anthropic',
  'github.com:anthropics/apps',
  'github.com/anthropics/apps',
  'github.com:anthropics/casino',
  'github.com/anthropics/casino',
  'github.com:anthropics/dbt',
  'github.com/anthropics/dbt',
  'github.com:anthropics/dotfiles',
  'github.com/anthropics/dotfiles',
  'github.com:anthropics/terraform-config',
  'github.com/anthropics/terraform-config',
  'github.com:anthropics/hex-export',
  'github.com/anthropics/hex-export',
  'github.com:anthropics/feedback-v2',
  'github.com/anthropics/feedback-v2',
  'github.com:anthropics/labs',
  'github.com/anthropics/labs',
  'github.com:anthropics/argo-rollouts',
  'github.com/anthropics/argo-rollouts',
  'github.com:anthropics/starling-configs',
  'github.com/anthropics/starling-configs',
  'github.com:anthropics/ts-tools',
  'github.com/anthropics/ts-tools',
  'github.com:anthropics/ts-capsules',
  'github.com/anthropics/ts-capsules',
  'github.com:anthropics/feldspar-testing',
  'github.com/anthropics/feldspar-testing',
  'github.com:anthropics/trellis',
  'github.com/anthropics/trellis',
  'github.com:anthropics/claude-for-hiring',
  'github.com/anthropics/claude-for-hiring',
  'github.com:anthropics/forge-web',
  'github.com/anthropics/forge-web',
  'github.com:anthropics/infra-manifests',
  'github.com/anthropics/infra-manifests',
  'github.com:anthropics/mycro_manifests',
  'github.com/anthropics/mycro_manifests',
  'github.com:anthropics/mycro_configs',
  'github.com/anthropics/mycro_configs',
  'github.com:anthropics/mobile-apps',
  'github.com/anthropics/mobile-apps',
]
|
|||
|
|
|
|||
|
|
/**
 * Resolve the directory that attribution bookkeeping treats as the repo root.
 *
 * Starts from getCwd() (which respects agent worktree overrides via
 * AsyncLocalStorage) and resolves it to the enclosing git root, so a
 * `cd subdir` does not change the answer.
 *
 * @returns The git root for the current cwd, or getOriginalCwd() when no git
 *   root can be determined.
 */
export function getAttributionRepoRoot(): string {
  const gitRoot = findGitRoot(getCwd())
  if (gitRoot != null) {
    return gitRoot
  }
  return getOriginalCwd()
}
|
|||
|
|
|
|||
|
|
// Module-level cache of the repo classification. Primed once per process;
// remains null until the async check has stored a result.
//   'internal' - remote matches the INTERNAL_MODEL_REPOS allowlist
//   'external' - a remote exists but is not on the allowlist (public/open-source repo)
//   'none'     - no remote URL (not a git repo, or no remote configured)
let repoClassCache: 'internal' | 'external' | 'none' | null = null

/**
 * Read the cached repo classification without doing any I/O.
 *
 * @returns The cached class, or null when the async check has not run yet.
 */
export function getRepoClassCached(): 'internal' | 'external' | 'none' | null {
  return repoClassCache
}

/**
 * Synchronous view of the cached isInternalModelRepo() result.
 *
 * @returns true only when the cache holds 'internal'. An unprimed cache yields
 *   false, which is the safe default (don't leak).
 */
export function isInternalModelRepoCached(): boolean {
  const cached = getRepoClassCached()
  return cached === 'internal'
}
|
|||
|
|
|
|||
|
|
/**
 * Decide whether `remoteUrl` points at exactly the allowlisted `repo`.
 *
 * A plain substring test is not enough: 'github.com/anthropics/anthropic' is a
 * prefix of 'github.com/anthropics/anthropic-sdk-python', so a longer (and
 * possibly public) repo name would be treated as allowlisted. After the repo
 * path only an optional ".git" suffix and/or a trailing slash may follow.
 *
 * @param remoteUrl - Remote URL as reported for the repository.
 * @param repo - One allowlist entry, e.g. 'github.com:anthropics/apps'.
 * @returns true when the URL ends with `repo` (modulo ".git" and a trailing "/").
 */
function remoteMatchesAllowlistedRepo(remoteUrl: string, repo: string): boolean {
  const url = remoteUrl.trim()
  // Only the last occurrence can be followed by nothing but the short suffix.
  const at = url.lastIndexOf(repo)
  if (at === -1) {
    return false
  }
  return /^(?:\.git)?\/?$/.test(url.slice(at + repo.length))
}

/**
 * Check if the current repo is in the allowlist for internal model names.
 *
 * The outcome is stored in `repoClassCache`, so once a result has been
 * recorded later calls return it without querying the remote again.
 * NOTE(review): the `sequential` wrapper presumably serializes concurrent
 * callers so the remote is only looked up once; confirm against its definition.
 *
 * @returns true when the repo's remote URL matches an INTERNAL_MODEL_REPOS
 *   entry exactly; false for non-allowlisted remotes and when there is no remote.
 */
export const isInternalModelRepo = sequential(async (): Promise<boolean> => {
  if (repoClassCache !== null) {
    return repoClassCache === 'internal'
  }

  const cwd = getAttributionRepoRoot()
  const remoteUrl = await getRemoteUrlForDir(cwd)

  if (!remoteUrl) {
    repoClassCache = 'none'
    return false
  }

  // Anchored match: a repo whose name merely starts with an allowlisted name
  // (e.g. a public "<name>-sdk" repo) must NOT be classified as internal.
  const isInternal = INTERNAL_MODEL_REPOS.some(repo =>
    remoteMatchesAllowlistedRepo(remoteUrl, repo),
  )
  repoClassCache = isInternal ? 'internal' : 'external'
  return isInternal
})
|
|||
|
|
|
|||
|
|
/**
 * Rewrite a surface key so its model component uses the public model name.
 *
 * The key is split at its LAST slash (e.g. "cli/opus-4-5-fast" ->
 * "cli" + "opus-4-5-fast"); the model part is passed through
 * sanitizeModelName() and the two halves are joined again.
 *
 * @param surfaceKey - Key in "surface/model" form.
 * @returns The sanitized key, or the input unchanged when it contains no slash.
 */
export function sanitizeSurfaceKey(surfaceKey: string): string {
  const lastSlash = surfaceKey.lastIndexOf('/')
  // Without a slash there is no model component to rewrite.
  if (lastSlash < 0) {
    return surfaceKey
  }

  const surfacePart = surfaceKey.substring(0, lastSlash)
  const publicModel = sanitizeModelName(surfaceKey.substring(lastSlash + 1))
  return `${surfacePart}/${publicModel}`
}
|
|||
|
|
|
|||
|
|
// @[MODEL LAUNCH]: Add a mapping for the new model ID so git commit trailers show the public name.
|
|||
|
|
/**
 * Map a (possibly internal) model name to its public equivalent.
 *
 * The name is matched by substring against known model families; the first
 * family that matches decides the public name.
 *
 * @param shortName - Model name or variant, e.g. "opus-4-5-fast".
 * @returns The public family name (e.g. "claude-opus-4-5"), or the generic
 *   "claude" when no known family matches.
 */
export function sanitizeModelName(shortName: string): string {
  // Order matters: more specific families come first, because e.g. 'opus-4'
  // is a substring of every 'opus-4-x' name.
  const familyToPublicName: ReadonlyArray<readonly [string, string]> = [
    ['opus-4-6', 'claude-opus-4-6'],
    ['opus-4-5', 'claude-opus-4-5'],
    ['opus-4-1', 'claude-opus-4-1'],
    ['opus-4', 'claude-opus-4'],
    ['sonnet-4-6', 'claude-sonnet-4-6'],
    ['sonnet-4-5', 'claude-sonnet-4-5'],
    ['sonnet-4', 'claude-sonnet-4'],
    ['sonnet-3-7', 'claude-sonnet-3-7'],
    ['haiku-4-5', 'claude-haiku-4-5'],
    ['haiku-3-5', 'claude-haiku-3-5'],
  ]

  for (const [family, publicName] of familyToPublicName) {
    if (shortName.includes(family)) {
      return publicName
    }
  }

  // Unknown models get a generic name
  return 'claude'
}
|
|||
|
|
|
|||
|
|
/**
 * Per-session bookkeeping of Claude's contributions to files, plus the
 * interaction counters that accompany a commit.
 */
export type AttributionState = {
  /** File states keyed by relative path (from cwd). */
  fileStates: Map<string, FileAttributionState>
  /** Session baseline states, used for net change calculation. */
  sessionBaselines: Map<string, { contentHash: string; mtime: number }>
  /** Surface from which edits were made. */
  surface: string
  /** HEAD SHA at session start (for detecting external commits). */
  startingHeadSha: string | null
  /** Total prompts in the session (for steer count calculation). */
  promptCount: number
  /** Prompt count at the last commit (to calculate steers for the current commit). */
  promptCountAtLastCommit: number
  /** Number of permission prompts seen in the session. */
  permissionPromptCount: number
  /** Permission prompt count at the last commit. */
  permissionPromptCountAtLastCommit: number
  /** Number of ESC presses (user cancelled a permission prompt). */
  escapeCount: number
  /** ESC press count at the last commit. */
  escapeCountAtLastCommit: number
}
|
|||
|
|
|
|||
|
|
/**
 * Commit-level totals describing Claude's share of a commit.
 */
export type AttributionSummary = {
  /** Claude's share of all counted characters, as a rounded percentage. */
  claudePercent: number
  /** Characters attributed to Claude. */
  claudeChars: number
  /** Characters attributed to the human. */
  humanChars: number
  /** Surfaces that took part in the commit. */
  surfaces: string[]
}
|
|||
|
|
|
|||
|
|
/**
 * Attribution details for a single file, as written to git notes.
 */
export type FileAttribution = {
  /** Characters attributed to Claude in this file. */
  claudeChars: number
  /** Characters attributed to the human in this file. */
  humanChars: number
  /** Claude's share for this file, as a rounded percentage. */
  percent: number
  /** Surface the file's changes are attributed to. */
  surface: string
}
|
|||
|
|
|
|||
|
|
/**
 * Complete attribution payload serialized as JSON into git notes.
 */
export type AttributionData = {
  /** Schema version of this payload. */
  version: 1
  /** Commit-level totals. */
  summary: AttributionSummary
  /** Per-file attribution, keyed by file path. */
  files: Record<string, FileAttribution>
  /** Claude's characters and share of total content, keyed by surface. */
  surfaceBreakdown: Record<string, { claudeChars: number; percent: number }>
  /** Staged files left out of the totals because they are generated. */
  excludedGenerated: string[]
  /** Session ids that contributed to this payload. */
  sessions: string[]
}
|
|||
|
|
|
|||
|
|
/**
 * Read the current client surface from the environment.
 *
 * @returns The value of the CLAUDE_CODE_ENTRYPOINT environment variable, or
 *   'cli' when that variable is not set.
 */
export function getClientSurface(): string {
  const { CLAUDE_CODE_ENTRYPOINT: entrypoint } = process.env
  return entrypoint ?? 'cli'
}
|
|||
|
|
|
|||
|
|
/**
 * Combine a surface and a model into a single surface key.
 *
 * @param surface - Client surface, e.g. "cli".
 * @param model - Model whose canonical name (via getCanonicalName) is appended.
 * @returns A key of the form "surface/model" (e.g., "cli/claude-sonnet").
 */
export function buildSurfaceKey(surface: string, model: ModelName): string {
  const canonicalModel = getCanonicalName(model)
  return `${surface}/${canonicalModel}`
}
|
|||
|
|
|
|||
|
|
/**
 * Hash file content with SHA-256.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hex string.
 */
export function computeContentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
|
|||
|
|
|
|||
|
|
/**
 * Turn a file path into the repo-relative key used for attribution tracking.
 *
 * Relative inputs are returned untouched. Absolute inputs are compared against
 * the attribution repo root after resolving symlinks on both sides (so that
 * e.g. /tmp and /private/tmp on macOS agree); if that comparison fails, the
 * unresolved paths are compared as a fallback.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The path relative to the repo root with forward slashes, or the
 *   original `filePath` when it is relative or lies outside the repo root.
 */
export function normalizeFilePath(filePath: string): string {
  const fs = getFsImplementation()
  const cwd = getAttributionRepoRoot()

  if (!isAbsolute(filePath)) {
    return filePath
  }

  // realpath can fail (the file may not exist yet); keep the given path then.
  const realpathOrSelf = (target: string): string => {
    try {
      return fs.realpathSync(target)
    } catch {
      return target
    }
  }
  const isAtOrUnder = (root: string, target: string): boolean =>
    target === root || target.startsWith(root + sep)
  // Forward slashes so keys match git diff output on Windows.
  const toRepoKey = (root: string, target: string): string =>
    relative(root, target).replaceAll(sep, '/')

  const resolvedPath = realpathOrSelf(filePath)
  const resolvedCwd = realpathOrSelf(cwd)

  if (isAtOrUnder(resolvedCwd, resolvedPath)) {
    return toRepoKey(resolvedCwd, resolvedPath)
  }

  // Fallback: compare the paths exactly as given.
  if (isAtOrUnder(cwd, filePath)) {
    return toRepoKey(cwd, filePath)
  }

  return filePath
}
|
|||
|
|
|
|||
|
|
/**
 * Make a path absolute by anchoring it at the attribution repo root.
 *
 * @param filePath - Absolute or repo-relative path.
 * @returns `filePath` itself when already absolute, otherwise the path joined
 *   onto getAttributionRepoRoot().
 */
export function expandFilePath(filePath: string): string {
  return isAbsolute(filePath)
    ? filePath
    : join(getAttributionRepoRoot(), filePath)
}
|
|||
|
|
|
|||
|
|
/**
 * Build the initial attribution state for a new session: no tracked files, no
 * baselines, no known HEAD, all counters at zero, and the surface taken from
 * getClientSurface().
 */
export function createEmptyAttributionState(): AttributionState {
  const freshState: AttributionState = {
    fileStates: new Map<string, FileAttributionState>(),
    sessionBaselines: new Map<string, { contentHash: string; mtime: number }>(),
    surface: getClientSurface(),
    startingHeadSha: null,
    promptCount: 0,
    promptCountAtLastCommit: 0,
    permissionPromptCount: 0,
    permissionPromptCountAtLastCommit: 0,
    escapeCount: 0,
    escapeCountAtLastCommit: 0,
  }
  return freshState
}
|
|||
|
|
|
|||
|
|
/**
 * Work out the attribution state to store after Claude modified a file.
 *
 * The contribution of this edit is added on top of any contribution already
 * recorded for the same normalized path in `existingFileStates`.
 *
 * @param existingFileStates - States recorded so far, keyed by normalized path.
 * @param filePath - Path of the modified file.
 * @param oldContent - Content before the edit ('' for a new file).
 * @param newContent - Content after the edit ('' for a full deletion).
 * @param mtime - Modification time to record in the returned state.
 * @returns The FileAttributionState to store, or null if tracking failed.
 */
function computeFileModificationState(
  existingFileStates: Map<string, FileAttributionState>,
  filePath: string,
  oldContent: string,
  newContent: string,
  mtime: number,
): FileAttributionState | null {
  const key = normalizeFilePath(filePath)

  try {
    let editedChars: number

    if (oldContent === '') {
      // New file: everything written counts.
      editedChars = newContent.length
    } else if (newContent === '') {
      // Full deletion: everything removed counts.
      editedChars = oldContent.length
    } else {
      // Strip the common prefix and suffix to isolate the changed region.
      // Unlike Math.abs(newLen - oldLen), this also counts same-length
      // replacements (e.g., "Esc" → "esc").
      const oldLen = oldContent.length
      const newLen = newContent.length
      const shorter = Math.min(oldLen, newLen)

      let head = 0
      while (head < shorter && oldContent[head] === newContent[head]) {
        head += 1
      }

      // The suffix may not overlap the prefix that was already matched.
      const maxTail = shorter - head
      let tail = 0
      while (
        tail < maxTail &&
        oldContent[oldLen - 1 - tail] === newContent[newLen - 1 - tail]
      ) {
        tail += 1
      }

      editedChars = Math.max(oldLen - head - tail, newLen - head - tail)
    }

    // Accumulate on top of whatever was tracked for this file before.
    const previousContribution =
      existingFileStates.get(key)?.claudeContribution ?? 0

    return {
      contentHash: computeContentHash(newContent),
      claudeContribution: previousContribution + editedChars,
      mtime,
    }
  } catch (error) {
    logError(error as Error)
    return null
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Look up a file's modification time (mtimeMs).
 *
 * Async so that the value can be precomputed before entering a sync
 * setAppState callback.
 *
 * @param filePath - Absolute or repo-relative path of the file.
 * @returns The file's mtimeMs, or Date.now() when stat() fails (e.g. the file
 *   doesn't exist).
 */
export async function getFileMtime(filePath: string): Promise<number> {
  const absPath = expandFilePath(normalizeFilePath(filePath))
  try {
    const { mtimeMs } = await stat(absPath)
    return mtimeMs
  } catch {
    return Date.now()
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Record a file modification made by Claude.
 * Called after the Edit/Write tool completes.
 *
 * @param state - Current attribution state (not mutated).
 * @param filePath - Path of the modified file.
 * @param oldContent - File content before the edit.
 * @param newContent - File content after the edit.
 * @param _userModified - Accepted for callers; not used by this function.
 * @param mtime - Modification time to record; defaults to Date.now().
 * @returns A new state with the file's entry updated, or `state` itself when
 *   the contribution could not be computed.
 */
export function trackFileModification(
  state: AttributionState,
  filePath: string,
  oldContent: string,
  newContent: string,
  _userModified: boolean,
  mtime: number = Date.now(),
): AttributionState {
  const normalizedPath = normalizeFilePath(filePath)
  const updatedFileState = computeFileModificationState(
    state.fileStates,
    filePath,
    oldContent,
    newContent,
    mtime,
  )
  if (updatedFileState === null) {
    return state
  }

  // Copy-then-set keeps the incoming state's Map untouched.
  const fileStates = new Map(state.fileStates)
  fileStates.set(normalizedPath, updatedFileState)

  logForDebugging(
    `Attribution: Tracked ${updatedFileState.claudeContribution} chars for ${normalizedPath}`,
  )

  return { ...state, fileStates }
}
|
|||
|
|
|
|||
|
|
/**
 * Record a file that Claude created (e.g., via a bash command), for cases
 * where the file was produced through a non-tracked mechanism.
 *
 * @param state - Current attribution state.
 * @param filePath - Path of the created file.
 * @param content - Content of the new file.
 * @param mtime - Modification time to record; defaults to Date.now().
 * @returns The state produced by trackFileModification().
 */
export function trackFileCreation(
  state: AttributionState,
  filePath: string,
  content: string,
  mtime: number = Date.now(),
): AttributionState {
  // Creating a file is the same as modifying it from empty content.
  const previousContent = ''
  const userModified = false
  return trackFileModification(
    state,
    filePath,
    previousContent,
    content,
    userModified,
    mtime,
  )
}
|
|||
|
|
|
|||
|
|
/**
 * Record a file that Claude deleted (e.g., via a bash rm command), for cases
 * where the deletion happened through a non-tracked mechanism.
 *
 * The length of the removed content is added to whatever contribution was
 * already tracked for the file.
 *
 * @param state - Current attribution state (not mutated).
 * @param filePath - Path of the deleted file.
 * @param oldContent - Content the file had before deletion.
 * @returns A new state whose entry for the file has an empty content hash.
 */
export function trackFileDeletion(
  state: AttributionState,
  filePath: string,
  oldContent: string,
): AttributionState {
  const key = normalizeFilePath(filePath)
  const priorContribution = state.fileStates.get(key)?.claudeContribution ?? 0
  const removedChars = oldContent.length

  const deletedFileState: FileAttributionState = {
    contentHash: '', // Deleted files carry an empty hash
    claudeContribution: priorContribution + removedChars,
    mtime: Date.now(),
  }

  const fileStates = new Map(state.fileStates)
  fileStates.set(key, deletedFileState)

  logForDebugging(
    `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${deletedFileState.claudeContribution})`,
  )

  return { ...state, fileStates }
}
|
|||
|
|
|
|||
|
|
// --
|
|||
|
|
|
|||
|
|
/**
 * Apply many file changes to the attribution state in one pass.
 *
 * The file-state Map is copied exactly once and then mutated per change. This
 * avoids the O(n²) cost of copying the Map per file when processing large git
 * diffs (e.g., jj operations that touch hundreds of thousands of files).
 *
 * @param state - Current attribution state (its Map is not mutated).
 * @param changes - Changes to record; a missing `mtime` defaults to Date.now().
 * @returns A new state containing the updated file states.
 */
export function trackBulkFileChanges(
  state: AttributionState,
  changes: ReadonlyArray<{
    path: string
    type: 'modified' | 'created' | 'deleted'
    oldContent: string
    newContent: string
    mtime?: number
  }>,
): AttributionState {
  // The single working copy that every change below writes into.
  const fileStates = new Map(state.fileStates)

  for (const change of changes) {
    const mtime = change.mtime ?? Date.now()

    if (change.type !== 'deleted') {
      // Created and modified files share the same contribution computation.
      const updated = computeFileModificationState(
        fileStates,
        change.path,
        change.oldContent,
        change.newContent,
        mtime,
      )
      if (!updated) {
        continue
      }

      const key = normalizeFilePath(change.path)
      fileStates.set(key, updated)

      logForDebugging(
        `Attribution: Tracked ${updated.claudeContribution} chars for ${key}`,
      )
      continue
    }

    // Deletion: add the removed length to any earlier contribution.
    const key = normalizeFilePath(change.path)
    const priorContribution = fileStates.get(key)?.claudeContribution ?? 0
    const removedChars = change.oldContent.length

    fileStates.set(key, {
      contentHash: '',
      claudeContribution: priorContribution + removedChars,
      mtime,
    })

    logForDebugging(
      `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${priorContribution + removedChars})`,
    )
  }

  return { ...state, fileStates }
}
|
|||
|
|
|
|||
|
|
/**
 * Calculate final attribution for staged files.
 *
 * File states and baselines from all given session states are merged first
 * (contributions for the same path are summed; the first baseline seen for a
 * path wins). Each staged file is then classified:
 *  - generated files are excluded and listed in `excludedGenerated`;
 *  - files with a tracked state are attributed to Claude using the tracked
 *    contribution;
 *  - other files are attributed to the human, sized from the staged git diff
 *    or, failing that, the file size on disk (100 for an untracked deletion
 *    with no diff size);
 *  - files that are not staged deletions and cannot be stat()'ed are skipped.
 *
 * @param states - Attribution states of the contributing sessions. May be
 *   empty, in which case the surface falls back to getClientSurface().
 * @param stagedFiles - Staged file paths, relative to the repo root.
 * @returns The attribution payload for git notes.
 */
export async function calculateCommitAttribution(
  states: AttributionState[],
  stagedFiles: string[],
): Promise<AttributionData> {
  const cwd = getAttributionRepoRoot()
  const sessionId = getSessionId()

  const files: Record<string, FileAttribution> = {}
  const excludedGenerated: string[] = []
  const surfaces = new Set<string>()
  const surfaceCounts: Record<string, number> = {}

  let totalClaudeChars = 0
  let totalHumanChars = 0

  // Merge file states from all sessions
  const mergedFileStates = new Map<string, FileAttributionState>()
  const mergedBaselines = new Map<
    string,
    { contentHash: string; mtime: number }
  >()

  for (const state of states) {
    surfaces.add(state.surface)

    // Merge baselines (earliest baseline wins)
    // Handle both Map and plain object (in case of serialization)
    const baselines =
      state.sessionBaselines instanceof Map
        ? state.sessionBaselines
        : new Map(
            Object.entries(
              (state.sessionBaselines ?? {}) as Record<
                string,
                { contentHash: string; mtime: number }
              >,
            ),
          )
    for (const [path, baseline] of baselines) {
      if (!mergedBaselines.has(path)) {
        mergedBaselines.set(path, baseline)
      }
    }

    // Merge file states (accumulate contributions)
    // Handle both Map and plain object (in case of serialization)
    const fileStates =
      state.fileStates instanceof Map
        ? state.fileStates
        : new Map(
            Object.entries(
              (state.fileStates ?? {}) as Record<string, FileAttributionState>,
            ),
          )
    for (const [path, fileState] of fileStates) {
      const existing = mergedFileStates.get(path)
      if (existing) {
        mergedFileStates.set(path, {
          ...fileState,
          claudeContribution:
            existing.claudeContribution + fileState.claudeContribution,
        })
      } else {
        mergedFileStates.set(path, fileState)
      }
    }
  }

  // Every file is attributed to the first state's surface. Computed once here
  // rather than per file, and guarded so an empty `states` array no longer
  // throws; the current client surface is used instead.
  const fileSurface = states[0]?.surface ?? getClientSurface()

  // Process files in parallel
  const fileResults = await Promise.all(
    stagedFiles.map(async file => {
      // Skip generated files
      if (isGeneratedFile(file)) {
        return { type: 'generated' as const, file }
      }

      const absPath = join(cwd, file)
      const fileState = mergedFileStates.get(file)
      const baseline = mergedBaselines.get(file)

      let claudeChars = 0
      let humanChars = 0

      // Check if file was deleted
      const deleted = await isFileDeleted(file)

      if (deleted) {
        if (fileState) {
          // Claude deleted this file (tracked deletion)
          claudeChars = fileState.claudeContribution
          humanChars = 0
        } else {
          // Human deleted this file (untracked deletion)
          // Use diff size to get the actual change size
          const diffSize = await getGitDiffSize(file)
          humanChars = diffSize > 0 ? diffSize : 100 // Minimum attribution for a deletion
        }
      } else {
        try {
          // Only need file size, not content - stat() avoids loading GB-scale
          // build artifacts into memory when they appear in the working tree.
          // stats.size (bytes) is an adequate proxy for char count here.
          const stats = await stat(absPath)

          if (fileState) {
            // We have tracked modifications for this file
            claudeChars = fileState.claudeContribution
            humanChars = 0
          } else if (baseline) {
            // File was modified but not tracked - human modification
            const diffSize = await getGitDiffSize(file)
            humanChars = diffSize > 0 ? diffSize : stats.size
          } else {
            // New file not created by Claude
            humanChars = stats.size
          }
        } catch {
          // File doesn't exist or stat failed - skip it
          return null
        }
      }

      // Ensure non-negative values
      claudeChars = Math.max(0, claudeChars)
      humanChars = Math.max(0, humanChars)

      const total = claudeChars + humanChars
      const percent = total > 0 ? Math.round((claudeChars / total) * 100) : 0

      return {
        type: 'file' as const,
        file,
        claudeChars,
        humanChars,
        percent,
        surface: fileSurface,
      }
    }),
  )

  // Aggregate results
  for (const result of fileResults) {
    if (!result) continue

    if (result.type === 'generated') {
      excludedGenerated.push(result.file)
      continue
    }

    files[result.file] = {
      claudeChars: result.claudeChars,
      humanChars: result.humanChars,
      percent: result.percent,
      surface: result.surface,
    }

    totalClaudeChars += result.claudeChars
    totalHumanChars += result.humanChars

    surfaceCounts[result.surface] =
      (surfaceCounts[result.surface] ?? 0) + result.claudeChars
  }

  const totalChars = totalClaudeChars + totalHumanChars
  const claudePercent =
    totalChars > 0 ? Math.round((totalClaudeChars / totalChars) * 100) : 0

  // Calculate surface breakdown (percentage of total content per surface)
  const surfaceBreakdown: Record<
    string,
    { claudeChars: number; percent: number }
  > = {}
  for (const [surface, chars] of Object.entries(surfaceCounts)) {
    // Calculate what percentage of TOTAL content this surface contributed
    const percent = totalChars > 0 ? Math.round((chars / totalChars) * 100) : 0
    surfaceBreakdown[surface] = { claudeChars: chars, percent }
  }

  return {
    version: 1,
    summary: {
      claudePercent,
      claudeChars: totalClaudeChars,
      humanChars: totalHumanChars,
      surfaces: Array.from(surfaces),
    },
    files,
    surfaceBreakdown,
    excludedGenerated,
    sessions: [sessionId],
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Estimate the size of a file's staged changes from `git diff --cached --stat`.
 *
 * Only the summary line of the stat output (e.g. "1 file changed,
 * 3 insertions(+), 2 deletions(-)") is read. The inserted and deleted line
 * counts are added together and multiplied by an assumed average of 40
 * characters per line, so the result is an approximation, not an exact
 * character count.
 *
 * @param filePath - Path handed to git after `--`.
 * @returns The estimated number of changed characters, or 0 when git fails,
 *   prints nothing, or reports no insertions/deletions.
 */
export async function getGitDiffSize(filePath: string): Promise<number> {
  const cwd = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--stat', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return 0
    }

    // Per-file lines look like " file | 5 ++---" or " file | 10 +"; the
    // numbers used here come from the summary line instead.
    const approxCharsPerLine = 40
    let estimatedChars = 0

    for (const line of stdout.split('\n')) {
      if (!line) continue

      const isSummaryLine =
        line.includes('file changed') || line.includes('files changed')
      if (!isSummaryLine) continue

      const insertions = parseInt(
        /(\d+) insertions?/.exec(line)?.[1] ?? '0',
        10,
      )
      const deletions = parseInt(/(\d+) deletions?/.exec(line)?.[1] ?? '0', 10)
      estimatedChars += (insertions + deletions) * approxCharsPerLine
    }

    return estimatedChars
  } catch {
    return 0
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Tell whether a file is staged as a deletion.
 *
 * Runs `git diff --cached --name-status -- <file>` in the attribution repo
 * root and checks for the "D\tfilename" form in the output.
 *
 * @param filePath - Path handed to git after `--`.
 * @returns true when the output starts with "D\t"; false otherwise, including
 *   when git fails or prints nothing.
 */
export async function isFileDeleted(filePath: string): Promise<boolean> {
  const cwd = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-status', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return false
    }
    return stdout.trim().startsWith('D\t')
  } catch {
    // Best effort: treat any failure as "not deleted"
    return false
  }
}
|
|||
|
|
|
|||
|
|
/**
 * List the files currently staged in git.
 *
 * Runs `git diff --cached --name-only` in the attribution repo root.
 *
 * @returns One entry per non-empty output line, or an empty array when git
 *   fails or nothing is staged. A thrown error is logged via logError.
 */
export async function getStagedFiles(): Promise<string[]> {
  const cwd = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-only'],
      { cwd, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return []
    }
    return stdout.split('\n').filter(name => name.length > 0)
  } catch (error) {
    logError(error as Error)
    return []
  }
}
|
|||
|
|
|
|||
|
|
// formatAttributionTrailer moved to attributionTrailer.ts for tree-shaking
|
|||
|
|
// (contains excluded strings that should not be in external builds)
|
|||
|
|
|
|||
|
|
/**
 * Detect whether the repository is in the middle of a transient git operation
 * (rebase, merge, cherry-pick, or bisect).
 *
 * Looks inside the resolved git directory for the marker entries those
 * operations leave behind.
 *
 * @returns true when at least one marker exists; false when none exist or the
 *   git directory cannot be resolved.
 */
export async function isGitTransientState(): Promise<boolean> {
  const gitDir = await resolveGitDir(getAttributionRepoRoot())
  if (!gitDir) {
    return false
  }

  const markers = [
    'rebase-merge',
    'rebase-apply',
    'MERGE_HEAD',
    'CHERRY_PICK_HEAD',
    'BISECT_LOG',
  ]

  // A marker counts as present when stat() on it succeeds.
  const markerExists = async (marker: string): Promise<boolean> => {
    try {
      await stat(join(gitDir, marker))
      return true
    } catch {
      return false
    }
  }

  const presence = await Promise.all(markers.map(markerExists))
  return presence.includes(true)
}
|
|||
|
|
|
|||
|
|
/**
 * Build the persistable snapshot message for an attribution state.
 *
 * The `fileStates` Map is flattened into a plain record keyed by path (the
 * per-file state objects are reused, not copied), and the surface plus all
 * prompt / permission-prompt / escape counters are carried over unchanged.
 *
 * @param state - Attribution state to serialize.
 * @param messageId - Identifier stored on the snapshot message.
 * @returns A message of type `'attribution-snapshot'` describing `state`.
 */
export function stateToSnapshotMessage(
  state: AttributionState,
  messageId: UUID,
): AttributionSnapshotMessage {
  const fileStates: Record<string, FileAttributionState> = {}
  state.fileStates.forEach((fileState, path) => {
    fileStates[path] = fileState
  })

  const {
    surface,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  } = state

  return {
    type: 'attribution-snapshot',
    messageId,
    surface,
    fileStates,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Rebuild an attribution state from persisted snapshot messages.
 *
 * Only the final element of `snapshots` is used. When the array is empty, a
 * fresh empty state is returned.
 *
 * @param snapshots - Snapshot messages, with the most recent one last.
 * @returns The state described by the last snapshot; counters missing from
 *   that snapshot default to 0.
 */
export function restoreAttributionStateFromSnapshots(
  snapshots: AttributionSnapshotMessage[],
): AttributionState {
  const restored = createEmptyAttributionState()

  // Each snapshot is a complete dump of the state (see stateToSnapshotMessage),
  // not a delta, and fileStates never shrinks, so the newest snapshot already
  // holds the latest count for every path. Adding counts up across snapshots
  // would grow quadratically on restore (837 snapshots × 280 files →
  // 1.15 quadrillion "chars" tracked for a 5KB file over a 5-day session).
  const latest = snapshots[snapshots.length - 1]
  if (!latest) {
    return restored
  }

  restored.surface = latest.surface
  Object.entries(latest.fileStates).forEach(([filePath, fileState]) => {
    restored.fileStates.set(filePath, fileState)
  })

  // Counters also come from the newest snapshot, falling back to 0 if absent.
  restored.promptCount = latest.promptCount ?? 0
  restored.promptCountAtLastCommit = latest.promptCountAtLastCommit ?? 0
  restored.permissionPromptCount = latest.permissionPromptCount ?? 0
  restored.permissionPromptCountAtLastCommit =
    latest.permissionPromptCountAtLastCommit ?? 0
  restored.escapeCount = latest.escapeCount ?? 0
  restored.escapeCountAtLastCommit = latest.escapeCountAtLastCommit ?? 0

  return restored
}
|
|||
|
|
|
|||
|
|
/**
 * Rebuild attribution state from logged snapshots when a session is resumed
 * and hand the result to the caller.
 *
 * @param attributionSnapshots - Snapshot messages read back from the log.
 * @param onUpdateState - Callback invoked once with the restored state.
 */
export function attributionRestoreStateFromLog(
  attributionSnapshots: AttributionSnapshotMessage[],
  onUpdateState: (newState: AttributionState) => void,
): void {
  onUpdateState(restoreAttributionStateFromSnapshots(attributionSnapshots))
}
|
|||
|
|
|
|||
|
|
/**
 * Bump `promptCount` by one and emit an attribution snapshot of the result.
 * This is how the prompt count is persisted across compaction.
 *
 * The input state is not mutated; a shallow copy with the new count is
 * produced, snapshotted under a freshly generated UUID, and returned.
 *
 * @param attribution - Current attribution state
 * @param saveSnapshot - Function to save the snapshot (allows async handling by caller)
 * @returns New attribution state with incremented promptCount
 */
export function incrementPromptCount(
  attribution: AttributionState,
  saveSnapshot: (snapshot: AttributionSnapshotMessage) => void,
): AttributionState {
  const nextPromptCount = attribution.promptCount + 1
  const updated = { ...attribution, promptCount: nextPromptCount }

  const messageId = randomUUID()
  saveSnapshot(stateToSnapshotMessage(updated, messageId))

  return updated
}
|