207 lines
7.3 KiB
Go
207 lines
7.3 KiB
Go
package session
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/warrenronsiek/ctask/internal/lockfile"
|
|
"github.com/warrenronsiek/ctask/internal/shell"
|
|
"github.com/warrenronsiek/ctask/internal/workspace"
|
|
)
|
|
|
|
// adoptAttacher and adoptPoll are test seams that *wrap* shell primitives
|
|
// rather than hand-rolling tmux invocations. Production calls go through
|
|
// shell.AttachSession / shell.PollSessionEnd. Tests override these
|
|
// variables; do not run such tests with t.Parallel().
|
|
var (
|
|
adoptAttacher = shell.AttachSession
|
|
adoptPoll = shell.PollSessionEnd
|
|
)
|
|
|
|
// AdoptExistingPersistentSession is the adopted-reattach path. It is
|
|
// invoked when a tmux session for the workspace exists but the lease is
|
|
// missing, stale, or fresh-but-from-another-host (the cmd-layer dispatcher
|
|
// has already prompted for confirmation in the fresh_remote case before
|
|
// calling here — see cmd/persistent.go::confirmFreshRemoteAdoption).
|
|
//
|
|
// Eight-step flow (see v0.5.3-spec.md §3 path C):
|
|
// 1. Print one diagnostic line announcing the adoption.
|
|
// 2. Acquire write lock; under lock, re-read lease state. If now
|
|
// fresh-local (concurrent adopter raced ahead), release the lock and
|
|
// fall through to AttachExisting — the race winner owns the lease.
|
|
// Otherwise, remove the orphaned lease, write a new lease for this
|
|
// process, and bump task.yaml.UpdatedAt.
|
|
// 3. Capture a fresh start manifest. Without it, finalize has no
|
|
// reliable diff baseline.
|
|
// 4. Start the heartbeat goroutine.
|
|
// 5. shell.AttachSession (returns nil on clean exit, error on non-zero).
|
|
// 6. Polling loop until tmux reports session gone — runs ONLY after a
|
|
// successful attach.
|
|
// 7. Stop the heartbeat goroutine.
|
|
// 8. finalize with SessionOwnership="adopted" and AdoptedFromOrphanAt set.
|
|
//
|
|
// On attach failure (step 5 returns error), steps 6-8 are skipped and the
|
|
// error is returned — the user sees the underlying tmux failure.
|
|
func AdoptExistingPersistentSession(tmuxPath, sessionName, wsDir string, opts LaunchOpts) error {
|
|
fmt.Fprintln(os.Stderr,
|
|
"[ctask] adopting orphaned persistent session (previous owner exited without finalizing)")
|
|
|
|
startTime := time.Now().UTC().Truncate(time.Second)
|
|
adoptedAt := startTime
|
|
leasePath := LeasePath(wsDir)
|
|
|
|
var raced bool
|
|
skipped, lockErr := lockfile.WithLock(
|
|
ctaskWriteLockPath(wsDir),
|
|
sessionWriteLockTimeout, sessionWriteLockStaleAfter,
|
|
func() error {
|
|
// Re-check under lock. If a concurrent adopter beat us to it,
|
|
// fall through to passive reattach. No lease writes, no
|
|
// task.yaml.UpdatedAt bump on this branch.
|
|
if InspectLease(wsDir) == LeaseStateFreshLocal {
|
|
raced = true
|
|
return nil
|
|
}
|
|
if _, rmErr := CleanupStaleLease(leasePath, StaleLeaseAfter); rmErr != nil {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] warning: failed to remove orphaned lease: %v\n", rmErr)
|
|
}
|
|
lease := NewLease(startTime, opts.Agent, opts.Mode)
|
|
if err := WriteLease(leasePath, lease); err != nil {
|
|
return fmt.Errorf("writing lease: %w", err)
|
|
}
|
|
// Adoption-only: bump task.yaml.UpdatedAt. Passive reattach
|
|
// (and the race fall-through above) leave it untouched.
|
|
metaPath := filepath.Join(wsDir, "task.yaml")
|
|
if meta, err := workspace.ReadMeta(metaPath); err == nil && meta != nil {
|
|
meta.UpdatedAt = startTime
|
|
if err := workspace.WriteMeta(metaPath, meta); err != nil {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] warning: failed to update task.yaml on adoption: %v\n", err)
|
|
}
|
|
}
|
|
return nil
|
|
},
|
|
)
|
|
if skipped {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] Warning: could not acquire metadata lock; falling through to passive reattach\n")
|
|
return AttachExisting(tmuxPath, sessionName)
|
|
}
|
|
if lockErr != nil {
|
|
return fmt.Errorf("adoption lease write: %w", lockErr)
|
|
}
|
|
if raced {
|
|
return AttachExisting(tmuxPath, sessionName)
|
|
}
|
|
|
|
// Step 3: fresh start manifest — load-bearing for finalize's diff baseline.
|
|
startManifest, err := CaptureManifest(wsDir)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] warning: failed to capture start manifest during adoption: %v; finalize diff will be skipped\n", err)
|
|
} else {
|
|
mPath := manifestStartPath(wsDir)
|
|
if _, lockErr := lockfile.WithLock(
|
|
ctaskWriteLockPath(wsDir),
|
|
sessionWriteLockTimeout, sessionWriteLockStaleAfter,
|
|
func() error { return WriteManifest(mPath, startManifest) },
|
|
); lockErr != nil {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] warning: failed to write start manifest: %v\n", lockErr)
|
|
startManifest = nil
|
|
}
|
|
}
|
|
|
|
// Step 4: heartbeat.
|
|
hb := StartHeartbeat(leasePath, HeartbeatInterval)
|
|
|
|
// Step 5: attach. Non-zero exit is a real failure; surface it
|
|
// and skip steps 6-8 (polling and finalize). Stop the heartbeat first
|
|
// so we don't leak the goroutine.
|
|
if attachErr := adoptAttacher(tmuxPath, sessionName); attachErr != nil {
|
|
hb.Stop()
|
|
return attachErr
|
|
}
|
|
|
|
// Step 6: poll until session ends.
|
|
adoptPoll(tmuxPath, sessionName, shell.PollInterval)
|
|
|
|
// Step 7: stop heartbeat.
|
|
hb.Stop()
|
|
|
|
// Step 8: finalize with adoption fields.
|
|
endTime := time.Now().UTC().Truncate(time.Second)
|
|
if startManifest != nil {
|
|
if err := finalizeAdopted(opts, wsDir, startManifest, startTime, endTime, adoptedAt); err != nil {
|
|
fmt.Fprintf(os.Stderr, "[ctask] warning: adoption finalize failed: %v\n", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// finalizeAdopted is the adoption-specific finalize path. Mirrors
|
|
// session.Run's finalize() but stamps SessionOwnership = "adopted" and
|
|
// AdoptedFromOrphanAt onto the summary.
|
|
func finalizeAdopted(opts LaunchOpts, wsDir string, startManifest *Manifest, startTime, endTime, adoptedAt time.Time) error {
|
|
endManifest, err := CaptureManifest(wsDir)
|
|
if err != nil {
|
|
return fmt.Errorf("capturing end manifest: %w", err)
|
|
}
|
|
diff := DiffManifests(startManifest, endManifest)
|
|
|
|
agent := opts.Agent
|
|
if opts.Shell {
|
|
agent = "shell"
|
|
}
|
|
info := &SessionInfo{
|
|
Agent: agent,
|
|
Mode: opts.Mode,
|
|
StartTime: startTime,
|
|
EndTime: endTime,
|
|
Diff: diff,
|
|
}
|
|
|
|
sessionID := NewSessionID(currentHostname(), os.Getpid(), startTime)
|
|
if l, err := ReadLease(LeasePath(wsDir)); err == nil && l != nil {
|
|
sessionID = l.SessionID
|
|
}
|
|
|
|
summary := SummarizeFromDiff(
|
|
sessionID, currentHostname(), agent, opts.Mode,
|
|
startTime, endTime, diff, endManifest,
|
|
)
|
|
summary.EndReason = "tmux_session_ended"
|
|
summary.DetectedVia = "polling"
|
|
summary.SessionOwnership = "adopted"
|
|
summary.AdoptedFromOrphanAt = &adoptedAt
|
|
|
|
skipped, lockErr := lockfile.WithLock(
|
|
ctaskWriteLockPath(wsDir),
|
|
sessionWriteLockTimeout, sessionWriteLockStaleAfter,
|
|
func() error {
|
|
if err := AppendSessionLog(wsDir, info); err != nil {
|
|
fmt.Fprintf(os.Stderr, "[ctask] warning: append session log failed: %v\n", err)
|
|
}
|
|
if err := WriteSummary(SummaryPath(wsDir), summary); err != nil {
|
|
return fmt.Errorf("write summary: %w", err)
|
|
}
|
|
if rmErr := os.Remove(LeasePath(wsDir)); rmErr != nil && !os.IsNotExist(rmErr) {
|
|
fmt.Fprintf(os.Stderr, "[ctask] warning: could not remove lease: %v\n", rmErr)
|
|
}
|
|
if rmErr := os.Remove(manifestStartPath(wsDir)); rmErr != nil && !os.IsNotExist(rmErr) {
|
|
fmt.Fprintf(os.Stderr, "[ctask] warning: could not remove start manifest: %v\n", rmErr)
|
|
}
|
|
return nil
|
|
},
|
|
)
|
|
if skipped {
|
|
fmt.Fprintf(os.Stderr,
|
|
"[ctask] Warning: could not acquire metadata lock at adoption finalize; skipping summary write\n")
|
|
return nil
|
|
}
|
|
return lockErr
|
|
}
|