Commit 3e8678f7 authored by Mitchell Hashimoto

builder/amazonebs: retry SSH handshakes [GH-130]

parent 019ab13f
......@@ -22,6 +22,7 @@ BUG FIXES:
* core: Non-200 response codes on downloads now show proper errors.
* amazon-ebs: SSH handshake is retried. [GH-130]
* vagrant: The `BuildName` template property works properly in
the output path.
* vagrant: Properly configure the provider-specific post-processors so
......@@ -14,10 +14,64 @@ import (
type stepConnectSSH struct {
cancel bool
conn net.Conn
func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction {
config := state["config"].(config)
ui := state["ui"].(packer.Ui)
var comm packer.Communicator
var err error
waitDone := make(chan bool, 1)
go func() {
comm, err = s.waitForSSH(state)
waitDone <- true
log.Printf("Waiting for SSH, up to timeout: %s", config.SSHTimeout.String())
timeout := time.After(config.SSHTimeout)
for {
// Wait for either SSH to become available, a timeout to occur,
// or an interrupt to come through.
select {
case <-waitDone:
if err != nil {
ui.Error(fmt.Sprintf("Error waiting for SSH: %s", err))
return multistep.ActionHalt
state["communicator"] = comm
break WaitLoop
case <-timeout:
ui.Error("Timeout waiting for SSH.")
s.cancel = true
return multistep.ActionHalt
case <-time.After(1 * time.Second):
if _, ok := state[multistep.StateCancelled]; ok {
log.Println("Interrupt detected, quitting waiting for SSH.")
return multistep.ActionHalt
return multistep.ActionContinue
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
if s.conn != nil {
s.conn = nil
// This blocks until SSH becomes available and returns the communicator,
// or returns an error if setup fails or the wait is cancelled.
func (s *stepConnectSSH) waitForSSH(state map[string]interface{}) (packer.Communicator, error) {
config := state["config"].(config)
instance := state["instance"].(*ec2.Instance)
privateKey := state["privateKey"].(string)
......@@ -28,10 +82,33 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
keyring := &ssh.SimpleKeychain{}
err := keyring.AddPEMKey(privateKey)
if err != nil {
err := fmt.Errorf("Error setting up SSH config: %s", err)
state["error"] = err
return multistep.ActionHalt
return nil, fmt.Errorf("Error setting up SSH config: %s", err)
ui.Say("Waiting for SSH to become available...")
var comm packer.Communicator
var nc net.Conn
for {
if nc != nil {
time.Sleep(5 * time.Second)
if s.cancel {
log.Println("SSH wait cancelled. Exiting loop.")
return nil, errors.New("SSH wait cancelled")
// Attempt to connect to SSH port
"Opening TCP conn for SSH to %s:%d",
instance.DNSName, config.SSHPort)
nc, err := net.Dial("tcp",
fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
if err != nil {
log.Printf("TCP connection to SSH ip/port failed: %s", err)
// Build the actual SSH client configuration
......@@ -42,84 +119,33 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
// Start trying to connect to SSH
connected := make(chan bool, 1)
connectQuit := make(chan bool, 1)
defer func() {
connectQuit <- true
sshConnectSuccess := make(chan bool, 1)
go func() {
var err error
ui.Say("Connecting to the instance via SSH...")
attempts := 0
for {
select {
case <-connectQuit:
comm, err = ssh.New(nc, sshConfig)
if err != nil {
log.Printf("SSH connection fail: %s", err)
sshConnectSuccess <- false
attempts += 1
"Opening TCP conn for SSH to %s:%d (attempt %d)",
instance.DNSName, config.SSHPort, attempts)
s.conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
if err == nil {
// A brief sleep so we're not being overly zealous attempting
// to connect to the instance.
time.Sleep(500 * time.Millisecond)
connected <- true
sshConnectSuccess <- true
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
timeout := time.After(config.SSHTimeout)
for {
select {
case <-connected:
// We connected. Just break the loop.
break ConnectWaitLoop
case <-timeout:
err := errors.New("Timeout waiting for SSH to become available.")
state["error"] = err
return multistep.ActionHalt
case <-time.After(1 * time.Second):
if _, ok := state[multistep.StateCancelled]; ok {
log.Println("Interrupt detected, quitting waiting for SSH.")
return multistep.ActionHalt
case success := <-sshConnectSuccess:
if !success {
case <-time.After(5 * time.Second):
log.Printf("SSH handshake timeout. Trying again.")
var comm packer.Communicator
if err == nil {
comm, err = ssh.New(s.conn, sshConfig)
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
return multistep.ActionHalt
ui.Say("Connected via SSH!")
// Set the communicator on the state bag so it can be used later
state["communicator"] = comm
return multistep.ActionContinue
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
if s.conn != nil {
// Store the connection so we can close it later
s.conn = nc
return comm, nil
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment