mitchellh / goamz

Golang Amazon Library

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

S3 Multi from STDIN

mrrooijen opened this issue · comments

Is it possible to stream directly from STDIN to an S3 bucket? I've tried the following:

package main

import (
    "github.com/mitchellh/goamz/aws"
    "github.com/mitchellh/goamz/s3"
    "log"
    "os"
)

// main demonstrates a multipart upload to S3 via goamz. Note that
// multi.PutAll needs to seek within its input to size the parts, so a
// non-seekable stream such as os.Stdin fails with "illegal seek" —
// which is exactly the error this issue reports.
func main() {
    // AWS credentials; fill in real keys before running.
    auth := aws.Auth{
        AccessKey: "...",
        SecretKey: "...",
    }
    region := aws.USEast
    conn := s3.New(auth, region)
    bucket := conn.Bucket("...")

    // Create the (private) bucket; fail fast on auth/permission errors.
    if err := bucket.PutBucket(s3.Private); err != nil {
        log.Fatal(err)
    }

    // Start a multipart upload for the target key.
    multi, err := bucket.Multi("/data.zip", "application/zip", s3.Private)
    if err != nil {
        log.Fatal(err)
    }

    // Fix: the original wrote `parts, err =`, but `parts` was never
    // declared, so the program did not compile. `:=` declares it here.
    parts, err := multi.PutAll(os.Stdin, 1024*1024*5)
    if err != nil {
        log.Fatal(err)
    }

    // Finalize the upload by assembling the uploaded parts.
    if err := multi.Complete(parts); err != nil {
        log.Fatal(err)
    }
}

But receive the following error at multi.PutAll:

seek /dev/stdin: illegal seek

When I swap out os.Stdin for a file that I've opened with os.Open() it doesn't return an error.

Any help is much appreciated.

Ended up solving it this way. Basically, accumulate 5 MB worth of data from os.Stdin, write it to S3 using PutPart, and repeat until there's nothing left to write (the last part will likely be smaller than 5 MB).

Uses around 18 MB of RAM; I was hoping for around half of that. Not sure if it's possible to reduce it further.

package main

import (
    "bytes"
    "fmt"
    "github.com/mitchellh/goamz/aws"
    "github.com/mitchellh/goamz/s3"
    "io"
    "log"
    "os"
)

// main streams STDIN to an S3 object using a multipart upload,
// accumulating 5 MB at a time so memory use stays bounded regardless
// of input size. The object key is taken from os.Args[1].
func main() {
    if len(os.Args) < 2 {
        fmt.Fprintln(os.Stderr, "Must provide the path to store the data in.")
        os.Exit(2)
    }

    // Refuse to run when STDIN is a terminal (character device) rather
    // than a pipe or redirected file.
    // Fix: the original discarded the Stat error with `_`; handle it.
    stat, err := os.Stdin.Stat()
    if err != nil {
        log.Fatal("Couldn't stat STDIN: ", err)
    }
    if (stat.Mode() & os.ModeCharDevice) != 0 {
        fmt.Fprintln(os.Stderr, "Provide data to store through STDIN.")
        os.Exit(2)
    }

    path := os.Args[1]
    // AWS credentials; fill in real keys before running.
    auth := aws.Auth{
        AccessKey: "...",
        SecretKey: "...",
    }
    conn := s3.New(auth, aws.USEast)
    bucket := conn.Bucket("...")
    if err := bucket.PutBucket(s3.Private); err != nil {
        log.Fatal(err)
    }

    // Start the multipart upload for the target key.
    multi, err := bucket.Multi(path, "application/octet-stream", s3.Private)
    if err != nil {
        log.Fatal(err)
    }

    var (
        parts       []s3.Part // nil slice is fine; append handles it
        partSize    = 1024 * 1024 * 5
        stdinBuffer = make([]byte, 1024)
        partBuffer  bytes.Buffer
        sendPart    = initPartSender(&partBuffer, multi)
    )

    for {
        n, err := os.Stdin.Read(stdinBuffer)
        if err != nil && err != io.EOF {
            log.Fatal("Couldn't read STDIN", err)
        }

        if n == 0 {
            // End of input: flush whatever remains as the final
            // (possibly smaller than 5 MB) part.
            if partBuffer.Len() > 0 {
                if part, err := sendPart(); err != nil {
                    log.Fatal(err)
                } else {
                    parts = append(parts, part)
                }
            }

            break
        }

        combined := partBuffer.Len() + n
        if combined > partSize {
            // Top the buffer up to exactly partSize, ship it, then
            // carry the overflow into the next part.
            needed := partSize - partBuffer.Len()
            partBuffer.Write(stdinBuffer[:needed])
            if part, err := sendPart(); err != nil {
                log.Fatal(err)
            } else {
                parts = append(parts, part)
            }
            partBuffer.Write(stdinBuffer[needed:n])
        } else {
            partBuffer.Write(stdinBuffer[:n])
        }
    }

    // Assemble the uploaded parts into the final object.
    if err := multi.Complete(parts); err != nil {
        log.Fatal(err)
    }
}

// initPartSender returns a closure that uploads the current contents
// of partBuffer as the next sequential part of the multipart upload,
// then empties the buffer so it can be refilled for the next part.
func initPartSender(
    partBuffer *bytes.Buffer, multi *s3.Multi) func() (s3.Part, error) {

    // Part numbers are 1-based and must be strictly increasing; the
    // closure owns the counter.
    partNumber := 0

    return func() (s3.Part, error) {
        partNumber++
        // Wrap the buffered bytes in a seekable reader for PutPart,
        // and clear the buffer once the upload call has returned.
        payload := bytes.NewReader(partBuffer.Bytes())
        part, err := multi.PutPart(partNumber, payload)
        partBuffer.Reset()
        return part, err
    }
}