extract mirror size from rsync provider automatically

This commit is contained in:
zyx 2019-04-13 01:27:35 +08:00
parent a283328dc4
commit d0deeb19a9
9 changed files with 89 additions and 2 deletions

View File

@ -8,6 +8,7 @@ import (
"errors"
"io/ioutil"
"net/http"
"regexp"
"time"
)
@ -84,3 +85,14 @@ func GetJSON(url string, obj interface{}, client *http.Client) (*http.Response,
}
return resp, json.Unmarshal(body, obj)
}
// rsyncTotalSizeRe matches the "Total file size" line of rsync's transfer
// statistics. The (?m) flag enables multi-line mode so that ^ anchors at the
// start of every line; the single capture group holds the size value.
// It is compiled once at package scope instead of on every call.
var rsyncTotalSizeRe = regexp.MustCompile(`(?m)^Total file size: ([0-9\.]+[KMGTP]?) bytes`)

// ExtractSizeFromRsyncLog returns the size reported on the last
// "Total file size: <size> bytes" line found in content, for example "1.33T".
// It returns the empty string when content contains no such line.
func ExtractSizeFromRsyncLog(content []byte) string {
	// The log may hold more than one statistics block; matches are returned
	// in order of appearance and the last one wins.
	matches := rsyncTotalSizeRe.FindAllSubmatch(content, -1)
	if len(matches) == 0 {
		return ""
	}
	return string(matches[len(matches)-1][1])
}

32
internal/util_test.go Normal file
View File

@ -0,0 +1,32 @@
package internal
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
)
// TestExtractSizeFromRsyncLog feeds the statistics block of an rsync run to
// ExtractSizeFromRsyncLog and expects the "Total file size" value back.
func TestExtractSizeFromRsyncLog(t *testing.T) {
	// Statistics section as printed by rsync; the parser must pick the
	// "Total file size" line and ignore "Total transferred file size".
	const rsyncStats = `
Number of files: 998,470 (reg: 925,484, dir: 58,892, link: 14,094)
Number of created files: 1,049 (reg: 1,049)
Number of deleted files: 1,277 (reg: 1,277)
Number of regular files transferred: 5,694
Total file size: 1.33T bytes
Total transferred file size: 2.86G bytes
Literal data: 780.62M bytes
Matched data: 2.08G bytes
File list size: 37.55M
File list generation time: 7.845 seconds
File list transfer time: 0.000 seconds
Total bytes sent: 7.55M
Total bytes received: 823.25M
sent 7.55M bytes received 823.25M bytes 5.11M bytes/sec
total size is 1.33T speedup is 1,604.11
`
	Convey("Log parser should work", t, func() {
		extracted := ExtractSizeFromRsyncLog([]byte(rsyncStats))
		So(extracted, ShouldEqual, "1.33T")
	})
}

View File

@ -161,3 +161,7 @@ func (p *baseProvider) Terminate() error {
return err
}
// DataSize returns the size of the mirrored data as a string. The base
// provider always returns the empty string; rsyncProvider, which embeds
// baseProvider, defines its own DataSize that returns a recorded value.
func (p *baseProvider) DataSize() string {
return ""
}

View File

@ -53,6 +53,7 @@ type mirrorJob struct {
ctrlChan chan ctrlAction
disabled chan empty
state uint32
size string
}
func newMirrorJob(provider mirrorProvider) *mirrorJob {
@ -182,6 +183,7 @@ func (m *mirrorJob) Run(managerChan chan<- jobMessage, semaphore chan empty) err
if syncErr == nil {
// syncing success
logger.Noticef("succeeded syncing %s", m.Name())
m.size = provider.DataSize()
managerChan <- jobMessage{tunasync.Success, m.Name(), "", (m.State() == stateReady)}
// post-success hooks
err := runHooks(rHooks, func(h jobHook) error { return h.postSuccess() }, "post-success")

View File

@ -50,6 +50,7 @@ type mirrorProvider interface {
LogDir() string
LogFile() string
IsMaster() bool
DataSize() string
// enter context
EnterContext() *Context

View File

@ -73,6 +73,7 @@ func TestRsyncProvider(t *testing.T) {
echo "syncing to $(pwd)"
echo $RSYNC_PASSWORD $@
sleep 1
echo "Total file size: 1.33T bytes"
echo "Done"
exit 0
`
@ -83,6 +84,7 @@ exit 0
expectedOutput := fmt.Sprintf(
"syncing to %s\n"+
"%s\n"+
"Total file size: 1.33T bytes\n"+
"Done\n",
targetDir,
fmt.Sprintf(
@ -99,6 +101,7 @@ exit 0
So(err, ShouldBeNil)
So(string(loggedContent), ShouldEqual, expectedOutput)
// fmt.Println(string(loggedContent))
So(provider.DataSize(), ShouldEqual, "1.33T")
})
})

View File

@ -2,8 +2,11 @@ package worker
import (
"errors"
"io/ioutil"
"strings"
"time"
"github.com/tuna/tunasync/internal"
)
type rsyncConfig struct {
@ -19,7 +22,8 @@ type rsyncConfig struct {
// rsyncProvider is a mirror provider that embeds baseProvider and
// rsyncConfig and additionally tracks the size of the synced mirror.
type rsyncProvider struct {
	baseProvider
	rsyncConfig
	// options is a list of strings; presumably the command-line options
	// passed to rsync (TODO confirm against newRsyncProvider).
	options []string
	// dataSize is the mirror size extracted from the log file by Run. It is
	// reset to "" at the start of every Run and stays empty when the sync
	// fails or no size can be extracted.
	dataSize string
}
func newRsyncProvider(c rsyncConfig) (*rsyncProvider, error) {
@ -73,11 +77,22 @@ func (p *rsyncProvider) Upstream() string {
return p.upstreamURL
}
// DataSize returns the mirror size recorded in p.dataSize by Run. The value
// is the empty string if Run has not extracted a size.
func (p *rsyncProvider) DataSize() string {
return p.dataSize
}
// Run calls Start and then Wait, returning the first error either of them
// reports. When both succeed it reads the provider's log file and records
// the size found by internal.ExtractSizeFromRsyncLog, which DataSize then
// returns.
func (p *rsyncProvider) Run() error {
	// Clear the size from any previous run so that a failed sync never
	// leaves a stale value behind.
	p.dataSize = ""
	if err := p.Start(); err != nil {
		return err
	}
	if err := p.Wait(); err != nil {
		return err
	}
	// Size extraction is best-effort: if the log file cannot be read,
	// dataSize stays empty and the successful sync is still reported.
	if logContent, err := ioutil.ReadFile(p.LogFile()); err == nil {
		p.dataSize = internal.ExtractSizeFromRsyncLog(logContent)
	}
	return nil
}
func (p *rsyncProvider) Start() error {

View File

@ -3,8 +3,11 @@ package worker
import (
"errors"
"fmt"
"io/ioutil"
"strings"
"time"
"github.com/tuna/tunasync/internal"
)
type twoStageRsyncConfig struct {
@ -23,6 +26,7 @@ type twoStageRsyncProvider struct {
twoStageRsyncConfig
stage1Options []string
stage2Options []string
dataSize string
}
var rsyncStage1Profiles = map[string]([]string){
@ -78,6 +82,10 @@ func (p *twoStageRsyncProvider) Upstream() string {
return p.upstreamURL
}
// DataSize returns the mirror size stored in p.dataSize. Run clears the
// field before syncing and fills it from the log file once the stages have
// completed without error, so the value may be the empty string.
func (p *twoStageRsyncProvider) DataSize() string {
return p.dataSize
}
func (p *twoStageRsyncProvider) Options(stage int) ([]string, error) {
var options []string
if stage == 1 {
@ -123,6 +131,7 @@ func (p *twoStageRsyncProvider) Run() error {
env["RSYNC_PASSWORD"] = p.password
}
p.dataSize = ""
stages := []int{1, 2}
for _, stage := range stages {
command := []string{p.rsyncCmd}
@ -151,5 +160,8 @@ func (p *twoStageRsyncProvider) Run() error {
return err
}
}
if logContent, err := ioutil.ReadFile(p.LogFile()); err == nil {
p.dataSize = internal.ExtractSizeFromRsyncLog(logContent)
}
return nil
}

View File

@ -416,6 +416,12 @@ func (w *Worker) updateStatus(job *mirrorJob, jobMsg jobMessage) {
ErrorMsg: jobMsg.msg,
}
// Certain providers (rsync, for example) may know the size of the mirror,
// so we report it to the Manager here.
if len(job.size) != 0 {
smsg.Size = job.size
}
for _, root := range w.cfg.Manager.APIBaseList() {
url := fmt.Sprintf(
"%s/workers/%s/jobs/%s", root, w.Name(), jobMsg.name,