added ZSTD Layer

master
cheetah 1 year ago
parent d1bddfd037
commit 0b00bac7df

.gitignore vendored

@@ -5,4 +5,5 @@ temp/
 main
 storageserver/storageserver
 gmad_linux
 .vscode/
+storageserver/test/

@@ -3,7 +3,6 @@ package common
 import (
 	"fmt"
 	"io"
-	"io/ioutil"
 	"mime/multipart"
 	"net/http"
 	"os"
@@ -32,13 +31,14 @@ type DB_GMA struct {
 }

 type DB_File struct {
 	ID          string    `json:"_key"`
 	BatchID     string    `json:"batch"`
 	InitialPath string    `json:"initialPath"`
 	Extension   string    `json:"extension"`
+	Created     time.Time `json:"created"`
 	Size        int64     `json:"size"`
 	CRC         uint32    `json:"crc"`
 	Hash        string    `json:"hash"`
 }

 type DB_GMA2File struct {
@@ -62,9 +62,10 @@ type DB_GMA2File struct {
 type DB_Chunk struct {
 	ID        string    `json:"_key"`
 	NotReady  bool      `json:"notReady"`
 	Finalized bool      `json:"finalized"`
 	ReadOnly  bool      `json:"readOnly"`
+	Created   time.Time `json:"created"`
 	FileCount int       `json:"fileCount"`
 	Size      int64     `json:"size"`
@@ -77,13 +78,13 @@ type DB_File2Chunk struct {
 	File string `json:"_from"`
 }

-func MultipartUpload(client *http.Client, url string, path string, jsonBytes []byte) (err error) {
+func MultipartUpload(client *http.Client, url string, path string, jsonBytes []byte, workerID string) (err error) {
 	//fmt.Printf("\nMultipartUpload(%s, %s)\n", url, path)
 	file, err := os.Open(path)
 	if err != nil {
 		return err
 	}
-	fileContents, err := ioutil.ReadAll(file)
+	fileContents, err := io.ReadAll(file)
 	if err != nil {
 		return err
 	}
@@ -103,6 +104,11 @@ func MultipartUpload(client *http.Client, url string, path string, jsonBytes []byte, workerID string) (err error) {
 		return
 	}
+	err = writer.WriteField("worker", fmt.Sprintf("gma-%s", workerID))
+	if err != nil {
+		return
+	}
 	part, err := writer.CreateFormFile("file", fi.Name())
 	if err != nil {
 		return
@@ -142,7 +148,7 @@ func MultipartUpload(client *http.Client, url string, path string, jsonBytes []byte, workerID string) (err error) {
 	// Discard Response Body, to clean up tcp socket
 	defer res.Body.Close()
-	_, err = io.Copy(ioutil.Discard, res.Body)
+	_, err = io.Copy(io.Discard, res.Body)
 	if err != nil {
 		return err
 	}
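
For reference, a minimal sketch of the multipart body the updated MultipartUpload now produces: the new worker field (prefixed with gma-, as added above), the info JSON field that the storage server reads back with c.FormValue("info"), and the file part itself. The buffer-based writer and the literal values here are illustrative stand-ins, not the streaming code from common.go:

package main

import (
	"bytes"
	"fmt"
	"mime/multipart"
)

func main() {
	var body bytes.Buffer
	writer := multipart.NewWriter(&body)

	// worker tag, same format as the new WriteField call above
	if err := writer.WriteField("worker", fmt.Sprintf("gma-%s", "worker01")); err != nil {
		panic(err)
	}
	// file metadata travels as JSON in the "info" field
	if err := writer.WriteField("info", `{"_key":"example"}`); err != nil {
		panic(err)
	}
	// the payload itself
	part, err := writer.CreateFormFile("file", "example")
	if err != nil {
		panic(err)
	}
	part.Write([]byte("file contents"))
	writer.Close()

	fmt.Println(body.String()) // prints all three parts with their boundaries
}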

@@ -0,0 +1,258 @@
package main
import (
"archive/tar"
"context"
"crypto/sha256"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"time"
"git.cheetah.cat/worksucc/gma-puzzles/common"
adriver "github.com/arangodb/go-driver"
ahttp "github.com/arangodb/go-driver/http"
"github.com/klauspost/compress/zstd"
)
var (
arangoDB adriver.Database
arangoCTX context.Context
colChunk adriver.Collection
colFile adriver.Collection
colFile2Chunk adriver.Collection
)
func ConnectDB(baseURL string, arangoUser string, arangoPWD string, arangoDatabase string) (driver adriver.Database, ctx context.Context, err error) {
log.Println("connectDB:", "Starting Connection Process...")
// Retry Loop for Failed Connections
for i := 0; i < 6; i++ {
if i == 5 {
return driver, ctx, fmt.Errorf("connectdb unable to connect to database %d times", i)
} else if i > 0 {
time.Sleep(30 * time.Second)
}
// Connect to ArangoDB URL
conn, err := ahttp.NewConnection(ahttp.ConnectionConfig{
Endpoints: []string{baseURL},
TLSConfig: &tls.Config{ /*...*/ },
})
if err != nil {
log.Println("connectDB:", "Cannot Connect to ArangoDB!", err)
continue
}
// Connect Driver to User
client, err := adriver.NewClient(adriver.ClientConfig{
Connection: conn,
Authentication: adriver.BasicAuthentication(arangoUser, arangoPWD),
})
if err != nil {
log.Println("connectDB:", "Cannot Authenticate ArangoDB User!", err)
continue
}
// Create Context for Database Access
ctx = context.Background()
driver, err = client.Database(ctx, arangoDatabase)
if err != nil {
log.Println("connectDB:", "Cannot Load ArangoDB Database!", err)
continue
}
log.Println("connectDB:", "Connection Sucessful!")
return driver, ctx, nil
}
return driver, ctx, fmt.Errorf("connectDB: FUCK HOW DID THIS EXCUTE?")
}
func InitDatabase() (err error) {
arangoDB, arangoCTX, err = ConnectDB("http://192.168.45.8:8529/", "gma-inator", "gma-inator", "gma-inator")
if err != nil {
return err
}
colChunk, err = arangoDB.Collection(arangoCTX, "chunk")
if err != nil {
return err
}
colFile, err = arangoDB.Collection(arangoCTX, "file")
if err != nil {
return err
}
colFile2Chunk, err = arangoDB.Collection(arangoCTX, "file_chunk_map")
if err != nil {
return err
}
return nil
}
type PoolRecoveryData struct {
PoolID string `json:"_key"`
Size uint64 `json:"size"`
Created time.Time `json:"date"`
Hash string `json:"hash"`
ItemCount int `json:"itemCount"`
Items []string `json:"items"`
RecoveryData []common.DB_File `json:"recoveryData"`
}
func bla() error {
entries, err := os.ReadDir("/mnt/SC9000/storagePools/")
if err != nil {
return err
}
for _, e := range entries {
if !e.IsDir() {
fmt.Printf("Scanning For Local Pools, found %s:", e.Name())
tarFinalPath := filepath.Join("/mnt/SC9000/storagePools/", e.Name())
stats, err := os.Stat(tarFinalPath)
if err != nil {
return err
}
parts := strings.Split(e.Name(), ".")
var chunk common.DB_Chunk
_, err = colChunk.ReadDocument(arangoCTX, parts[0], &chunk)
if err != nil {
return err
}
chunk.Finalized = true
chunk.NotReady = false
chunk.ReadOnly = true
chunk.Size = stats.Size()
zstFile, err := os.Open(tarFinalPath)
if err != nil {
return err
}
shaHasher := sha256.New()
_, err = io.Copy(shaHasher, zstFile)
if err != nil {
return err
}
jsonPath := filepath.Join("/mnt/SC9000/storagePools/", fmt.Sprintf("%s.json", parts[0]))
_, err = os.Stat(jsonPath)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
// JSON sidecar is missing: rebuild it from the tar contents and the DB
zstFile.Seek(0, 0)
decompressor, err := zstd.NewReader(zstFile)
if err != nil {
panic(err)
}
defer decompressor.Close()
items := []string{}
tarReader := tar.NewReader(decompressor)
for {
header, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
items = append(items, header.Name)
}
poolRecoveryData := PoolRecoveryData{
PoolID: parts[0],
Size: uint64(stats.Size()),
Created: time.Now(),
Hash: fmt.Sprintf("%x", shaHasher.Sum(nil)),
ItemCount: 500, // assumes a full pool; matches the 500-item pool size used elsewhere
Items: items,
//RecoveryData,
}
chunk.Hash = poolRecoveryData.Hash
//TODO: fetch RecoveryData from DB
poolRecoveryData.RecoveryData = make([]common.DB_File, len(items))
_, _, err = colFile.ReadDocuments(arangoCTX, items, poolRecoveryData.RecoveryData)
if err != nil {
return fmt.Errorf("error @ReadDocuments %v", err)
}
json, err := json.MarshalIndent(poolRecoveryData, "", "\t")
if err != nil {
return fmt.Errorf("error @json.MarshalIndent %v", err)
}
recoveryFile, err := os.Create(jsonPath)
if err != nil {
return err
}
_, err = recoveryFile.Write(json)
if err != nil {
return fmt.Errorf("error @recoveryFile.Write %v", err)
}
}
} else {
var poolRecoveryData PoolRecoveryData
readJSONFile, err := os.Open(jsonPath)
if err != nil {
return err
}
defer readJSONFile.Close()
readBytes, err := io.ReadAll(readJSONFile)
if err != nil {
return err
}
err = json.Unmarshal(readBytes, &poolRecoveryData)
if err != nil {
return err
}
poolRecoveryData.Size = uint64(stats.Size())
poolRecoveryData.Created = time.Now()
poolRecoveryData.Hash = fmt.Sprintf("%x", shaHasher.Sum(nil))
chunk.Hash = poolRecoveryData.Hash
json, err := json.MarshalIndent(poolRecoveryData, "", "\t")
if err != nil {
return fmt.Errorf("error @json.MarshalIndent %v", err)
}
recoveryFile, err := os.Create(jsonPath)
if err != nil {
return err
}
_, err = recoveryFile.Write(json)
if err != nil {
return fmt.Errorf("error @recoveryFile.Write %v", err)
}
}
_, err = colChunk.UpdateDocument(arangoCTX, parts[0], &chunk)
if err != nil {
return err
}
}
}
return nil
}
func main() {
err := InitDatabase()
if err != nil {
panic(err)
}
err = bla()
if err != nil {
panic(err)
}
}
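
Since the tool above writes one JSON sidecar per pool, a quick way to sanity-check its output is to read a sidecar back and compare the declared item count against the item list. The struct mirrors PoolRecoveryData above (minus RecoveryData); the file name is a placeholder:

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"time"
)

// mirrors PoolRecoveryData from the recovery tool above
type PoolRecoveryData struct {
	PoolID    string    `json:"_key"`
	Size      uint64    `json:"size"`
	Created   time.Time `json:"date"`
	Hash      string    `json:"hash"`
	ItemCount int       `json:"itemCount"`
	Items     []string  `json:"items"`
}

func main() {
	// placeholder path: point this at any <poolID>.json sidecar
	raw, err := os.ReadFile("00000000-0000-0000-0000-000000000000.json")
	if err != nil {
		panic(err)
	}
	var prd PoolRecoveryData
	if err := json.Unmarshal(raw, &prd); err != nil {
		panic(err)
	}
	// the declared item count should match the number of listed items
	fmt.Printf("pool %s: %d items (declared %d), sha256 %s\n",
		prd.PoolID, len(prd.Items), prd.ItemCount, prd.Hash)
}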

@@ -8,6 +8,7 @@ require (
 	github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect
 	github.com/djherbis/times v1.5.0 // indirect
 	github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
+	github.com/klauspost/compress v1.16.6 // indirect
 	github.com/labstack/echo v3.3.10+incompatible // indirect
 	github.com/labstack/echo/v4 v4.10.2 // indirect
 	github.com/labstack/gommon v0.4.0 // indirect

@@ -19,6 +19,7 @@ import (
 	"git.cheetah.cat/worksucc/gma-puzzles/common"
 	"git.cheetah.cat/worksucc/gma-puzzles/gma"
+	"github.com/arangodb/go-driver"
 	adriver "github.com/arangodb/go-driver"
 	ahttp "github.com/arangodb/go-driver/http"
 	"github.com/schollz/progressbar/v3"
@@ -35,6 +36,8 @@ var (
 	//colFile2Chunk adriver.Collection
 	colGMA      adriver.Collection
 	colGMA2File adriver.Collection
+
+	workerID string
 )

 func ConnectDB(baseURL string, arangoUser string, arangoPWD string, arangoDatabase string) (driver adriver.Database, ctx context.Context, err error) {
@@ -117,12 +120,14 @@ var GlobalWriteLock sync.Mutex
 func main() {
 	folderPathP := flag.String("path", "/mnt/SC9000/TemporaryTestingShit2", "a string")
 	skipNameP := flag.String("skip", "", "skip until this name")
+	workerNameP := flag.String("worker", "def", "worker name")
 	debug.SetMemoryLimit(6e9)
 	flag.Parse()
 	folderPath := *folderPathP
 	skipName := *skipNameP
 	skipNameEnabled := len(skipName) > 0
+	workerID = *workerNameP
 	go func() {
 		log.Println(http.ListenAndServe("0.0.0.0:6060", nil))
 	}()
@@ -191,7 +196,7 @@ func main() {
 }

 func undoBatch(undoBatch bool, gmaID string, fileIDs []string, gma2FileIDs []string) (err error) {
-	log.Printf("undoBatch(%x, %s)\n", undoBatch, gmaID)
+	log.Printf("undoBatch(%v, %s)\n", undoBatch, gmaID)
 	/*
 		_, err = colGMA.RemoveDocument(arangoCTX, gmaID)
 		if err != nil {
@@ -220,7 +225,7 @@ func ProcessGMA(filePath string) (err error) {
 	fmt.Println("aquired global write lock")
 	defer unlockOnce.Do(GlobalWriteLock.Unlock) // release anyway
 	defer fmt.Println("unlocking GlobalWriteLock")
-	time.Sleep(5 * time.Second)
+	//time.Sleep(5 * time.Second)

 	var (
 		fileIDs []string
@@ -231,6 +236,31 @@ func ProcessGMA(filePath string) (err error) {
 	dboGMA.OriginalPath = filePath
 	dboGMA.ProcessingStart = time.Now()

+	cursor, err := arangoDB.Query(arangoCTX, fmt.Sprintf("FOR g IN gma FILTER g.originalPath == '%s' RETURN g", dboGMA.OriginalPath), nil)
+	if err != nil {
+		return err
+	}
+	defer cursor.Close()
+	skipHashCheck := false
+	if cursor.Count() > 0 || cursor.HasMore() {
+		for {
+			gma := common.DB_GMA{}
+			_, err = cursor.ReadDocument(arangoCTX, &gma)
+			if driver.IsNoMoreDocuments(err) {
+				break
+			} else if err != nil {
+				return err
+			}
+			if gma.Success {
+				return fmt.Errorf("GMA with ID %s was successful at %v", gma.ID, gma.ProcessingEnd)
+			} else {
+				skipHashCheck = true
+			}
+		}
+	}
+
 	fileStat, err := os.Stat(filePath)
 	if err != nil {
 		return err
@@ -241,6 +271,7 @@ func ProcessGMA(filePath string) (err error) {
 	if dboGMA.GMASize < 200 {
 		return fmt.Errorf("GMA File too small, skipping")
 	}
+
 	log.Printf("Opening %s\n", filePath)
 	gmaReader, err := gma.NewReader(filePath)
 	if err != nil {
@@ -262,9 +293,15 @@ func ProcessGMA(filePath string) (err error) {
 	if err != nil {
 		return err
 	}
-	if dboIDExists {
+	if dboIDExists && !skipHashCheck {
 		return fmt.Errorf("GMA with ID %s exists", dboGMA.ID)
 	}
+	if dboIDExists && skipHashCheck {
+		_, err = colGMA.RemoveDocument(arangoCTX, dboGMA.ID)
+		if err != nil {
+			return err
+		}
+	}

 	header, err := gmaReader.ReadHeader()
 	if err != nil {
@@ -444,7 +481,7 @@ func ProcessGMA(filePath string) (err error) {
 	}
 	uploadBar.Describe("Uploading")
-	err = common.MultipartUpload(httpClient, fmt.Sprintf("http://127.0.0.1:13371/stash/%s/%d", dboGMA2File.UploadID, dboGMA2File.FileSize), dboGMA2File.LocalFileName, fileInfoJSON)
+	err = common.MultipartUpload(httpClient, fmt.Sprintf("http://127.0.0.1:13371/stash/%s/%d", dboGMA2File.UploadID, dboGMA2File.FileSize), dboGMA2File.LocalFileName, fileInfoJSON, workerID)
 	if err != nil {
 		log.Println("err @common.MultipartUpload")
 		log.Println(err)
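
Note that the new duplicate check builds its AQL string with fmt.Sprintf, so a path containing a single quote would break the query. go-driver accepts bind variables as the third argument to Query; a sketch of the same lookup with a bind variable (an alternative, not what this commit ships):

package main

import (
	"context"

	adriver "github.com/arangodb/go-driver"
)

// findGMAByPath runs the same duplicate check as ProcessGMA, but with a
// bind variable instead of interpolating the path into the query string.
func findGMAByPath(ctx context.Context, db adriver.Database, path string) (adriver.Cursor, error) {
	query := "FOR g IN gma FILTER g.originalPath == @path RETURN g"
	return db.Query(ctx, query, map[string]interface{}{"path": path})
}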

@@ -22,6 +22,7 @@ import (
 	"git.cheetah.cat/worksucc/gma-puzzles/common"
 	adriver "github.com/arangodb/go-driver"
 	ahttp "github.com/arangodb/go-driver/http"
+	"github.com/klauspost/compress/zstd"
 	"github.com/labstack/echo/v4"
 	"github.com/labstack/echo/v4/middleware"
 	"github.com/twinj/uuid"
@@ -68,7 +69,7 @@ type PoolFile struct {
 type PoolMaster struct {
 	cachePath string
 	finalPath string
-	CurrentPool *Pool
+	CurrentPool map[string]*Pool
 	lock      sync.Mutex
 	WORMLock  sync.Mutex
@@ -175,7 +176,14 @@ func (p *Pool) OpenTar() (err error) {
 		return err
 	}
 	p.items = []string{}
-	p.tarReader = tar.NewReader(p.file)
+
+	decompressor, err := zstd.NewReader(p.file, zstd.WithDecoderConcurrency(8))
+	if err != nil {
+		panic(err)
+	}
+	defer decompressor.Close()
+	p.tarReader = tar.NewReader(decompressor)
+
 	for {
 		header, err := p.tarReader.Next()
 		if err == io.EOF {
@@ -229,6 +237,7 @@ func (p *Pool) Unload() {
 func NewPoolMaster(finalPath string, cachePath string) (poolMaster PoolMaster, err error) {
 	poolMaster.finalPath = finalPath
 	poolMaster.cachePath = cachePath
+	poolMaster.CurrentPool = make(map[string]*Pool)
 	poolMaster.WORMPools = make(map[string]*Pool)
 	//poolMaster.lock = sync.Mutex{}
@@ -266,30 +275,33 @@ func (p *PoolMaster) NewPool() (*Pool, error) {
 	return &pool, nil
 }

-func (p *PoolMaster) GetCurrentWriteablePool() (pool *Pool, err error) {
+func (p *PoolMaster) GetCurrentWriteablePool(workerID string) (pool *Pool, err error) {
 	//fmt.Printf("Current Pool %s, ItemCount = %d\n", pool.PoolID, pool.itemCount)
-	if p.CurrentPool != nil && p.CurrentPool.itemCount >= PoolMaxItems {
+	if p.CurrentPool[workerID] != nil && p.CurrentPool[workerID].itemCount >= PoolMaxItems {
 		fmt.Printf("Aquiring Lock for GetCurrentWriteablepool\n")
 		p.lock.Lock()
 		defer p.lock.Unlock()
 		defer fmt.Printf("unlock GetCurrentWriteablepool\n")
 		fmt.Printf("Aquired Lock success for GetCurrentWriteablepool\n")
-		p.CurrentPool.ReadOnly = true
-		p.FullPools = append(p.FullPools, p.CurrentPool)
+		p.CurrentPool[workerID].ReadOnly = true
+		p.CurrentPool[workerID].LastTouchy = time.Now()
+		p.FullPools = append(p.FullPools, p.CurrentPool[workerID])
 		// queue for compression
-		fmt.Printf("GetCurrentWriteablePool(): current Pool (%s) is full (%d), creating new one\n", p.CurrentPool.PoolID, p.CurrentPool.itemCount)
-		p.CurrentPool = nil
+		fmt.Printf("GetCurrentWriteablePool(): current Pool (%s) is full (%d), creating new one\n", p.CurrentPool[workerID].PoolID, p.CurrentPool[workerID].itemCount)
+		p.CurrentPool[workerID] = nil
 	}
-	if p.CurrentPool == nil {
+	if p.CurrentPool[workerID] == nil {
 		fmt.Printf("Creating new Pool")
-		p.CurrentPool, err = p.AcquireNewOrRecoverPool()
-		fmt.Printf("... got [%s]\n", p.CurrentPool.PoolID)
+		p.CurrentPool[workerID], err = p.AcquireNewOrRecoverPool()
+		err := os.WriteFile(filepath.Join(p.cachePath, "pool", fmt.Sprintf("%s.worker", p.CurrentPool[workerID].PoolID)), []byte(workerID), os.ModePerm)
 		if err != nil {
 			return pool, err
 		}
+		fmt.Printf("... got [%s]\n", p.CurrentPool[workerID].PoolID)
 	}
-	return p.CurrentPool, nil
+	return p.CurrentPool[workerID], nil
 }

 func RestorePoolFromFolder(folderPath string) (pool *Pool, err error) {
 	pool = &Pool{}
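
The worker-to-pool assignment introduced here survives restarts through a small sidecar file: GetCurrentWriteablePool writes <poolID>.worker next to the pool directory, and ScanForLocalPools (next hunk) reads it back to reattach partial pools. A sketch of that convention, with placeholder paths:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	cachePath := "/tmp/cache" // placeholder
	poolID := "pool-0001"     // placeholder
	workerID := "def"

	// GetCurrentWriteablePool records which worker owns a freshly acquired pool
	sidecar := filepath.Join(cachePath, "pool", fmt.Sprintf("%s.worker", poolID))
	if err := os.WriteFile(sidecar, []byte(workerID), os.ModePerm); err != nil {
		panic(err)
	}

	// ScanForLocalPools reads it back to reassign partial pools after a restart
	owner, err := os.ReadFile(sidecar)
	if err != nil {
		panic(err)
	}
	fmt.Printf("pool %s belongs to worker %q\n", poolID, string(owner))
}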
@@ -324,7 +336,7 @@ func (p *PoolMaster) ScanForLocalPools() (err error) {
 		if e.IsDir() {
 			fmt.Printf("Scanning For Local Pools, found %s:", e.Name())
-			tarFinalPath := filepath.Join(p.finalPath, fmt.Sprintf("%s.tar", e.Name()))
+			tarFinalPath := filepath.Join(p.finalPath, fmt.Sprintf("%s.tar.zst", e.Name()))
 			_, err = os.Stat(tarFinalPath)
 			finalPathExists := false
 			if err != nil {
@@ -358,8 +370,27 @@ func (p *PoolMaster) ScanForLocalPools() (err error) {
 			if err != nil {
 				return err
 			}
+			workerCacheFileName := filepath.Join(p.cachePath, "pool", fmt.Sprintf("%s.worker", e.Name()))
 			fmt.Printf("is readonly %v itemCount=%d\n", restoredPool.ReadOnly, restoredPool.itemCount)
-			p.LocalPools = append(p.LocalPools, restoredPool)
+			if restoredPool.itemCount == 500 {
+				p.LocalPools = append(p.LocalPools, restoredPool)
+			} else {
+				_, err = os.Stat(workerCacheFileName) // if we have a worker assignment file
+				if err != nil {
+					if !errors.Is(err, os.ErrNotExist) {
+						return err
+					}
+					// if not exists
+					p.LocalPools = append(p.LocalPools, restoredPool)
+				} else {
+					workerBytes, err := os.ReadFile(workerCacheFileName)
+					if err != nil {
+						return err
+					}
+					p.CurrentPool[string(workerBytes)] = restoredPool
+				}
+			}
 		}
 	}
 	return nil
@@ -373,6 +404,7 @@ func (p *PoolMaster) ImportPoolPackResult(packResult PoolPackResult) (err error) {
 		Hash:      packResult.Hash,
 		Size:      packResult.Size,
 		FileCount: packResult.FileCount,
+		Created:   time.Now(),
 		NotReady:  true,
 		ReadOnly:  true,
 		Finalized: true,
@@ -429,7 +461,7 @@ func (p *PoolMaster) ImportPoolPackResult(packResult PoolPackResult) (err error) {
 func (p *PoolMaster) MovePoolPackToWORM(packResult PoolPackResult) (err error) {
 	startTime := time.Now()
-	targetFileName := filepath.Join(p.finalPath, fmt.Sprintf("%s.tar", packResult.PoolID))
+	targetFileName := filepath.Join(p.finalPath, fmt.Sprintf("%s.tar.zst", packResult.PoolID))
 	err = common.MoveFile(packResult.outputFileName, targetFileName)
 	if err != nil {
 		return err
@@ -501,19 +533,25 @@ func (p *PoolMaster) PackPool(poolID string) (packResult PoolPackResult, err error) {
 	startTime := time.Now()
 	packResult.PoolID = poolID
-	packResult.outputFileName = filepath.Join(p.cachePath, "pool", fmt.Sprintf("%s.tar", poolID))
+	packResult.outputFileName = filepath.Join(p.cachePath, "pool", fmt.Sprintf("%s.tar.zst", poolID))
 	tarFile, err := os.Create(packResult.outputFileName)
 	if err != nil {
-		return packResult, err
+		return packResult, fmt.Errorf("os.Create: %v", err)
 	}
 	defer tarFile.Close()
-	tw := tar.NewWriter(tarFile)
+
+	compressor, err := zstd.NewWriter(tarFile, zstd.WithEncoderLevel(4))
+	if err != nil {
+		return packResult, fmt.Errorf("zstd.NewWriter: %v", err)
+	}
+	defer compressor.Close()
+	tw := tar.NewWriter(compressor)
 	defer tw.Close()

 	entries, err := os.ReadDir(filepath.Join(p.cachePath, "pool", poolID))
 	if err != nil {
-		return packResult, err
+		return packResult, fmt.Errorf("os.ReadDir: %v", err)
 	}
 	//fmt.Printf("len(entries) == %d\n", len(entries))
 	if len(entries) != PoolMaxItems {
@@ -524,13 +562,13 @@ func (p *PoolMaster) PackPool(poolID string) (packResult PoolPackResult, err error) {
 		file, err := os.Open(originalPath)
 		if err != nil {
-			return packResult, err
+			return packResult, fmt.Errorf("for os.Open: %v", err)
 		}
 		defer file.Close()

 		info, err := file.Stat()
 		if err != nil {
-			return packResult, err
+			return packResult, fmt.Errorf("for fs.Stat: %v", err)
 		}

 		tarFileHeader, err := tar.FileInfoHeader(info, info.Name())
@@ -548,47 +586,88 @@ func (p *PoolMaster) PackPool(poolID string) (packResult PoolPackResult, err error) {
 		}
 		packResult.FileCount++
 		packResult.Files = append(packResult.Files, e.Name())
+		fmt.Printf("*")
 	}

-	err = tw.Flush()
+	err = tw.Close()
+	if err != nil {
+		return packResult, err
+	}
+
+	err = compressor.Close()
 	if err != nil {
 		return packResult, err
 	}
+
 	err = tarFile.Close()
 	if err != nil {
 		return packResult, err
 	}

 	// re-open and check
-	tarFileCheck, err := os.Open(packResult.outputFileName)
-	if err != nil {
-		return packResult, err
-	}
-	defer tarFileCheck.Close()
-
-	shaHasher := sha256.New()
-	hashedBytes, err := io.Copy(shaHasher, tarFileCheck)
-	if err != nil {
-		return packResult, err
-	}
-	packResult.Hash = fmt.Sprintf("%x", shaHasher.Sum(nil))
-	fmt.Printf("PackPoolTar hash is %s\n", packResult.Hash)
+	{
+		tarFileCheck, err := os.Open(packResult.outputFileName)
+		if err != nil {
+			return packResult, err
+		}
+		defer tarFileCheck.Close()
+
+		shaHasher := sha256.New()
+		hashedBytes, err := io.Copy(shaHasher, tarFileCheck)
+		if err != nil {
+			return packResult, err
+		}
+		packResult.Hash = fmt.Sprintf("%x", shaHasher.Sum(nil))
+		fmt.Printf("PackPoolTar hash is %s\n", packResult.Hash)
+		tarFileCheck.Seek(0, 0)
+
+		// validate written tar-chunk
+		decompressor, err := zstd.NewReader(tarFileCheck, zstd.WithDecoderConcurrency(8))
+		if err != nil {
+			panic(err)
+		}
+		defer decompressor.Close()
+		tarFileCheckReader := tar.NewReader(decompressor)
+		//filenamesReadBackList := []string{}
+		for {
+			header, err := tarFileCheckReader.Next()
+			//header.PAXRecords
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return packResult, err
+			}
+			hasher := sha256.New()
+			hashedBytes, err := io.Copy(hasher, tarFileCheckReader)
+			if err != nil {
+				return packResult, err
+			}
+			readBackChecksum := fmt.Sprintf("%x", hasher.Sum(nil))
+			if hashedBytes != header.Size {
+				return packResult, fmt.Errorf("validation on output archive, incorrect size file %s has %d should be %d", header.Name, hashedBytes, header.Size)
+			}
+			if header.Name != readBackChecksum {
+				return packResult, fmt.Errorf("validation on output archive, incorrect checksum file %s has %s", header.Name, readBackChecksum)
+			}
+			//filenamesReadBackList = append(filenamesReadBackList, header.Name)
+		}
+
+		packFileStats, err := tarFileCheck.Stat()
+		if err != nil {
+			return packResult, err
+		}
+		packResult.Size = packFileStats.Size()
+		if hashedBytes != packResult.Size {
+			return packResult, fmt.Errorf("WORM Copy HashedBytes %d != FileSize %d", hashedBytes, packResult.Size)
+		}
+		fmt.Printf("PackPool Duration %dms\n", time.Since(startTime).Milliseconds())
+	}

 	// TODO: write index json describing the files inside, pure hash list, aswell
 	// as dictionary containing all the infos for restore/disaster-recovery into Arango

-	packFileStats, err := tarFileCheck.Stat()
-	if err != nil {
-		return packResult, err
-	}
-	packResult.Size = packFileStats.Size()
-	if hashedBytes != packResult.Size {
-		return packResult, fmt.Errorf("WORM Copy HashedBytes %d != FileSize %d", hashedBytes, packResult.Size)
-	}
-	fmt.Printf("PackPool Duration %dms\n", time.Since(startTime).Milliseconds())
-
 	return packResult, nil
 }
@@ -603,10 +682,12 @@ func (p *PoolMaster) AcquireNewOrRecoverPool() (pool *Pool, err error) {
 }

 func (p *PoolMaster) Lookup(id string) (exists bool) {
-	if p.CurrentPool != nil { // CurrentPool
-		for _, poolItem := range p.CurrentPool.items {
-			if poolItem == id {
-				return true
+	for _, cp := range p.CurrentPool {
+		if cp != nil { // CurrentPool
+			for _, poolItem := range cp.items {
+				if poolItem == id {
+					return true
+				}
 			}
 		}
 	}
@ -643,13 +724,15 @@ func (p *PoolMaster) Lookup(id string) (exists bool) {
func (p *PoolMaster) FetchLoadWORM(chunkID string, fileID string, writer io.Writer) (err error) { func (p *PoolMaster) FetchLoadWORM(chunkID string, fileID string, writer io.Writer) (err error) {
fmt.Printf("FetchLoadWORM(chunkID %s, fileID %s, ...)\n", chunkID, fileID) fmt.Printf("FetchLoadWORM(chunkID %s, fileID %s, ...)\n", chunkID, fileID)
// search within loaded worm-pools // search within loaded worm-pools
//fmt.Println("WormPool For Start")
for wormID, wormPool := range p.WORMPools { for wormID, wormPool := range p.WORMPools {
//fmt.Printf("WORMPool[%s] for-iter\n", wormID)
if wormID != chunkID { if wormID != chunkID {
continue continue
} }
for _, poolItem := range wormPool.items { for _, poolItem := range wormPool.items {
if poolItem == fileID { if poolItem == fileID {
fmt.Printf("Fetch WORMPool %s file %s\n", wormID, fileID) //fmt.Printf("Fetch WORMPool %s file %s\n", wormID, fileID)
return wormPool.Fetch(fileID, writer) return wormPool.Fetch(fileID, writer)
} }
} }
@@ -660,11 +743,11 @@ func (p *PoolMaster) FetchLoadWORM(chunkID string, fileID string, writer io.Writer) (err error) {
 	// TODO: every Method here should have locked the WORMLock!!
 	/*
 		fmt.Printf("Aquiring WORMLock for FetchLoadWORM\n")
 		p.WORMLock.Lock()
 		defer p.WORMLock.Unlock()
-		defer fmt.Printf("unlock WORMLock FetchLoadWORM\n") 0
+		defer fmt.Printf("unlock WORMLock FetchLoadWORM\n")
 		fmt.Printf("Aquired WORMLock success for FetchLoadWORM\n")
 	*/

 	fmt.Printf("Aquiring Lock for FetchLoadWORM\n")
@@ -685,15 +768,17 @@ func (p *PoolMaster) FetchLoadWORM(chunkID string, fileID string, writer io.Writer) (err error) {
 		Finalized:  dboChunk.Finalized,
 		LastTouchy: time.Now(),
-		filePath:   filepath.Join(p.finalPath, fmt.Sprintf("%s.tar", dboChunk.ID)),
+		filePath:   filepath.Join(p.finalPath, fmt.Sprintf("%s.tar.zst", dboChunk.ID)),
 	}
-	fmt.Println("initialized loadedWormPool, Opening tar...")
+	fmt.Printf("initialized loadedWormPool (%s), Opening tar...\n", dboChunk.ID)
 	err = loadedWormPool.OpenTar()
 	if err != nil {
+		fmt.Println(err)
 		return err
 	}
-	fmt.Println("extracted")
+	fmt.Printf("extracted and key = %s\n", loadedWormPool.PoolID)
 	p.WORMPools[loadedWormPool.PoolID] = &loadedWormPool
+	fmt.Println(p.WORMPools)
 	return loadedWormPool.Fetch(fileID, writer)
 	//return nil
 }
@@ -714,24 +799,26 @@ func (p *PoolMaster) CleanWORMTemp() (err error) {
 }

 func (p *PoolMaster) Fetch(id string, writer io.Writer) (err error) {
-	if p.CurrentPool != nil {
-		for _, poolItem := range p.CurrentPool.items {
-			if poolItem == id {
-				fmt.Printf("Fetch CurrentPool %s\n", id)
-				poolLocalFilePath := filepath.Join(p.cachePath, "pool", p.CurrentPool.PoolID, id)
-				//fmt.Println(poolLocalFilePath)
-				//fmt.Printf("%s %s\n", p.CurrentPool.PoolID, poolItem)
-				srcLocalFile, err := os.Open(poolLocalFilePath)
-				if err != nil {
-					return err
-				}
-				//fmt.Println("Closer")
-				defer srcLocalFile.Close()
-				//fmt.Println("io.Copy")
-				if _, err = io.Copy(writer, srcLocalFile); err != nil {
-					return err
+	for _, cp := range p.CurrentPool {
+		if cp != nil {
+			for _, poolItem := range cp.items {
+				if poolItem == id {
+					fmt.Printf("Fetch CurrentPool %s\n", id)
+					poolLocalFilePath := filepath.Join(p.cachePath, "pool", cp.PoolID, id)
+					//fmt.Println(poolLocalFilePath)
+					//fmt.Printf("%s %s\n", p.CurrentPool.PoolID, poolItem)
+					srcLocalFile, err := os.Open(poolLocalFilePath)
+					if err != nil {
+						return err
+					}
+					//fmt.Println("Closer")
+					defer srcLocalFile.Close()
+					//fmt.Println("io.Copy")
+					if _, err = io.Copy(writer, srcLocalFile); err != nil {
+						return err
+					}
+					return nil
 				}
-				return nil
 			}
 		}
 	}
@@ -750,6 +837,7 @@ func (p *PoolMaster) Fetch(id string, writer io.Writer) (err error) {
 		for _, poolItem := range fullPool.items {
 			if poolItem == id {
 				fmt.Printf("Fetch FullPool %s\n", id)
+				fullPool.LastTouchy = time.Now()
 				poolLocalFilePath := filepath.Join(p.cachePath, "pool", fullPool.PoolID, id)
 				srcLocalFile, err := os.Open(poolLocalFilePath)
 				if err != nil {
@@ -800,9 +888,8 @@ func (p *PoolMaster) Fetch(id string, writer io.Writer) (err error) {
 	}
 	return nil
 }

-func (p *PoolMaster) Store(id string, src io.Reader, targetSize int64) (err error) {
-
-	pool, err := p.GetCurrentWriteablePool()
+func (p *PoolMaster) Store(id string, workerID string, src io.Reader, targetSize int64) (err error) {
+	pool, err := p.GetCurrentWriteablePool(workerID)
 	if err != nil {
 		return err
 	}
@@ -899,26 +986,34 @@ func main() {
 			var deletedPools []string
 			for _, fullPool := range poolMaster.FullPools {
+				if time.Since(fullPool.LastTouchy).Minutes() < 1 {
+					continue
+				}
 				packResult, err := poolMaster.PackPool(fullPool.PoolID)
 				if err != nil {
-					panic(err)
+					panic(fmt.Errorf("error @PackPool: %v", err))
 				}
 				err = poolMaster.ImportPoolPackResult(packResult)
 				if err != nil {
-					panic(err)
+					panic(fmt.Errorf("error @ImportPoolPackResult: %v", err))
 				}
 				err = poolMaster.MovePoolPackToWORM(packResult)
 				if err != nil {
-					panic(err)
+					panic(fmt.Errorf("error @MovePoolPackToWORM: %v", err))
 				}
 				err = poolMaster.WriteRecoveryFile(packResult, fullPool)
 				if err != nil {
-					panic(err)
+					panic(fmt.Errorf("error @WriteRecoveryFile: %v", err))
 				}
 				os.RemoveAll(filepath.Join(poolMaster.cachePath, "pool", fullPool.PoolID))
 				deletedPools = append(deletedPools, fullPool.PoolID)

 				_, err = colChunk.UpdateDocument(arangoCTX, packResult.PoolID, common.DB_Chunk{
 					NotReady:  false,
+					Finalized: true,
+					ReadOnly:  true,
+					Hash:      packResult.Hash,
+					Size:      packResult.Size,
 				})
 				if err != nil {
 					panic(err)
@@ -966,6 +1061,10 @@ func main() {
 				if err != nil {
 					panic(err)
 				}
+				err = poolMaster.WriteRecoveryFile(packResult, localPool)
+				if err != nil {
+					panic(err)
+				}
 				os.RemoveAll(filepath.Join(poolMaster.cachePath, "pool", localPool.PoolID))
 			}
 			//os.RemoveAll(filepath.Join(poolMaster.cachePath, "pool", localPool.PoolID))
@@ -981,6 +1080,9 @@ func main() {
 	e.GET("/", func(c echo.Context) error {
 		return c.String(http.StatusOK, "Hello, World!")
 	})
+	e.GET("/pmdump", func(c echo.Context) error {
+		return c.JSON(http.StatusOK, poolMaster)
+	})

 	e.GET("/check/:id", func(c echo.Context) error {
 		id := c.Param("id")
@@ -1021,6 +1123,7 @@ func main() {
 			fmt.Println(err)
 			return c.String(http.StatusExpectationFailed, "Error")
 		}
+		workerID := c.FormValue("worker")
 		infoJSON := c.FormValue("info")
 		fileInfo := common.DB_File{}
 		err = json.Unmarshal([]byte(infoJSON), &fileInfo)
@@ -1061,13 +1164,14 @@ func main() {
 			return c.String(http.StatusExpectationFailed, "Error")
 		}
 		defer formStream.Close()
-		err = poolMaster.Store(id, formStream, sizeVal)
+		err = poolMaster.Store(id, workerID, formStream, sizeVal)
 		if err != nil {
 			fmt.Println(err)
 			return c.String(http.StatusExpectationFailed, "Error")
 		}
 		fmt.Println("...stashed")
 		fmt.Println(fileInfo)
+		fileInfo.Created = time.Now()
 		_, err = colFile.CreateDocument(arangoCTX, fileInfo)
 		if err != nil {
 			fmt.Println(err)
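
The read-back validation added to PackPool depends on the convention that every tar entry is named after the SHA-256 of its contents. Distilled into a standalone checker (placeholder file name, same klauspost/compress API as above):

package main

import (
	"archive/tar"
	"crypto/sha256"
	"fmt"
	"io"
	"os"

	"github.com/klauspost/compress/zstd"
)

// verifyPool re-reads a packed pool and checks every entry against its
// name, which by convention is the file's SHA-256 hex digest.
func verifyPool(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	dec, err := zstd.NewReader(f)
	if err != nil {
		return err
	}
	defer dec.Close()

	tr := tar.NewReader(dec)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		h := sha256.New()
		n, err := io.Copy(h, tr)
		if err != nil {
			return err
		}
		if n != hdr.Size {
			return fmt.Errorf("%s: size %d, want %d", hdr.Name, n, hdr.Size)
		}
		if sum := fmt.Sprintf("%x", h.Sum(nil)); sum != hdr.Name {
			return fmt.Errorf("%s: checksum mismatch (%s)", hdr.Name, sum)
		}
	}
}

func main() {
	// placeholder pool file
	if err := verifyPool("pool.tar.zst"); err != nil {
		panic(err)
	}
	fmt.Println("pool OK")
}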

@@ -0,0 +1,76 @@
package main
import (
"archive/tar"
"fmt"
"io"
"os"
"path/filepath"
"github.com/klauspost/compress/zstd"
)
func main() {
// write path: tar stream -> zstd compressor -> output file
tarFile, err := os.Create("test.tar.zst")
if err != nil {
panic(err)
}
defer tarFile.Close()
compressor, err := zstd.NewWriter(tarFile, zstd.WithEncoderLevel(4))
if err != nil {
panic(err)
}
defer compressor.Close()
tw := tar.NewWriter(compressor)
defer tw.Close()
entries, err := os.ReadDir("/home/cheetah/dev/gma-puzzles/zstd-tar-test/testpayload")
if err != nil {
panic(err)
}
for _, e := range entries {
originalPath := filepath.Join("/home/cheetah/dev/gma-puzzles/zstd-tar-test/testpayload", e.Name())
file, err := os.Open(originalPath)
if err != nil {
panic(err)
}
defer file.Close()
info, err := file.Stat()
if err != nil {
panic(err)
}
tarFileHeader, err := tar.FileInfoHeader(info, info.Name())
if err != nil {
panic(err)
}
err = tw.WriteHeader(tarFileHeader)
if err != nil {
panic(err)
}
_, err = io.Copy(tw, file)
if err != nil {
panic(err)
}
fmt.Println(info.Name())
}
// explicit close order matters: finish the tar stream first so its footer
// is flushed into the zstd frame, then flush the frame into the file
err = tw.Close()
if err != nil {
panic(err)
}
err = compressor.Close()
if err != nil {
panic(err)
}
err = tarFile.Close()
if err != nil {
panic(err)
}
}
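
A matching read-back for the test archive, mirroring what the server's OpenTar now does: wrap the file in a zstd decompressor, then walk the tar entries:

package main

import (
	"archive/tar"
	"fmt"
	"io"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	f, err := os.Open("test.tar.zst")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// read path: file -> zstd decompressor -> tar reader
	dec, err := zstd.NewReader(f)
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	tr := tar.NewReader(dec)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s (%d bytes)\n", hdr.Name, hdr.Size)
	}
}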