You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
498 lines
15 KiB
Go
498 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime/debug"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.cheetah.cat/worksucc/gma-puzzles/common"
|
|
"git.cheetah.cat/worksucc/gma-puzzles/gma"
|
|
adriver "github.com/arangodb/go-driver"
|
|
ahttp "github.com/arangodb/go-driver/http"
|
|
"github.com/twinj/uuid"
|
|
|
|
_ "net/http/pprof"
|
|
)
|
|
|
|
var (
	// Package-level database state, initialized exactly once by
	// InitDatabase before any processing starts. Not guarded by a lock;
	// the program accesses ArangoDB from the main goroutine only.
	arangoDB  adriver.Database
	arangoCTX context.Context

	colChunk      adriver.Collection // "chunk" collection
	colFile       adriver.Collection // "file" collection
	colFile2Chunk adriver.Collection // "file_chunk_map" collection
	colGMA        adriver.Collection // "gma" collection
	colGMA2File   adriver.Collection // "gma_file_map" collection
)
|
|
|
|
func ConnectDB(baseURL string, arangoUser string, arangoPWD string, arangoDatabase string) (driver adriver.Database, ctx context.Context, err error) {
|
|
log.Println("connectDB:", "Starting Connection Process...")
|
|
|
|
// Retry Loop for Failed Connections
|
|
for i := 0; i < 6; i++ {
|
|
if i == 5 {
|
|
return driver, ctx, fmt.Errorf("connectdb: unable to connect to database %d times!", i)
|
|
} else if i > 0 {
|
|
time.Sleep(30 * time.Second)
|
|
}
|
|
|
|
// Connect to ArangoDB URL
|
|
conn, err := ahttp.NewConnection(ahttp.ConnectionConfig{
|
|
Endpoints: []string{baseURL},
|
|
TLSConfig: &tls.Config{ /*...*/ },
|
|
})
|
|
if err != nil {
|
|
log.Println("connectDB:", "Cannot Connect to ArangoDB!", err)
|
|
continue
|
|
}
|
|
|
|
// Connect Driver to User
|
|
client, err := adriver.NewClient(adriver.ClientConfig{
|
|
Connection: conn,
|
|
Authentication: adriver.BasicAuthentication(arangoUser, arangoPWD),
|
|
})
|
|
if err != nil {
|
|
log.Println("connectDB:", "Cannot Authenticate ArangoDB User!", err)
|
|
continue
|
|
}
|
|
|
|
// Create Context for Database Access
|
|
ctx = context.Background()
|
|
driver, err = client.Database(ctx, arangoDatabase)
|
|
if err != nil {
|
|
log.Println("connectDB:", "Cannot Load ArangoDB Database!", err)
|
|
continue
|
|
}
|
|
log.Println("connectDB:", "Connection Sucessful!")
|
|
return driver, ctx, nil
|
|
}
|
|
|
|
return driver, ctx, fmt.Errorf("connectDB: FUCK HOW DID THIS EXCUTE?")
|
|
}
|
|
func InitDatabase() (err error) {
|
|
arangoDB, arangoCTX, err = ConnectDB("http://192.168.45.8:8529/", "gma-inator", "gma-inator", "gma-inator")
|
|
|
|
colChunk, err = arangoDB.Collection(arangoCTX, "chunk")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
colFile, err = arangoDB.Collection(arangoCTX, "file")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
colGMA, err = arangoDB.Collection(arangoCTX, "gma")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
colFile2Chunk, err = arangoDB.Collection(arangoCTX, "file_chunk_map")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
colGMA2File, err = arangoDB.Collection(arangoCTX, "gma_file_map")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func main() {
|
|
debug.SetMemoryLimit(6e9)
|
|
|
|
go func() {
|
|
log.Println(http.ListenAndServe("0.0.0.0:6060", nil))
|
|
}()
|
|
|
|
err := InitDatabase()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
/*
|
|
err = colGMA.Truncate(arangoCTX)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
err = colFile.Truncate(arangoCTX)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
err = colGMA2File.Truncate(arangoCTX)
|
|
if err != nil {
|
|
panic(err)
|
|
}*/
|
|
|
|
// /mnt/worksucc/san2/gma/2/1/2/3/2123406190.1591573904.gma
|
|
//fileHandle, err := os.Open("2143898000.1593250551.bin.gma") //2143898000.1593250551.bin")
|
|
//gma, err := gma.NewReader("2143898000.1593250551.bin.gma")
|
|
|
|
folderPath := "/mnt/SC9000/TemporaryTestingShit/" //"/mnt/worksucc/san1/gma/2/5/4/8/"
|
|
folderPathTarget := "/mnt/SC9000/ProcessedGMATest/" //"/mnt/worksucc/san1/gma/2/5/4/8/"
|
|
//0
|
|
entries, err := os.ReadDir(folderPath)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
skipBla := false
|
|
for _, e := range entries {
|
|
if !e.IsDir() && skipBla == true {
|
|
if e.Name() == "2547463094.1626322945.gma" {
|
|
skipBla = false
|
|
} else {
|
|
continue
|
|
}
|
|
}
|
|
if !e.IsDir() {
|
|
err = ProcessGMA(filepath.Join(folderPath, e.Name()))
|
|
if err != nil {
|
|
fmt.Printf("\nERROR: %v\n", err)
|
|
//panic(err)
|
|
continue
|
|
}
|
|
os.Rename(filepath.Join(folderPath, e.Name()), filepath.Join(folderPathTarget, e.Name()))
|
|
}
|
|
}
|
|
}
|
|
|
|
func undoBatch(undoBatch *bool, gmaID string, fileIDs []string, gma2FileIDs []string) (err error) {
|
|
fmt.Printf("undoBatch(%x, %s)\n", undoBatch, gmaID)
|
|
_, err = colGMA.RemoveDocument(arangoCTX, gmaID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, _, err = colFile.RemoveDocuments(arangoCTX, fileIDs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, _, err = colGMA2File.RemoveDocuments(arangoCTX, gma2FileIDs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
func ProcessGMA(filePath string) (err error) {
|
|
var (
|
|
fileIDs []string
|
|
gma2FileIDs []string
|
|
failedProcessing = true
|
|
)
|
|
dboGMA := common.DB_GMA{}
|
|
dboGMA.BatchID = uuid.NewV4().String() // use this for rapid unscheduled dissassembly
|
|
|
|
dboGMA.OriginalPath = filePath
|
|
dboGMA.ProcessingStart = time.Now()
|
|
fileStat, err := os.Stat(filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dboGMA.StatModTime = fileStat.ModTime()
|
|
dboGMA.GMASize = fileStat.Size()
|
|
|
|
if dboGMA.GMASize < 200 {
|
|
return fmt.Errorf("GMA File too small, skipping")
|
|
}
|
|
fmt.Printf("Opening %s\n", filePath)
|
|
gmaReader, err := gma.NewReader(filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer gmaReader.Close()
|
|
|
|
dboGMA.GMAHash, err = gmaReader.GetSHA256()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dboGMA.ID = dboGMA.GMAHash
|
|
gmaReader.FileHandle.Seek(0, 0)
|
|
|
|
gmaTempPath := filepath.Join("/home/cheetah/dev/gma-puzzles/temp", dboGMA.ID)
|
|
defer os.RemoveAll(gmaTempPath) // clean up under any circumstances
|
|
|
|
dboIDExists, err := colGMA.DocumentExists(arangoCTX, dboGMA.ID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if dboIDExists {
|
|
return fmt.Errorf("GMA with ID %s exists", dboGMA.ID)
|
|
}
|
|
|
|
header, err := gmaReader.ReadHeader()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dboGMA.Header = header
|
|
fmt.Printf("AddonVersion=%d\n", header.AddonVersion)
|
|
fmt.Printf("FormatVersion=%d\n", header.FormatVersion)
|
|
fmt.Printf("FormatVersionDiscardByte=%d\n", header.FormatVersionDiscardByte)
|
|
//fmt.Printf("r.cursorOffset = %d\n", gmaReader.GetOffset())
|
|
firstType, files, err := gmaReader.ReadFiles()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dboGMA.FirstType = firstType
|
|
|
|
//fmt.Printf("r.cursorOffset = %d\n", gmaReader.GetOffset())
|
|
var (
|
|
dboGMA2Files []common.DB_GMA2File
|
|
dboFiles []common.DB_File
|
|
)
|
|
for _, file := range files {
|
|
fmt.Printf("%s CRC: %d Offset: %d Size: %d NextType: %d FileNumber: %d\n", file.FileName, file.CRC, file.Offset, file.FileSize, file.NextType, file.FileNumber)
|
|
if file.NextType > uint32(file.FileNumber+10) { // Something is fucked
|
|
fmt.Printf("Current Cursor %d", gmaReader.GetOffset())
|
|
for _, otherFile := range files[file.FileNumber:] {
|
|
fmt.Printf("OTHERFILE %s CRC: %d Offset: %d Size: %d NextType: %d FileNumber: %d\n", otherFile.FileName, otherFile.CRC, otherFile.Offset, otherFile.FileSize, otherFile.NextType, otherFile.FileNumber)
|
|
}
|
|
return fmt.Errorf("GMA Header corrupted, NextType %d, FileNumber %d", file.NextType, file.FileNumber)
|
|
}
|
|
destPath := filepath.Join(gmaTempPath, "contents", file.FileName)
|
|
dir := filepath.Dir(destPath)
|
|
|
|
err := os.MkdirAll(dir, os.ModePerm)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
destFile, err := os.Create(destPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer destFile.Close()
|
|
extractMeta, err := gmaReader.ExtractFileTo(file, destFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if extractMeta.ExtractedCRC != extractMeta.OriginalMeta.CRC {
|
|
fmt.Printf("gma(%s) checksum in meta (%d) differs from read (%d) [%s]\n", filePath, extractMeta.OriginalMeta.CRC, extractMeta.ExtractedCRC, extractMeta.OriginalMeta.FileName)
|
|
}
|
|
//fmt.Printf("ExtractedMeta %s CRC: %d SHA256: %s\n", file.FileName, extractMeta.ExtractedCRC, extractMeta.ExtractedSHA256)
|
|
dboFile := common.DB_File{
|
|
ID: extractMeta.ExtractedSHA256, // ID is the SHA256, i guess that is good enough?
|
|
BatchID: dboGMA.BatchID,
|
|
InitialPath: file.FileName,
|
|
CRC: file.CRC,
|
|
Size: file.FileSize,
|
|
Hash: extractMeta.ExtractedSHA256,
|
|
Extension: filepath.Ext(file.FileName),
|
|
}
|
|
dboGMA2File := common.DB_GMA2File{
|
|
ID: fmt.Sprintf("%s_%s", dboGMA.ID, extractMeta.ExtractedSHA256),
|
|
BatchID: dboGMA.BatchID,
|
|
File: fmt.Sprintf("file/%s", extractMeta.ExtractedSHA256),
|
|
GMA: fmt.Sprintf("gma/%s", dboGMA.ID),
|
|
FileNumber: extractMeta.OriginalMeta.FileNumber,
|
|
FileName: extractMeta.OriginalMeta.FileName,
|
|
Offset: extractMeta.OriginalMeta.Offset,
|
|
FileSize: extractMeta.OriginalMeta.FileSize,
|
|
CRCMatch: extractMeta.ExtractedCRC == extractMeta.OriginalMeta.CRC,
|
|
CRC: extractMeta.OriginalMeta.CRC,
|
|
NextType: extractMeta.OriginalMeta.NextType,
|
|
|
|
LocalFileName: destPath,
|
|
UploadID: extractMeta.ExtractedSHA256,
|
|
}
|
|
//fmt.Println(dboFile)
|
|
// Add fileIDs from new unknowns
|
|
dboFiles = append(dboFiles, dboFile)
|
|
//fmt.Println(dboGMA2File)
|
|
gma2FileIDs = append(gma2FileIDs, dboGMA2File.ID)
|
|
dboGMA2Files = append(dboGMA2Files, dboGMA2File)
|
|
}
|
|
|
|
lastFile := files[len(files)-1]
|
|
dboGMA.FooterAddonCRC, err = gmaReader.ReadAddonCRC(lastFile.Offset + lastFile.FileSize)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dboGMA.ProcessingEnd = time.Now()
|
|
dboGMA.ProcessingDuration = dboGMA.ProcessingEnd.Sub(dboGMA.ProcessingStart).Milliseconds()
|
|
|
|
// if anything fails, lets undo the documents we imported
|
|
defer undoBatch(&failedProcessing, dboGMA.ID, fileIDs, gma2FileIDs)
|
|
// TODO: Calculate dboGMA.OptimizedSize
|
|
dboGMA.OptimizedSize = 0
|
|
_, err = colGMA.CreateDocument(arangoCTX, dboGMA)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
importStartTime := time.Now()
|
|
|
|
var newUnknownFiles []string
|
|
chunkSize := 5
|
|
for {
|
|
if len(dboFiles) == 0 {
|
|
break
|
|
}
|
|
|
|
// necessary check to avoid slicing beyond
|
|
// slice capacity
|
|
if len(dboFiles) < chunkSize {
|
|
chunkSize = len(dboFiles)
|
|
}
|
|
|
|
// process and work withj
|
|
metaSlice, errorSlice, _ := colFile.CreateDocuments(arangoCTX, dboFiles[0:chunkSize])
|
|
|
|
//fmt.Println("Metaslice")
|
|
//fmt.Println(metaSlice)
|
|
for _, meta := range metaSlice {
|
|
if !meta.ID.IsEmpty() {
|
|
newUnknownFiles = append(newUnknownFiles, meta.Key)
|
|
fileIDs = append(fileIDs, meta.Key)
|
|
}
|
|
}
|
|
//fmt.Println("ErrorSlice")
|
|
//fmt.Println(errorSlice)
|
|
for _, createError := range errorSlice {
|
|
if createError != nil && strings.Contains(createError.Error(), "unique constraint violated - in index primary of type primary over '_key'") {
|
|
} else if createError != nil {
|
|
return createError
|
|
}
|
|
}
|
|
|
|
dboFiles = dboFiles[chunkSize:]
|
|
}
|
|
fmt.Println()
|
|
|
|
fmt.Printf("Imported dboFiles into Arango and now we have %d new files from %d addon files\n", len(newUnknownFiles), len(files))
|
|
deltaFileSize := int64(0)
|
|
for _, unknownFile := range newUnknownFiles {
|
|
unknownFileID := fmt.Sprintf("file/%s", unknownFile)
|
|
for _, dboGMA2File := range dboGMA2Files {
|
|
if unknownFileID == dboGMA2File.File {
|
|
deltaFileSize += dboGMA2File.FileSize
|
|
}
|
|
}
|
|
}
|
|
dboGMA.OptimizedSize = deltaFileSize
|
|
fmt.Printf("Delta Storage %d bytes\n", deltaFileSize)
|
|
|
|
_, err = colGMA2File.ImportDocuments(arangoCTX, dboGMA2Files, &adriver.ImportDocumentOptions{
|
|
OnDuplicate: adriver.ImportOnDuplicateIgnore,
|
|
//FromPrefix: "gma/",
|
|
//ToPrefix: "file/",
|
|
Complete: true, // will make it fail if any error occurs (and hopefully reverse the trans-action)
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("ImportDocuments File fail: %v", err)
|
|
}
|
|
//fmt.Printf("Code: %d, Created: %d, Ignored: %d, Errors: %d", statsImportGMA2File.Code, statsImportGMA2File.Created, statsImportGMA2File.Ignored, statsImportGMA2File.Errors)
|
|
|
|
fmt.Printf("Import Duration %dms\n", time.Since(importStartTime).Milliseconds())
|
|
fmt.Println()
|
|
// TODO: upload all unknownNewFiles to StorageServer
|
|
var httpClient *http.Client = http.DefaultClient
|
|
for _, unknownFile := range newUnknownFiles {
|
|
unknownFileID := fmt.Sprintf("file/%s", unknownFile)
|
|
for _, dboGMA2File := range dboGMA2Files {
|
|
if unknownFileID == dboGMA2File.File {
|
|
//fmt.Printf("Uploading %s (local %s) to Storage\n", dboGMA2File.UploadID, dboGMA2File.LocalFileName)
|
|
err = common.MultipartUpload(httpClient, fmt.Sprintf("http://127.0.0.1:13371/stash/%s/%d", dboGMA2File.UploadID, dboGMA2File.FileSize), dboGMA2File.LocalFileName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// TODO : fetch all files from storageServer
|
|
// TODO : write new gma from arangoinfo
|
|
// TODO : compare hashes
|
|
{
|
|
destPath := filepath.Join(gmaTempPath, "rewrite.gma")
|
|
dir := filepath.Dir(destPath)
|
|
|
|
err := os.MkdirAll(dir, os.ModePerm)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
gmaWriter, err := gma.NewWriter(destPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer gmaWriter.Close()
|
|
|
|
//fmt.Printf("Writing Header with FormatVersion: %d\n", dboGMA.Header.FormatVersion)
|
|
err = gmaWriter.WriteHeader(dboGMA.Header)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = gmaWriter.WriteFirstType(dboGMA.FirstType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
sort.SliceStable(dboGMA2Files, func(i, j int) bool { return dboGMA2Files[i].FileNumber < dboGMA2Files[j].FileNumber })
|
|
for _, dboGMA2File := range dboGMA2Files {
|
|
//fmt.Printf("WriteFileIndex for %s number %d\n", dboGMA2File.FileName, dboGMA2File.FileNumber)
|
|
err = gmaWriter.WriteFileIndex(dboGMA2File.FileName, dboGMA2File.FileSize, dboGMA2File.CRC, dboGMA2File.NextType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
var httpClient *http.Client = &http.Client{
|
|
Timeout: 15 * time.Minute,
|
|
}
|
|
|
|
for _, dboGMA2File := range dboGMA2Files {
|
|
fmt.Printf("WriteFile for %s number %d = %s\n", dboGMA2File.FileName, dboGMA2File.FileNumber, dboGMA2File.UploadID)
|
|
resp, err := httpClient.Get(fmt.Sprintf("http://127.0.0.1:13371/fetch/%s", dboGMA2File.UploadID))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
err = gmaWriter.WriteFile(resp.Body)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
gmaWriter.FileHandle.Seek(0, 2)
|
|
fmt.Printf("Writing Footer CRC %d\n\n", dboGMA.FooterAddonCRC)
|
|
gmaWriter.WriteFooterCRC(dboGMA.FooterAddonCRC)
|
|
|
|
gmaWriter.FileHandle.Seek(0, 0)
|
|
writeHash, err := gmaWriter.GetSHA256()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
fmt.Printf("Rewrite Hash is %s %s\n", writeHash, destPath)
|
|
fmt.Printf("Original Hash is %s %s\n", dboGMA.GMAHash, dboGMA.OriginalPath)
|
|
fmt.Println()
|
|
writeStat, err := os.Stat(destPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
writeSize := writeStat.Size()
|
|
if writeSize != dboGMA.GMASize {
|
|
//fail
|
|
return fmt.Errorf("RewriteCheck failed, original=%s (%d bytes), rewrite=%s (%d bytes)", dboGMA.GMAHash, dboGMA.GMASize, writeHash, writeSize)
|
|
}
|
|
if writeHash != dboGMA.GMAHash {
|
|
//fail
|
|
return fmt.Errorf("RewriteCheck failed, original=%s (%d bytes), rewrite=%s (%d bytes)", dboGMA.GMAHash, dboGMA.GMASize, writeHash, writeSize)
|
|
}
|
|
}
|
|
// TODO: 4... profit?
|
|
|
|
dboGMA.ProcessingEnd = time.Now()
|
|
dboGMA.ProcessingDuration = dboGMA.ProcessingEnd.Sub(dboGMA.ProcessingStart).Milliseconds()
|
|
dboGMA.Success = true
|
|
|
|
_, err = colGMA.UpdateDocument(arangoCTX, dboGMA.ID, dboGMA)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
failedProcessing = false
|
|
return nil
|
|
}
|