// NOTE(review): removed Gitea web-interface scaffolding ("topics" hint,
// line/size/language badges, blame timestamps) that was captured when this
// file was scraped from the repository web view.
package main
import (
	"context"
	"crypto/tls"
	"flag"
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof"
	"os"
	"path/filepath"
	"runtime/debug"
	"sort"
	"strings"
	"time"

	"git.cheetah.cat/worksucc/gma-puzzles/common"
	"git.cheetah.cat/worksucc/gma-puzzles/gma"

	adriver "github.com/arangodb/go-driver"
	ahttp "github.com/arangodb/go-driver/http"
	"github.com/twinj/uuid"
)
var (
1 year ago
arangoDB adriver.Database
arangoCTX context.Context
//colChunk adriver.Collection
colFile adriver.Collection
//colFile2Chunk adriver.Collection
colGMA adriver.Collection
colGMA2File adriver.Collection
1 year ago
)
func ConnectDB(baseURL string, arangoUser string, arangoPWD string, arangoDatabase string) (driver adriver.Database, ctx context.Context, err error) {
log.Println("connectDB:", "Starting Connection Process...")
// Retry Loop for Failed Connections
for i := 0; i < 6; i++ {
if i == 5 {
1 year ago
return driver, ctx, fmt.Errorf("connectdb unable to connect to database %d times!", i)
1 year ago
} else if i > 0 {
time.Sleep(30 * time.Second)
}
// Connect to ArangoDB URL
conn, err := ahttp.NewConnection(ahttp.ConnectionConfig{
Endpoints: []string{baseURL},
TLSConfig: &tls.Config{ /*...*/ },
})
if err != nil {
log.Println("connectDB:", "Cannot Connect to ArangoDB!", err)
continue
}
// Connect Driver to User
client, err := adriver.NewClient(adriver.ClientConfig{
Connection: conn,
Authentication: adriver.BasicAuthentication(arangoUser, arangoPWD),
})
if err != nil {
log.Println("connectDB:", "Cannot Authenticate ArangoDB User!", err)
continue
}
// Create Context for Database Access
ctx = context.Background()
driver, err = client.Database(ctx, arangoDatabase)
if err != nil {
log.Println("connectDB:", "Cannot Load ArangoDB Database!", err)
continue
}
log.Println("connectDB:", "Connection Sucessful!")
return driver, ctx, nil
}
return driver, ctx, fmt.Errorf("connectDB: FUCK HOW DID THIS EXCUTE?")
}
func InitDatabase() (err error) {
arangoDB, arangoCTX, err = ConnectDB("http://192.168.45.8:8529/", "gma-inator", "gma-inator", "gma-inator")
1 year ago
/*colChunk, err = arangoDB.Collection(arangoCTX, "chunk")
1 year ago
if err != nil {
return err
1 year ago
}*/
1 year ago
colFile, err = arangoDB.Collection(arangoCTX, "file")
if err != nil {
return err
}
colGMA, err = arangoDB.Collection(arangoCTX, "gma")
if err != nil {
return err
}
1 year ago
/*colFile2Chunk, err = arangoDB.Collection(arangoCTX, "file_chunk_map")
1 year ago
if err != nil {
return err
1 year ago
}*/
1 year ago
colGMA2File, err = arangoDB.Collection(arangoCTX, "gma_file_map")
if err != nil {
return err
}
return nil
}
// JobPoolSize and ConcurrencyLimit size the (currently commented-out)
// concurrent worker pool in main; processing presently runs sequentially.
var JobPoolSize int = 5
var ConcurrencyLimit int = 5
// WorkerJobPool is never used as a channel: main shadows it with a local
// []string of job file paths. Kept for the planned worker-pool variant.
var WorkerJobPool chan string
func main() {
1 year ago
folderPathP := flag.String("path", "/mnt/SC9000/TemporaryTestingShit2", "a string")
1 year ago
debug.SetMemoryLimit(6e9)
1 year ago
flag.Parse()
folderPath := *folderPathP
1 year ago
go func() {
log.Println(http.ListenAndServe("0.0.0.0:6060", nil))
}()
err := InitDatabase()
if err != nil {
panic(err)
}
1 year ago
/*
err = colGMA.Truncate(arangoCTX)
if err != nil {
panic(err)
}
err = colFile.Truncate(arangoCTX)
if err != nil {
panic(err)
}
err = colGMA2File.Truncate(arangoCTX)
if err != nil {
panic(err)
}*/
1 year ago
// /mnt/worksucc/san2/gma/2/1/2/3/2123406190.1591573904.gma
//fileHandle, err := os.Open("2143898000.1593250551.bin.gma") //2143898000.1593250551.bin")
//gma, err := gma.NewReader("2143898000.1593250551.bin.gma")
1 year ago
//folderPath := "/mnt/SC9000/TemporaryTestingShit2/" //"/mnt/worksucc/san1/gma/2/5/4/8/"
1 year ago
folderPathTarget := "/mnt/SC9000/ProcessedGMATest/" //"/mnt/worksucc/san1/gma/2/5/4/8/"
1 year ago
//
1 year ago
entries, err := os.ReadDir(folderPath)
if err != nil {
panic(err)
}
skipBla := false
1 year ago
var WorkerJobPool []string
1 year ago
for _, e := range entries {
if !e.IsDir() && skipBla == true {
1 year ago
if e.Name() == "2547463094.1626322945.gma" {
1 year ago
skipBla = false
1 year ago
} else {
continue
1 year ago
}
}
if !e.IsDir() {
1 year ago
WorkerJobPool = append(WorkerJobPool, filepath.Join(folderPath, e.Name()))
1 year ago
}
}
1 year ago
/*
sem := common.NewSemaphore(ConcurrencyLimit)
wg := sync.WaitGroup{}
*/
for _, jobFile := range WorkerJobPool {
//wg.Add(1)
//go func(jobFile string, wg *sync.WaitGroup) {
// sem.Acquire() // Wait for worker to have slot open
err = ProcessGMA(jobFile)
if err != nil {
fmt.Printf("\nERROR: %v\n", err)
//panic(err)
continue
}
os.Rename(jobFile, filepath.Join(folderPathTarget, filepath.Base(jobFile)))
// sem.Release() // Release the slot
// wg.Done() // Finish job
//}(job, &wg)
}
// Wait for all jobs to finish
//wg.Wait()
1 year ago
}
func undoBatch(undoBatch bool, gmaID string, fileIDs []string, gma2FileIDs []string) (err error) {
1 year ago
fmt.Printf("undoBatch(%x, %s)\n", undoBatch, gmaID)
1 year ago
_, err = colGMA.RemoveDocument(arangoCTX, gmaID)
if err != nil {
return err
}
_, _, err = colFile.RemoveDocuments(arangoCTX, fileIDs)
if err != nil {
return err
}
_, _, err = colGMA2File.RemoveDocuments(arangoCTX, gma2FileIDs)
if err != nil {
return err
}
return nil
}
func ProcessGMA(filePath string) (err error) {
var (
1 year ago
fileIDs []string
gma2FileIDs []string
1 year ago
)
dboGMA := common.DB_GMA{}
dboGMA.BatchID = uuid.NewV4().String() // use this for rapid unscheduled dissassembly
dboGMA.OriginalPath = filePath
dboGMA.ProcessingStart = time.Now()
fileStat, err := os.Stat(filePath)
if err != nil {
return err
}
dboGMA.StatModTime = fileStat.ModTime()
dboGMA.GMASize = fileStat.Size()
1 year ago
if dboGMA.GMASize < 200 {
return fmt.Errorf("GMA File too small, skipping")
}
1 year ago
fmt.Printf("Opening %s\n", filePath)
gmaReader, err := gma.NewReader(filePath)
if err != nil {
return err
}
defer gmaReader.Close()
dboGMA.GMAHash, err = gmaReader.GetSHA256()
if err != nil {
return err
}
dboGMA.ID = dboGMA.GMAHash
gmaReader.FileHandle.Seek(0, 0)
gmaTempPath := filepath.Join("/home/cheetah/dev/gma-puzzles/temp", dboGMA.ID)
defer os.RemoveAll(gmaTempPath) // clean up under any circumstances
dboIDExists, err := colGMA.DocumentExists(arangoCTX, dboGMA.ID)
if err != nil {
return err
}
if dboIDExists {
return fmt.Errorf("GMA with ID %s exists", dboGMA.ID)
}
header, err := gmaReader.ReadHeader()
if err != nil {
return err
}
dboGMA.Header = header
1 year ago
fmt.Printf("AddonVersion=%d\n", header.AddonVersion)
fmt.Printf("FormatVersion=%d\n", header.FormatVersion)
fmt.Printf("FormatVersionDiscardByte=%d\n", header.FormatVersionDiscardByte)
1 year ago
//fmt.Printf("r.cursorOffset = %d\n", gmaReader.GetOffset())
firstType, files, err := gmaReader.ReadFiles()
if err != nil {
return err
}
dboGMA.FirstType = firstType
//fmt.Printf("r.cursorOffset = %d\n", gmaReader.GetOffset())
var (
dboGMA2Files []common.DB_GMA2File
dboFiles []common.DB_File
)
for _, file := range files {
fmt.Printf("%s CRC: %d Offset: %d Size: %d NextType: %d FileNumber: %d\n", file.FileName, file.CRC, file.Offset, file.FileSize, file.NextType, file.FileNumber)
if file.NextType > uint32(file.FileNumber+10) { // Something is fucked
fmt.Printf("Current Cursor %d", gmaReader.GetOffset())
1 year ago
for _, otherFile := range files[file.FileNumber:] {
fmt.Printf("OTHERFILE %s CRC: %d Offset: %d Size: %d NextType: %d FileNumber: %d\n", otherFile.FileName, otherFile.CRC, otherFile.Offset, otherFile.FileSize, otherFile.NextType, otherFile.FileNumber)
}
1 year ago
return fmt.Errorf("GMA Header corrupted, NextType %d, FileNumber %d", file.NextType, file.FileNumber)
}
destPath := filepath.Join(gmaTempPath, "contents", file.FileName)
dir := filepath.Dir(destPath)
err := os.MkdirAll(dir, os.ModePerm)
if err != nil {
return err
}
destFile, err := os.Create(destPath)
if err != nil {
return err
}
defer destFile.Close()
extractMeta, err := gmaReader.ExtractFileTo(file, destFile)
if err != nil {
return err
}
if extractMeta.ExtractedCRC != extractMeta.OriginalMeta.CRC {
fmt.Printf("gma(%s) checksum in meta (%d) differs from read (%d) [%s]\n", filePath, extractMeta.OriginalMeta.CRC, extractMeta.ExtractedCRC, extractMeta.OriginalMeta.FileName)
}
//fmt.Printf("ExtractedMeta %s CRC: %d SHA256: %s\n", file.FileName, extractMeta.ExtractedCRC, extractMeta.ExtractedSHA256)
dboFile := common.DB_File{
ID: extractMeta.ExtractedSHA256, // ID is the SHA256, i guess that is good enough?
BatchID: dboGMA.BatchID,
InitialPath: file.FileName,
CRC: file.CRC,
Size: file.FileSize,
Hash: extractMeta.ExtractedSHA256,
Extension: filepath.Ext(file.FileName),
}
dboGMA2File := common.DB_GMA2File{
ID: fmt.Sprintf("%s_%s", dboGMA.ID, extractMeta.ExtractedSHA256),
BatchID: dboGMA.BatchID,
File: fmt.Sprintf("file/%s", extractMeta.ExtractedSHA256),
GMA: fmt.Sprintf("gma/%s", dboGMA.ID),
FileNumber: extractMeta.OriginalMeta.FileNumber,
FileName: extractMeta.OriginalMeta.FileName,
Offset: extractMeta.OriginalMeta.Offset,
FileSize: extractMeta.OriginalMeta.FileSize,
CRCMatch: extractMeta.ExtractedCRC == extractMeta.OriginalMeta.CRC,
CRC: extractMeta.OriginalMeta.CRC,
NextType: extractMeta.OriginalMeta.NextType,
LocalFileName: destPath,
UploadID: extractMeta.ExtractedSHA256,
}
//fmt.Println(dboFile)
// Add fileIDs from new unknowns
dboFiles = append(dboFiles, dboFile)
//fmt.Println(dboGMA2File)
gma2FileIDs = append(gma2FileIDs, dboGMA2File.ID)
dboGMA2Files = append(dboGMA2Files, dboGMA2File)
}
lastFile := files[len(files)-1]
dboGMA.FooterAddonCRC, err = gmaReader.ReadAddonCRC(lastFile.Offset + lastFile.FileSize)
if err != nil {
return err
}
dboGMA.ProcessingEnd = time.Now()
dboGMA.ProcessingDuration = dboGMA.ProcessingEnd.Sub(dboGMA.ProcessingStart).Milliseconds()
// TODO: Calculate dboGMA.OptimizedSize
dboGMA.OptimizedSize = 0
_, err = colGMA.CreateDocument(arangoCTX, dboGMA)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
importStartTime := time.Now()
var newUnknownFiles []string
chunkSize := 5
for {
if len(dboFiles) == 0 {
break
}
// necessary check to avoid slicing beyond
// slice capacity
if len(dboFiles) < chunkSize {
chunkSize = len(dboFiles)
}
// process and work withj
metaSlice, errorSlice, _ := colFile.CreateDocuments(arangoCTX, dboFiles[0:chunkSize])
1 year ago
fmt.Println("Metaslice")
fmt.Println(metaSlice)
1 year ago
for _, meta := range metaSlice {
if !meta.ID.IsEmpty() {
newUnknownFiles = append(newUnknownFiles, meta.Key)
fileIDs = append(fileIDs, meta.Key)
}
}
//fmt.Println("ErrorSlice")
//fmt.Println(errorSlice)
for _, createError := range errorSlice {
if createError != nil && strings.Contains(createError.Error(), "unique constraint violated - in index primary of type primary over '_key'") {
} else if createError != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return createError
}
}
dboFiles = dboFiles[chunkSize:]
}
fmt.Println()
fmt.Printf("Imported dboFiles into Arango and now we have %d new files from %d addon files\n", len(newUnknownFiles), len(files))
deltaFileSize := int64(0)
for _, unknownFile := range newUnknownFiles {
unknownFileID := fmt.Sprintf("file/%s", unknownFile)
for _, dboGMA2File := range dboGMA2Files {
if unknownFileID == dboGMA2File.File {
deltaFileSize += dboGMA2File.FileSize
}
}
}
dboGMA.OptimizedSize = deltaFileSize
fmt.Printf("Delta Storage %d bytes\n", deltaFileSize)
_, err = colGMA2File.ImportDocuments(arangoCTX, dboGMA2Files, &adriver.ImportDocumentOptions{
OnDuplicate: adriver.ImportOnDuplicateIgnore,
//FromPrefix: "gma/",
//ToPrefix: "file/",
Complete: true, // will make it fail if any error occurs (and hopefully reverse the trans-action)
})
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return fmt.Errorf("ImportDocuments File fail: %v", err)
}
//fmt.Printf("Code: %d, Created: %d, Ignored: %d, Errors: %d", statsImportGMA2File.Code, statsImportGMA2File.Created, statsImportGMA2File.Ignored, statsImportGMA2File.Errors)
fmt.Printf("Import Duration %dms\n", time.Since(importStartTime).Milliseconds())
fmt.Println()
// TODO: upload all unknownNewFiles to StorageServer
var httpClient *http.Client = http.DefaultClient
for _, unknownFile := range newUnknownFiles {
unknownFileID := fmt.Sprintf("file/%s", unknownFile)
for _, dboGMA2File := range dboGMA2Files {
if unknownFileID == dboGMA2File.File {
1 year ago
fmt.Printf("Uploading %s (local %s) to Storage\n", dboGMA2File.UploadID, dboGMA2File.LocalFileName)
1 year ago
err = common.MultipartUpload(httpClient, fmt.Sprintf("http://127.0.0.1:13371/stash/%s/%d", dboGMA2File.UploadID, dboGMA2File.FileSize), dboGMA2File.LocalFileName)
if err != nil {
1 year ago
fmt.Println("oopsie")
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
}
}
}
// TODO : fetch all files from storageServer
// TODO : write new gma from arangoinfo
// TODO : compare hashes
{
1 year ago
fmt.Println("rewriting gma")
1 year ago
destPath := filepath.Join(gmaTempPath, "rewrite.gma")
dir := filepath.Dir(destPath)
err := os.MkdirAll(dir, os.ModePerm)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
gmaWriter, err := gma.NewWriter(destPath)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
defer gmaWriter.Close()
//fmt.Printf("Writing Header with FormatVersion: %d\n", dboGMA.Header.FormatVersion)
err = gmaWriter.WriteHeader(dboGMA.Header)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
err = gmaWriter.WriteFirstType(dboGMA.FirstType)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
sort.SliceStable(dboGMA2Files, func(i, j int) bool { return dboGMA2Files[i].FileNumber < dboGMA2Files[j].FileNumber })
for _, dboGMA2File := range dboGMA2Files {
//fmt.Printf("WriteFileIndex for %s number %d\n", dboGMA2File.FileName, dboGMA2File.FileNumber)
err = gmaWriter.WriteFileIndex(dboGMA2File.FileName, dboGMA2File.FileSize, dboGMA2File.CRC, dboGMA2File.NextType)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
}
1 year ago
var httpClient *http.Client = &http.Client{
Timeout: 15 * time.Minute,
}
1 year ago
for _, dboGMA2File := range dboGMA2Files {
1 year ago
fmt.Printf("WriteFile for %s number %d = %s\n", dboGMA2File.FileName, dboGMA2File.FileNumber, dboGMA2File.UploadID)
1 year ago
resp, err := httpClient.Get(fmt.Sprintf("http://127.0.0.1:13371/fetch/%s", dboGMA2File.UploadID))
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
defer resp.Body.Close()
err = gmaWriter.WriteFile(resp.Body)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
}
gmaWriter.FileHandle.Seek(0, 2)
1 year ago
fmt.Printf("Writing Footer CRC %d\n\n", dboGMA.FooterAddonCRC)
1 year ago
gmaWriter.WriteFooterCRC(dboGMA.FooterAddonCRC)
gmaWriter.FileHandle.Seek(0, 0)
writeHash, err := gmaWriter.GetSHA256()
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
fmt.Printf("Rewrite Hash is %s %s\n", writeHash, destPath)
fmt.Printf("Original Hash is %s %s\n", dboGMA.GMAHash, dboGMA.OriginalPath)
fmt.Println()
writeStat, err := os.Stat(destPath)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
writeSize := writeStat.Size()
if writeSize != dboGMA.GMASize {
//fail
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return fmt.Errorf("RewriteCheck failed, original=%s (%d bytes), rewrite=%s (%d bytes)", dboGMA.GMAHash, dboGMA.GMASize, writeHash, writeSize)
}
if writeHash != dboGMA.GMAHash {
//fail
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return fmt.Errorf("RewriteCheck failed, original=%s (%d bytes), rewrite=%s (%d bytes)", dboGMA.GMAHash, dboGMA.GMASize, writeHash, writeSize)
}
}
// TODO: 4... profit?
dboGMA.ProcessingEnd = time.Now()
dboGMA.ProcessingDuration = dboGMA.ProcessingEnd.Sub(dboGMA.ProcessingStart).Milliseconds()
dboGMA.Success = true
_, err = colGMA.UpdateDocument(arangoCTX, dboGMA.ID, dboGMA)
if err != nil {
1 year ago
undoBatch(true, dboGMA.ID, fileIDs, gma2FileIDs)
1 year ago
return err
}
return nil
}