mirror of
https://github.com/ftl/tetra-pei.git
synced 2025-04-03 20:27:30 +02:00
add support for all the 8-bit encodings that Go supports out of the box (plus UTF-16 BE)
This commit is contained in:
parent
721f893540
commit
309a623d24
3 changed files with 95 additions and 30 deletions
5
go.mod
5
go.mod
|
@ -2,7 +2,10 @@ module github.com/ftl/tetra-pei
|
||||||
|
|
||||||
go 1.17
|
go 1.17
|
||||||
|
|
||||||
require github.com/stretchr/testify v1.7.0
|
require (
|
||||||
|
github.com/stretchr/testify v1.7.0
|
||||||
|
golang.org/x/text v0.3.7
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/davecgh/go-spew v1.1.0 // indirect
|
github.com/davecgh/go-spew v1.1.0 // indirect
|
||||||
|
|
3
go.sum
3
go.sum
|
@ -5,6 +5,9 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
||||||
|
|
115
sds/text.go
115
sds/text.go
|
@ -2,8 +2,11 @@ package sds
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/text/encoding"
|
||||||
|
"golang.org/x/text/encoding/charmap"
|
||||||
|
"golang.org/x/text/encoding/unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
/* Text related types and functions */
|
/* Text related types and functions */
|
||||||
|
@ -11,12 +14,67 @@ import (
|
||||||
// TextEncoding enum according to [AI] 29.5.4.1
|
// TextEncoding enum according to [AI] 29.5.4.1
|
||||||
type TextEncoding byte
|
type TextEncoding byte
|
||||||
|
|
||||||
// All supported text encoding schemes, according to [AI] table 29.29
|
// All defined text encoding schemes, according to [AI] table 29.29
|
||||||
const (
|
const (
|
||||||
Packed7Bit TextEncoding = 0
|
Packed7Bit TextEncoding = iota
|
||||||
ISO8859_1 TextEncoding = 1
|
ISO8859_1
|
||||||
|
ISO8859_2
|
||||||
|
ISO8859_3
|
||||||
|
ISO8859_4
|
||||||
|
ISO8859_5
|
||||||
|
ISO8859_6
|
||||||
|
ISO8859_7
|
||||||
|
ISO8859_8
|
||||||
|
ISO8859_9
|
||||||
|
ISO8859_10
|
||||||
|
ISO8859_13
|
||||||
|
ISO8859_14
|
||||||
|
ISO8859_15
|
||||||
|
CodePage437
|
||||||
|
CodePage737
|
||||||
|
CodePage850
|
||||||
|
CodePage852
|
||||||
|
CodePage855
|
||||||
|
CodePage857
|
||||||
|
CodePage860
|
||||||
|
CodePage861
|
||||||
|
CodePage863
|
||||||
|
CodePage865
|
||||||
|
CodePage866
|
||||||
|
CodePage869
|
||||||
|
UTF16BE
|
||||||
|
VISCII
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TextCodecs contains encoding.Encoding instances for all supported text encoding schemes.
|
||||||
|
// Beware that not all defined schemes are actually supported here.
|
||||||
|
var TextCodecs = map[TextEncoding]encoding.Encoding{
|
||||||
|
ISO8859_1: charmap.ISO8859_1,
|
||||||
|
ISO8859_2: charmap.ISO8859_2,
|
||||||
|
ISO8859_3: charmap.ISO8859_3,
|
||||||
|
ISO8859_4: charmap.ISO8859_4,
|
||||||
|
ISO8859_5: charmap.ISO8859_5,
|
||||||
|
ISO8859_6: charmap.ISO8859_6,
|
||||||
|
ISO8859_7: charmap.ISO8859_7,
|
||||||
|
ISO8859_8: charmap.ISO8859_8,
|
||||||
|
ISO8859_9: charmap.ISO8859_9,
|
||||||
|
ISO8859_10: charmap.ISO8859_10,
|
||||||
|
ISO8859_13: charmap.ISO8859_13,
|
||||||
|
ISO8859_14: charmap.ISO8859_14,
|
||||||
|
ISO8859_15: charmap.ISO8859_15,
|
||||||
|
CodePage437: charmap.CodePage437,
|
||||||
|
CodePage850: charmap.CodePage850,
|
||||||
|
CodePage852: charmap.CodePage852,
|
||||||
|
CodePage855: charmap.CodePage855,
|
||||||
|
CodePage860: charmap.CodePage860,
|
||||||
|
CodePage863: charmap.CodePage863,
|
||||||
|
CodePage865: charmap.CodePage865,
|
||||||
|
CodePage866: charmap.CodePage866,
|
||||||
|
UTF16BE: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||||
|
}
|
||||||
|
|
||||||
|
var fallbackCodec encoding.Encoding = charmap.ISO8859_1 // be lenient and use ISO8859-1 as fallback if anything goes wrong
|
||||||
|
|
||||||
// TextBytes returns the length in bytes of an encoded text with
|
// TextBytes returns the length in bytes of an encoded text with
|
||||||
// the given number of characters and the given encoding
|
// the given number of characters and the given encoding
|
||||||
func TextBytes(encoding TextEncoding, length int) int {
|
func TextBytes(encoding TextEncoding, length int) int {
|
||||||
|
@ -95,39 +153,40 @@ func (h TextHeader) Length() int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// DecodePayloadText decodes the actual text content using the given encoding scheme according to [AI] 29.5.4
|
// DecodePayloadText decodes the actual text content using the given encoding scheme according to [AI] 29.5.4
|
||||||
func DecodePayloadText(encoding TextEncoding, bytes []byte) (string, error) {
|
func DecodePayloadText(textEncoding TextEncoding, bytes []byte) (string, error) {
|
||||||
switch encoding {
|
var decoder *encoding.Decoder
|
||||||
case ISO8859_1: // only ISO8859-1 at the moment
|
codec, ok := TextCodecs[textEncoding]
|
||||||
return decodeISO8859_1(bytes)
|
if ok {
|
||||||
default: // be lenient and use ISO8859-1 as fallback
|
decoder = codec.NewDecoder()
|
||||||
log.Printf("encoding 0x%x is currently not supported, using ISO8859-1 as fallback", encoding)
|
} else { // we have no matching codec, but be lenient and use the fallback
|
||||||
return decodeISO8859_1(bytes)
|
decoder = fallbackCodec.NewDecoder()
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func decodeISO8859_1(bytes []byte) (string, error) {
|
utf8, err := decoder.Bytes(bytes)
|
||||||
utf8Buf := make([]rune, len(bytes))
|
return string(utf8), err
|
||||||
for i, b := range bytes {
|
|
||||||
utf8Buf[i] = rune(b)
|
|
||||||
}
|
|
||||||
return string(utf8Buf), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func AppendEncodedPayloadText(bytes []byte, bits int, text string, encoding TextEncoding) ([]byte, int) {
|
// AppendEncodedPayloadText encodes the given payload text using the given text encoding and appends the result to the given byte slice.
|
||||||
|
func AppendEncodedPayloadText(bytes []byte, bits int, text string, textEncoding TextEncoding) ([]byte, int) {
|
||||||
var encodedBytes []byte
|
var encodedBytes []byte
|
||||||
var encodedBits int
|
var encodedBits int
|
||||||
switch encoding {
|
var err error
|
||||||
case ISO8859_1: // only ISO8859-1 at the moment
|
|
||||||
encodedBytes, encodedBits = encodeISO8859_1(text)
|
var encoder *encoding.Encoder
|
||||||
default: // be lenient and use ISO8859-1 as fallback
|
codec, ok := TextCodecs[textEncoding]
|
||||||
encodedBytes, encodedBits = encodeISO8859_1(text)
|
if ok {
|
||||||
|
encoder = codec.NewEncoder()
|
||||||
|
} else { // we have no matching codec, but be lenient and use the fallback
|
||||||
|
encoder = fallbackCodec.NewEncoder()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
encodedBytes, err = encoder.Bytes([]byte(text))
|
||||||
|
if err != nil { // encoding failed, but be lenient and fall back to the raw (UTF-8) bytes of the text instead of the fallback codec
|
||||||
|
encodedBytes = []byte(text)
|
||||||
|
}
|
||||||
|
encodedBits = len(encodedBytes) * 8
|
||||||
|
|
||||||
bytes = append(bytes, encodedBytes...)
|
bytes = append(bytes, encodedBytes...)
|
||||||
bits += encodedBits
|
bits += encodedBits
|
||||||
return bytes, bits
|
return bytes, bits
|
||||||
}
|
}
|
||||||
|
|
||||||
func encodeISO8859_1(text string) ([]byte, int) {
|
|
||||||
return []byte(text), len(text) * 8
|
|
||||||
}
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue