You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tetra-pei/sds/text.go

227 lines
5.7 KiB
Go

package sds
import (
"fmt"
"regexp"
"time"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
)
/* Text related types and functions */
// TextEncoding enum according to [AI] 29.5.4.1.
// ParseTextHeader fills it from the lower seven bits of the first text header byte.
type TextEncoding byte
// All defined text encoding schemes, according to [AI] table 29.29.
// The numeric values are assigned by iota, so the declaration order determines
// each scheme's value; do not reorder or insert entries in between.
const (
Packed7Bit TextEncoding = iota // 0
ISO8859_1 // 1
ISO8859_2 // 2
ISO8859_3 // 3
ISO8859_4 // 4
ISO8859_5 // 5
ISO8859_6 // 6
ISO8859_7 // 7
ISO8859_8 // 8
ISO8859_9 // 9
ISO8859_10 // 10
ISO8859_13 // 11
ISO8859_14 // 12
ISO8859_15 // 13
CodePage437 // 14
CodePage737 // 15
CodePage850 // 16
CodePage852 // 17
CodePage855 // 18
CodePage857 // 19
CodePage860 // 20
CodePage861 // 21
CodePage863 // 22
CodePage865 // 23
CodePage866 // 24
CodePage869 // 25
UTF16BE // 26
VISCII // 27
)
// TextCodecs contains encoding.Encoding instances for all supported text encoding schemes.
// Beware that not all defined schemes are actually supported here: Packed7Bit, CodePage737,
// CodePage857, CodePage861, CodePage869 and VISCII have no entry in this map.
// DecodePayloadText and AppendEncodedPayloadText use fallbackCodec for schemes without an entry.
var TextCodecs = map[TextEncoding]encoding.Encoding{
ISO8859_1: charmap.ISO8859_1,
ISO8859_2: charmap.ISO8859_2,
ISO8859_3: charmap.ISO8859_3,
ISO8859_4: charmap.ISO8859_4,
ISO8859_5: charmap.ISO8859_5,
ISO8859_6: charmap.ISO8859_6,
ISO8859_7: charmap.ISO8859_7,
ISO8859_8: charmap.ISO8859_8,
ISO8859_9: charmap.ISO8859_9,
ISO8859_10: charmap.ISO8859_10,
ISO8859_13: charmap.ISO8859_13,
ISO8859_14: charmap.ISO8859_14,
ISO8859_15: charmap.ISO8859_15,
CodePage437: charmap.CodePage437,
CodePage850: charmap.CodePage850,
CodePage852: charmap.CodePage852,
CodePage855: charmap.CodePage855,
CodePage860: charmap.CodePage860,
CodePage863: charmap.CodePage863,
CodePage865: charmap.CodePage865,
CodePage866: charmap.CodePage866,
// Big-endian UTF-16; the codec is configured with unicode.IgnoreBOM.
UTF16BE: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
}
// fallbackCodec is the codec used by DecodePayloadText and AppendEncodedPayloadText
// when TextCodecs has no entry for the requested text encoding scheme.
var fallbackCodec encoding.Encoding = charmap.ISO8859_1 // be lenient and use ISO8859-1 as fallback if anything goes wrong
// TextBytes returns the number of bytes needed to hold an encoded text with
// the given number of characters in the given encoding. The bit length is taken
// from TextBytesToBits; a trailing partial byte counts as a full byte.
func TextBytes(encoding TextEncoding, length int) int {
	bitCount := TextBytesToBits(encoding, length)
	if bitCount%8 > 0 {
		// the last byte is only partially filled, but still occupies a byte
		return bitCount/8 + 1
	}
	return bitCount / 8
}
// TextBytesToBits returns the length in bits of an encoded text with
// the given number of characters and the given encoding.
// Packed7Bit is counted with seven bits per character, every other
// encoding with eight bits per character.
// NOTE(review): UTF16BE is also counted with eight bits per character here;
// confirm that callers pass a matching length for that encoding.
func TextBytesToBits(encoding TextEncoding, length int) int {
	if encoding == Packed7Bit {
		return length * 7
	}
	return length * 8
}
// ParseTextHeader parses the text header at the start of text messages and
// concatenated text messages.
//
// The most significant bit of the first byte is the "timestamp used" flag, the
// remaining seven bits are the text encoding scheme. If the flag is set, the
// following three bytes are passed to DecodeTimestamp, so the header occupies
// four bytes in total (the same size TextHeader.Length and TextHeader.Encode use).
//
// An error is returned if bytes is empty, if the timestamp flag is set but fewer
// than four bytes are available, or if DecodeTimestamp fails.
func ParseTextHeader(bytes []byte) (TextHeader, error) {
	if len(bytes) < 1 {
		return TextHeader{}, fmt.Errorf("text header too short: %d", len(bytes))
	}

	timestampUsed := (bytes[0] & 0x80) == 0x80
	// One flag/encoding byte plus three timestamp bytes: only bytes[1:4] are read
	// below, so requiring more than four bytes would reject valid headers.
	if timestampUsed && len(bytes) < 4 {
		return TextHeader{}, fmt.Errorf("text header with timestamp too short: %d", len(bytes))
	}

	result := TextHeader{Encoding: TextEncoding(bytes[0] & 0x7F)}
	if timestampUsed {
		timestamp, err := DecodeTimestamp(bytes[1:4])
		if err != nil {
			return TextHeader{}, err
		}
		result.Timestamp = timestamp
	}
	return result, nil
}
// TextHeader represents the meta information for text used in text messages according to [AI] 29.5.3.3
// and concatenated text messages according to [AI] 29.5.10.3.
type TextHeader struct {
// Encoding is the text encoding scheme; ParseTextHeader reads it from the lower seven bits of the first header byte.
Encoding TextEncoding
// Timestamp is optional; the zero value means the header carries no timestamp (see Encode and Length).
Timestamp time.Time
}
// Encode appends this text header to bytes and returns the extended slice
// together with bits increased by the number of header bits written.
// Without a timestamp the header is the single encoding byte (8 bits). With a
// timestamp the most significant bit of that byte is set and the output of
// EncodeTimestampUTC is appended, counted as 24 further bits.
func (h TextHeader) Encode(bytes []byte, bits int) ([]byte, int) {
	first := byte(h.Encoding)
	if h.Timestamp.IsZero() {
		return append(bytes, first), bits + 8
	}

	// 0x80 marks the header as carrying a timestamp
	bytes = append(bytes, first|0x80)
	bytes = append(bytes, EncodeTimestampUTC(h.Timestamp)...)
	return bytes, bits + 8 + 24
}
// Length returns the length of this text header in bytes: four bytes if a
// timestamp is set, otherwise one byte.
func (h TextHeader) Length() int {
	if !h.Timestamp.IsZero() {
		return 4
	}
	return 1
}
// DecodePayloadText decodes the actual text content using the given encoding scheme according to [AI] 29.5.4.
// If TextCodecs has no codec for textEncoding, the fallback codec is used instead.
// The decoder's result is returned as a string along with any error the decoder reported.
func DecodePayloadText(textEncoding TextEncoding, bytes []byte) (string, error) {
	codec, found := TextCodecs[textEncoding]
	if !found {
		// we have no matching codec, but be lenient and use the fallback
		codec = fallbackCodec
	}

	decoded, err := codec.NewDecoder().Bytes(bytes)
	return string(decoded), err
}
// AppendEncodedPayloadText encodes the given payload text using the given text encoding and
// appends the result to the given byte slice. It returns the extended slice and bits
// increased by eight bits per appended byte.
// If TextCodecs has no codec for textEncoding, the fallback codec is used. If encoding
// fails, the unmodified bytes of text are appended instead.
func AppendEncodedPayloadText(bytes []byte, bits int, text string, textEncoding TextEncoding) ([]byte, int) {
	codec, found := TextCodecs[textEncoding]
	if !found {
		// we have no matching codec, but be lenient and use the fallback
		codec = fallbackCodec
	}

	payload, err := codec.NewEncoder().Bytes([]byte(text))
	if err != nil {
		// something went wrong, but be lenient and pass the text through as it is
		payload = []byte(text)
	}

	return append(bytes, payload...), bits + len(payload)*8
}
// leadingOPTA matches a prefix of one or more ASCII letters or spaces, followed by
// '#' and exactly 16 decimal digits, at the very start of a string.
var leadingOPTA = regexp.MustCompile(`^[A-Za-z ]+#[0-9]{16}`)

// SplitLeadingOPTA splits s into the leading part matched by leadingOPTA and the
// remaining text. If s has no such prefix, the first result is empty and the
// second result is s.
func SplitLeadingOPTA(s string) (string, string) {
	loc := leadingOPTA.FindStringIndex(s)
	if loc == nil {
		return "", s
	}
	// the pattern is anchored at the start, so the match always begins at index 0
	end := loc[1]
	return s[:end], s[end:]
}
// RemoveLeadingOPTA returns s without the leading part that SplitLeadingOPTA
// splits off; s is returned unchanged if there is no such part.
func RemoveLeadingOPTA(s string) string {
	_, remainder := SplitLeadingOPTA(s)
	return remainder
}
// trailingITSI matches, at the very end of a string, a two-byte separator
// (0x1A 0x00 or 0x0D 0x0D) followed by exactly 16 decimal digits.
var trailingITSI = regexp.MustCompile(`((\x1a\x00)|(\x0d\x0d))([0-9]{16})$`)

// SplitTrailingITSI splits s into the text before a trailing separator-plus-digits
// suffix matched by trailingITSI and the 16 digits of that suffix. The separator
// is part of neither result. If s has no such suffix, s is returned unchanged
// together with an empty string.
func SplitTrailingITSI(s string) (string, string) {
	match := trailingITSI.FindStringSubmatch(s)
	if len(match) == 0 {
		return s, ""
	}
	// match[0] is the whole suffix including the separator; the last group holds the digits
	suffixLen := len(match[0])
	digits := match[len(match)-1]
	return s[:len(s)-suffixLen], digits
}
// RemoveTrailingITSI returns s without the trailing suffix that SplitTrailingITSI
// splits off; s is returned unchanged if there is no such suffix.
func RemoveTrailingITSI(s string) string {
	text, _ := SplitTrailingITSI(s)
	return text
}