Files
masterlog/pseudonymizer.go
2025-11-10 05:20:25 +01:00

134 lines
3.7 KiB
Go

package masterlog
import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"sync"
)
// Pseudonymizer replaces sensitive values with deterministic, keyed
// pseudonyms. It carries the HMAC secret, the set of field names treated as
// sensitive, and the length of the pseudonyms it emits.
type Pseudonymizer struct {
	// mu is taken by every method that reads or writes the fields below.
	mu sync.RWMutex

	// secret is the HMAC key used to derive pseudonyms.
	secret []byte
	// sensitiveFields holds the field names that must be pseudonymized;
	// a name counts as sensitive when it maps to true.
	sensitiveFields map[string]bool
	// hashLength is the number of hex characters kept from the digest.
	hashLength int
}
// NewPseudonymizer creates a Pseudonymizer keyed with the given secret.
//
// The secret is copied, so the caller may overwrite or reuse its slice
// afterwards without changing the key used for later pseudonyms. Keep the
// secret private and identical across application restarts: the same secret
// is what makes the pseudonyms deterministic.
//
// The returned Pseudonymizer has no sensitive fields registered and a hash
// length of 8 hex characters.
func NewPseudonymizer(secret []byte) *Pseudonymizer {
	// Detach from the caller's backing array; otherwise a caller that wipes
	// its buffer after construction would silently re-key every later hash.
	key := append([]byte(nil), secret...)

	return &Pseudonymizer{
		secret:          key,
		sensitiveFields: make(map[string]bool),
		hashLength:      8, // Default: 8 hex characters (4 bytes)
	}
}
// NewPseudonymizerFromString is a convenience wrapper around
// NewPseudonymizer that uses the bytes of the given string as the secret.
func NewPseudonymizerFromString(secret string) *Pseudonymizer {
	key := []byte(secret)
	return NewPseudonymizer(key)
}
// NewPseudonymizerFromEnv creates a Pseudonymizer whose secret is the value
// of the environment variable named envVar.
//
// It panics with an error value if that variable is unset or empty.
func NewPseudonymizerFromEnv(envVar string) *Pseudonymizer {
	if secret := os.Getenv(envVar); secret != "" {
		return NewPseudonymizerFromString(secret)
	}
	panic(fmt.Errorf("secret not found in environment variable %s", envVar))
}
// SetHashLength changes how many hex characters each pseudonym contains.
// The constructor default is 8. Accepted values run from 4 through 64
// inclusive (64 being the full hex length of a SHA-256 digest); any other
// value is rejected with an error and the current length is left untouched.
func (p *Pseudonymizer) SetHashLength(length int) error {
	const (
		shortest = 4
		longest  = 64
	)
	if length < shortest || length > longest {
		return fmt.Errorf("hash length must be between 4 and 64, got %d", length)
	}

	p.mu.Lock()
	p.hashLength = length
	p.mu.Unlock()

	return nil
}
// AddSensitiveField registers fieldName as sensitive so that its values are
// replaced by pseudonyms. Registering a name that is already present has no
// further effect.
func (p *Pseudonymizer) AddSensitiveField(fieldName string) {
	p.mu.Lock()
	defer p.mu.Unlock()

	registry := p.sensitiveFields
	registry[fieldName] = true
}
// AddSensitiveFields registers every name in fieldNames as sensitive while
// holding the write lock once for the whole batch. Calling it with no
// arguments changes nothing.
func (p *Pseudonymizer) AddSensitiveFields(fieldNames ...string) {
	p.mu.Lock()
	defer p.mu.Unlock()

	for i := range fieldNames {
		p.sensitiveFields[fieldNames[i]] = true
	}
}
// RemoveSensitiveField unregisters fieldName, so its values are no longer
// pseudonymized. Removing a name that was never registered is a no-op.
func (p *Pseudonymizer) RemoveSensitiveField(fieldName string) {
	p.mu.Lock()
	delete(p.sensitiveFields, fieldName)
	p.mu.Unlock()
}
// IsSensitive reports whether fieldName is currently registered as
// sensitive. Names that were never registered report false.
func (p *Pseudonymizer) IsSensitive(fieldName string) bool {
	p.mu.RLock()
	marked := p.sensitiveFields[fieldName]
	p.mu.RUnlock()

	return marked
}
// Pseudonymize returns a deterministic pseudonym for value: the hex-encoded
// HMAC-SHA256 of the value's default (%v) text form, keyed with the
// configured secret and cut to the configured hash length.
//
// Equal inputs always yield equal output for a given secret and hash length.
// Because the input is formatted with %v, values of different types that
// print identically (for example 42 and "42") share a pseudonym.
func (p *Pseudonymizer) Pseudonymize(value interface{}) string {
	// Snapshot the settings so the lock is not held while hashing.
	p.mu.RLock()
	key, width := p.secret, p.hashLength
	p.mu.RUnlock()

	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(fmt.Sprint(value)))
	digest := hex.EncodeToString(mac.Sum(nil))

	// Keep only the leading width characters when the digest is longer.
	if width < len(digest) {
		return digest[:width]
	}
	return digest
}
// PseudonymizeFields returns a new map with the same keys as fields. Values
// under keys registered as sensitive are replaced by their pseudonym (a
// string); every other value is carried over unchanged. The input map is
// not modified.
func (p *Pseudonymizer) PseudonymizeFields(fields map[string]interface{}) map[string]interface{} {
	// Work from a private copy of the sensitive set: the read lock is
	// released before Pseudonymize (which locks again) is called.
	p.mu.RLock()
	marked := make(map[string]bool, len(p.sensitiveFields))
	for name, on := range p.sensitiveFields {
		marked[name] = on
	}
	p.mu.RUnlock()

	out := make(map[string]interface{}, len(fields))
	for name, raw := range fields {
		if !marked[name] {
			// Not sensitive: pass the value through untouched.
			out[name] = raw
			continue
		}
		out[name] = p.Pseudonymize(raw)
	}
	return out
}