package masterlog import ( "crypto/hmac" "crypto/sha256" "encoding/hex" "fmt" "os" "sync" ) // Pseudonymizer handles deterministic pseudonymization of sensitive data type Pseudonymizer struct { secret []byte mu sync.RWMutex // sensitiveFields is a set of field names that should be pseudonymized sensitiveFields map[string]bool // hashLength is the length of the pseudonymized value (in hex characters) hashLength int } // NewPseudonymizer creates a new pseudonymizer with the given secret // The secret should be kept secure and consistent across application restarts // to ensure deterministic pseudonymization func NewPseudonymizer(secret []byte) *Pseudonymizer { return &Pseudonymizer{ secret: secret, sensitiveFields: make(map[string]bool), hashLength: 8, // Default: 8 hex characters (4 bytes) } } // NewPseudonymizerFromString creates a new pseudonymizer from a string secret func NewPseudonymizerFromString(secret string) *Pseudonymizer { return NewPseudonymizer([]byte(secret)) } func NewPseudonymizerFromEnv(envVar string) *Pseudonymizer { secret := os.Getenv(envVar) if secret == "" { panic(fmt.Errorf("secret not found in environment variable %s", envVar)) } return NewPseudonymizerFromString(secret) } // SetHashLength sets the length of the pseudonymized hash (in hex characters) // Default is 8. Must be between 4 and 64 (max SHA256 hex length) func (p *Pseudonymizer) SetHashLength(length int) error { if length < 4 || length > 64 { return fmt.Errorf("hash length must be between 4 and 64, got %d", length) } p.mu.Lock() defer p.mu.Unlock() p.hashLength = length return nil } // AddSensitiveField marks a field name as sensitive, so it will be pseudonymized func (p *Pseudonymizer) AddSensitiveField(fieldName string) { p.mu.Lock() defer p.mu.Unlock() p.sensitiveFields[fieldName] = true } // AddSensitiveFields marks multiple field names as sensitive func (p *Pseudonymizer) AddSensitiveFields(fieldNames ...string) { p.mu.Lock() defer p.mu.Unlock() for _, name := range fieldNames { p.sensitiveFields[name] = true } } // RemoveSensitiveField removes a field from the sensitive fields list func (p *Pseudonymizer) RemoveSensitiveField(fieldName string) { p.mu.Lock() defer p.mu.Unlock() delete(p.sensitiveFields, fieldName) } // IsSensitive checks if a field name is marked as sensitive func (p *Pseudonymizer) IsSensitive(fieldName string) bool { p.mu.RLock() defer p.mu.RUnlock() return p.sensitiveFields[fieldName] } // Pseudonymize deterministically pseudonymizes a value using HMAC-SHA256 // The same input will always produce the same output (deterministic) func (p *Pseudonymizer) Pseudonymize(value interface{}) string { p.mu.RLock() hashLength := p.hashLength secret := p.secret p.mu.RUnlock() // Convert value to string valueStr := fmt.Sprintf("%v", value) // Create HMAC-SHA256 hash h := hmac.New(sha256.New, secret) h.Write([]byte(valueStr)) hash := h.Sum(nil) // Convert to hex and truncate to desired length hexHash := hex.EncodeToString(hash) if len(hexHash) > hashLength { hexHash = hexHash[:hashLength] } return hexHash } // PseudonymizeFields applies pseudonymization to sensitive fields in a map func (p *Pseudonymizer) PseudonymizeFields(fields map[string]interface{}) map[string]interface{} { p.mu.RLock() sensitiveFields := make(map[string]bool) for k, v := range p.sensitiveFields { sensitiveFields[k] = v } p.mu.RUnlock() result := make(map[string]interface{}) for key, value := range fields { if sensitiveFields[key] { // Pseudonymize sensitive fields result[key] = p.Pseudonymize(value) } else { // Keep non-sensitive fields as-is result[key] = value } } return result }