<?php
/** cms-core/backend/security/xss.php - ULTRA SECURE VERSION */

/**
 * XSS Prevention - OWASP Compliant
 * Cross-Site Scripting Protection with aggressive filtering
 */

class XSS {

    /**
     * Regex blacklist applied as a first pass by clean(), cleanHTML() and
     * containsXSS(). Every match is removed (or reported) as-is.
     *
     * This list is a coarse pre-filter only: it works on the raw string and
     * does not understand HTML structure, so it must always be followed by
     * tag stripping and output encoding.
     */
    private static $dangerousPatterns = [
        // Script tags
        '/<script\b[^>]*>.*?<\/script>/is',
        '/<script\b[^>]*>/is',
        
        // Event handlers
        '/\bon\w+\s*=/is',  // onclick, onerror, onload, etc.
        
        // JavaScript protocols
        '/javascript:/is',
        '/vbscript:/is',
        '/data:text\/html/is',
        
        // Meta refresh
        '/<meta[^>]*refresh/is',
        
        // iframes
        '/<iframe\b[^>]*>/is',
        
        // Object/Embed
        '/<(object|embed)\b[^>]*>/is',
        
        // Form actions
        '/<form[^>]*action\s*=\s*["\']?javascript:/is',
        
        // Base tag
        '/<base\b[^>]*>/is',
        
        // Link with javascript
        '/<link[^>]*href\s*=\s*["\']?javascript:/is',
        
        // SVG with scripts
        '/<svg\b[^>]*onload/is',
        
        // Expression (old IE)
        '/expression\s*\(/is',
        
        // Import
        '/@import/is',
        
        // Marquee with events
        '/<marquee\b[^>]*on\w+/is',
        
        // Details with events
        '/<details\b[^>]*on\w+/is',
        
        // Body with events
        '/<body\b[^>]*on\w+/is',
        
        // Input with autofocus and events
        '/<input\b[^>]*(?:on\w+|autofocus)/is',
    ];

    /**
     * Attribute whitelist used by cleanHTML(), keyed by lowercase tag name.
     * The 'all' entry lists attributes permitted on every allowed tag.
     */
    private static $allowedAttributes = [
        'a' => ['href', 'title', 'target'],
        'img' => ['src', 'alt', 'title', 'width', 'height'],
        'span' => ['style'],
        'div' => ['style'],
        'all' => ['class', 'id'],
    ];

    /**
     * Sanitises a value for output as HTML text (aggressive mode).
     *
     * Arrays are processed recursively (values only, keys are untouched);
     * non-string scalars are returned unchanged. Strings have NUL bytes and
     * blacklisted patterns removed, non-whitelisted tags stripped, and are
     * finally HTML-encoded. Because of that final encoding, the whitelisted
     * tags that survive strip_tags() are returned as escaped text, not as
     * live markup.
     *
     * @param mixed $data Value to sanitise.
     * @return mixed Sanitised value of the same shape as the input.
     */
    public static function clean($data) {
        if (is_array($data)) {
            return array_map([self::class, 'clean'], $data);
        }
        
        if (!is_string($data)) {
            return $data;
        }
        
        // Step 1: remove NUL bytes
        $data = str_replace("\0", '', $data);
        
        // Step 2: remove every blacklisted pattern
        $data = self::stripDangerousPatterns($data);
        
        // Step 3: strip all tags except the whitelisted ones
        $allowedTags = '<p><br><strong><em><u><h1><h2><h3><h4><h5><h6><ul><ol><li><blockquote><code><pre>';
        $data = strip_tags($data, $allowedTags);
        
        // Step 4: encode everything that is left; this is the final safety net
        return htmlspecialchars($data, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
    
    /**
     * Sanitises a value to plain text: removes all HTML tags and encodes
     * the remainder.
     *
     * Arrays are processed recursively; non-string scalars are returned
     * unchanged.
     *
     * @param mixed $data Value to sanitise.
     * @return mixed Sanitised value of the same shape as the input.
     */
    public static function cleanStrict($data) {
        if (is_array($data)) {
            return array_map([self::class, 'cleanStrict'], $data);
        }
        
        if (!is_string($data)) {
            return $data;
        }
        
        // Remove all HTML tags, then encode whatever text remains
        return htmlspecialchars(strip_tags($data), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
    
    /**
     * Sanitises rich-text HTML (e.g. from a WYSIWYG editor), keeping a
     * whitelist of tags and, per tag, a whitelist of attributes.
     *
     * Arrays are processed recursively. Any other value is cast to string.
     * Only quoted attribute values are retained; unquoted attributes are
     * dropped. If the PCRE engine reports an error the result is an empty
     * string rather than partially filtered markup.
     *
     * @param mixed $html Markup to sanitise.
     * @return string|array Sanitised markup (array when an array was given).
     */
    public static function cleanHTML($html) {
        if (is_array($html)) {
            return array_map([self::class, 'cleanHTML'], $html);
        }
        $html = (string) $html;
        
        // Tags allowed for the rich-text editor
        $allowedTags = '<p><br><strong><em><u><h1><h2><h3><h4><h5><h6><ul><ol><li><a><img><blockquote><code><pre><span><div>';
        
        // Remove blacklisted patterns FIRST
        $html = self::stripDangerousPatterns($html);
        
        // Strip non-allowed tags
        $html = strip_tags($html, $allowedTags);
        
        // Rebuild every opening tag with only its whitelisted attributes.
        // Closing tags ("</p>") do not match this pattern and pass through.
        $filtered = preg_replace_callback(
            '/<(\w+)([^>]*)>/i',
            static function ($matches) {
                return '<' . $matches[1] . self::filterAttributes($matches[1], $matches[2]) . '>';
            },
            $html
        );
        
        // null signals a PCRE failure: fail closed.
        return $filtered === null ? '' : $filtered;
    }
    
    /**
     * Validates that a URL is safe to emit in a link.
     *
     * Accepts http:, https:, mailto: and tel: URLs as well as anything that
     * starts with "/" (which includes protocol-relative "//host" URLs).
     * Empty and non-string input is rejected.
     *
     * @param mixed $url URL to check.
     * @return bool True when the URL is acceptable.
     */
    public static function validateURL($url) {
        if (!is_string($url)) {
            return false;
        }
        
        // Remove surrounding whitespace
        $url = trim($url);
        
        // An empty URL has no first character to inspect and is never valid.
        if ($url === '') {
            return false;
        }
        
        // Reject dangerous protocols outright
        if (preg_match('/^(javascript|data|vbscript|file|about):/i', $url)) {
            return false;
        }
        
        // Allow only http, https, mailto, tel, or a leading-slash path
        if (!preg_match('/^(https?|mailto|tel):/i', $url) && $url[0] !== '/') {
            return false;
        }
        
        return true;
    }
    
    /**
     * Reports whether a string matches any blacklisted pattern.
     *
     * Non-string input yields false. A PCRE failure while matching (for
     * example an exceeded backtrack limit) is reported as a hit so that the
     * check fails closed.
     *
     * @param mixed $data Value to inspect.
     * @return bool True when a pattern matched or matching failed.
     */
    public static function containsXSS($data) {
        if (!is_string($data)) {
            return false;
        }
        
        foreach (self::$dangerousPatterns as $pattern) {
            // preg_match() returns 1 on a match and false on error; only an
            // explicit 0 means "checked and clean".
            if (preg_match($pattern, $data) !== 0) {
                return true;
            }
        }
        
        return false;
    }
    
    /**
     * Replaces $_GET, $_POST and $_COOKIE with copies passed through clean().
     *
     * @return void
     */
    public static function cleanGlobals() {
        $_GET = self::clean($_GET);
        $_POST = self::clean($_POST);
        $_COOKIE = self::clean($_COOKIE);
    }
    
    /**
     * Reduces a client-supplied file name to a conservative character set.
     *
     * Note: this does not remove secondary extensions ("a.php.jpg" is
     * returned unchanged) and does not strip a leading dot; callers must
     * validate the extension separately.
     *
     * @param string $filename File name or path supplied by the client.
     * @return string Sanitised base name.
     */
    public static function cleanFilename($filename) {
        // Drop any directory component (directory traversal)
        $filename = basename($filename);
        
        // Replace everything outside [A-Za-z0-9._-] with an underscore
        $filename = preg_replace('/[^a-zA-Z0-9._-]/', '_', $filename);
        
        // Collapse runs of consecutive dots into a single dot
        $filename = preg_replace('/\.+/', '.', $filename);
        
        return $filename;
    }
    
    /**
     * Removes every blacklisted pattern from a string.
     *
     * @param string $text Raw input.
     * @return string Filtered text, or '' if the PCRE engine reported an error.
     */
    private static function stripDangerousPatterns($text) {
        foreach (self::$dangerousPatterns as $pattern) {
            $text = preg_replace($pattern, '', $text);
            if ($text === null) {
                // Never hand back input that could not be fully filtered.
                return '';
            }
        }
        
        return $text;
    }
    
    /**
     * Builds the sanitised attribute string for one opening tag.
     *
     * @param string $tag        Tag name as written in the markup.
     * @param string $attributes Raw text between the tag name and ">".
     * @return string Attributes to emit, each preceded by a space; '' if none.
     */
    private static function filterAttributes($tag, $attributes) {
        // Tag names are case-insensitive in HTML, the whitelist keys are lowercase.
        $tagKey = strtolower($tag);
        $permitted = array_merge(
            self::$allowedAttributes[$tagKey] ?? [],
            self::$allowedAttributes['all']
        );
        
        // Double- and single-quoted values are matched separately so that a
        // value may contain the other quote character. The name class takes
        // "-", ":" and "." so "data-class" is not mistaken for "class".
        $pattern = '/([\w:.-]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')/';
        if (!preg_match_all($pattern, $attributes, $found, PREG_SET_ORDER)) {
            return '';
        }
        
        $clean = '';
        foreach ($found as $attr) {
            $name = strtolower($attr[1]);
            
            // Block ALL event handlers
            if (strncmp($name, 'on', 2) === 0) {
                continue;
            }
            
            // Only whitelisted attributes survive
            if (!in_array($name, $permitted, true)) {
                continue;
            }
            
            // Group 3 exists only for single-quoted values.
            $raw = $attr[3] ?? $attr[2];
            
            // Decode once so the checks below see what the browser will see,
            // and so the final encode does not turn "&amp;" into "&amp;amp;".
            $value = html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, 'UTF-8');
            
            // Browsers ignore tabs and line breaks inside a URL scheme, so
            // "java<TAB>script:" must be compared without them.
            $compact = preg_replace('/[\x00-\x20\x7F]+/', '', $value);
            if ($compact === null) {
                continue;
            }
            
            // Reject script-capable schemes in href/src
            if (($name === 'href' || $name === 'src')
                && preg_match('/(javascript|vbscript|data:text\/html)/i', $compact)) {
                continue;
            }
            
            // Reject expression() and script schemes in style
            if ($name === 'style'
                && preg_match('/expression\(|(?:javascript|vbscript):/i', $compact)) {
                continue;
            }
            
            $clean .= ' ' . $name . '="' . htmlspecialchars($value, ENT_QUOTES, 'UTF-8') . '"';
        }
        
        return $clean;
    }
}