<?php
// phpcs:disable WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching, WordPress.DB.DirectDatabaseQuery.SchemaChange, WordPress.DB.SlowDBQuery, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_post__not_in, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude
declare(strict_types=1);

/**
 * LLMs.txt Service
 *
 * @package ProRank\SEO\Core\Services
 */

namespace ProRank\SEO\Core\Services;

defined( 'ABSPATH' ) || exit;

/**
 * Generates llms.txt and llms-full.txt files
 */
class LlmsTxtService {

    /**
     * Maximum number of characters of converted content emitted per page
     * in llms-full.txt (keeps the file manageable for LLM context windows).
     */
    private const MAX_CONTENT_LENGTH = 5000;

    /**
     * Maximum number of characters of an excerpt derived from post content.
     */
    private const MAX_EXCERPT_LENGTH = 150;

    /**
     * Fallback for the `max_pages` setting when the stored value is unusable.
     */
    private const DEFAULT_MAX_PAGES = 20;

    /**
     * Site information (name, description, url, language).
     *
     * @var array
     */
    private array $site_info;

    /**
     * Module settings, merged with defaults and normalised.
     *
     * @var array
     */
    private array $settings;

    /**
     * Constructor.
     *
     * Captures the site name, tagline, home URL and locale, and loads the
     * module settings.
     */
    public function __construct() {
        $this->site_info = [
            'name' => get_bloginfo('name'),
            'description' => get_bloginfo('description'),
            'url' => home_url(),
            'language' => get_locale(),
        ];

        $this->settings = $this->get_settings();
    }

    /**
     * Generate llms.txt index file.
     *
     * Emits the site header followed by one Markdown link list per non-empty
     * page group and a generation footer.
     *
     * @param bool $preview Preview mode. Not read by this method; kept for
     *                      interface compatibility.
     * @return string|null The document text (this implementation always
     *                     returns a string).
     */
    public function generate_index(bool $preview = false): ?string {
        $content = $this->build_header();

        $grouped_pages = $this->group_pages($this->get_pages());

        // Section headings in output order, keyed by group name.
        $sections = [
            'essential' => '## Essential Pages',
            'content' => '## Content',
            'products' => '## Products & Services',
            'optional' => '## Optional',
        ];

        foreach ($sections as $group => $heading) {
            if (empty($grouped_pages[$group])) {
                continue;
            }

            $content[] = $heading;
            $content[] = '';
            foreach ($grouped_pages[$group] as $page) {
                $content[] = $this->format_page_link($page);
            }
            $content[] = '';
        }

        // Add metadata
        $content[] = '---';
        $content[] = '';
        $content[] = '_Generated: ' . wp_date('Y-m-d H:i:s T') . '_';
        $content[] = '_By: ProRank SEO Plugin_';

        return implode("\n", $content);
    }

    /**
     * Generate llms-full.txt complete documentation file.
     *
     * Emits the site header followed by the Markdown-converted content of each
     * selected page. Posts that cannot be loaded or that are password
     * protected are skipped, and the footer reports the number of pages that
     * were actually written.
     *
     * @param bool $preview Preview mode. Not read by this method; kept for
     *                      interface compatibility.
     * @return string|null The document text (this implementation always
     *                     returns a string).
     */
    public function generate_full(bool $preview = false): ?string {
        $content = $this->build_header();
        $content[] = '---';
        $content[] = '';

        $included = 0;

        foreach ($this->get_pages() as $page) {
            $post = get_post($page['id']);
            if (!$post) {
                continue;
            }

            // Never publish the body of a password-protected post.
            if ('' !== (string) $post->post_password) {
                continue;
            }

            // Page title
            $content[] = '## ' . $post->post_title;
            $content[] = '';

            // Page URL
            $content[] = '_URL: ' . get_permalink($post) . '_';
            $content[] = '';

            // Convert to Markdown and cap the length on a character boundary.
            $content[] = $this->truncate(
                $this->html_to_markdown((string) $post->post_content),
                self::MAX_CONTENT_LENGTH,
                self::MAX_CONTENT_LENGTH
            );
            $content[] = '';
            $content[] = '---';
            $content[] = '';

            $included++;
        }

        // Add metadata
        $content[] = '_Generated: ' . wp_date('Y-m-d H:i:s T') . '_';
        $content[] = '_Total pages included: ' . $included . '_';
        $content[] = '_By: ProRank SEO Plugin_';

        return implode("\n", $content);
    }

    /**
     * Build the header lines shared by both generated files.
     *
     * @return array Title, optional description quote, website and language
     *               lines, each group followed by an empty line.
     */
    private function build_header(): array {
        $lines = [];

        $lines[] = '# ' . $this->site_info['name'];
        $lines[] = '';

        if (!empty($this->site_info['description'])) {
            $lines[] = '> ' . $this->site_info['description'];
            $lines[] = '';
        }

        $lines[] = 'Website: ' . $this->site_info['url'];
        $lines[] = 'Language: ' . $this->get_language_name((string) $this->site_info['language']);
        $lines[] = '';

        return $lines;
    }

    /**
     * Select pages according to the configured mode.
     *
     * @return array List of page descriptors (`id`, `type`, `priority`).
     */
    private function get_pages(): array {
        return $this->settings['mode'] === 'manual'
            ? $this->get_manual_pages()
            : $this->get_automatic_pages();
    }

    /**
     * Get pages for automatic mode.
     *
     * Collects the static front page, well-known pages located by slug, the
     * WooCommerce shop page and recent content of the enabled post types.
     * Each post ID is listed at most once (first occurrence wins), the list is
     * ordered by priority and capped at the `max_pages` setting.
     *
     * @return array List of page descriptors (`id`, `type`, `priority`).
     */
    private function get_automatic_pages(): array {
        $max_pages = (int) $this->settings['max_pages'];

        // Keyed by post ID so every source below is de-duplicated uniformly.
        $pages = [];
        $add_page = static function (int $id, string $type, int $priority) use (&$pages): void {
            if ($id > 0 && !isset($pages[$id])) {
                $pages[$id] = [
                    'id' => $id,
                    'type' => $type,
                    'priority' => $priority,
                ];
            }
        };

        // Homepage. The option is usually stored as a numeric string, so cast
        // it to keep IDs comparable with WP_Post::$ID.
        $add_page((int) get_option('page_on_front'), 'essential', 1);

        // Try to find common pages; the first published slug per group wins.
        $common_pages = [
            'about' => ['about', 'about-us', 'who-we-are'],
            'contact' => ['contact', 'contact-us', 'get-in-touch'],
            'privacy' => ['privacy', 'privacy-policy'],
            'terms' => ['terms', 'terms-of-service', 'terms-and-conditions'],
        ];

        foreach ($common_pages as $slugs) {
            foreach ($slugs as $slug) {
                $page = get_page_by_path($slug);
                if ($page instanceof \WP_Post && $page->post_status === 'publish') {
                    $add_page((int) $page->ID, 'essential', 2);
                    break;
                }
            }
        }

        $include_products = class_exists('WooCommerce') && $this->settings['include_products'];

        // Get shop page if WooCommerce
        if ($include_products) {
            $add_page((int) wc_get_page_id('shop'), 'essential', 2);
        }

        // Get recent important content
        $content_types = [];

        if ($this->settings['include_posts']) {
            $content_types[] = 'post';
        }

        if ($this->settings['include_pages']) {
            $content_types[] = 'page';
        }

        if ($include_products) {
            $content_types[] = 'product';
        }

        if (!empty($content_types)) {
            // NOTE(review): this meta filter only matches posts that HAVE a
            // `_wp_page_template` value other than "default", which looks
            // like it excludes most regular content — confirm the intent.
            $recent_posts = get_posts([
                'post_type' => $content_types,
                'posts_per_page' => $max_pages,
                'post_status' => 'publish',
                'orderby' => 'date',
                'order' => 'DESC',
                'meta_key' => '_wp_page_template',
                'meta_value' => 'default',
                'meta_compare' => '!=',
            ]);

            foreach ($recent_posts as $post) {
                $add_page(
                    (int) $post->ID,
                    $post->post_type === 'product' ? 'products' : 'content',
                    3
                );
            }
        }

        // Order by priority, then limit to the max pages setting.
        $pages = array_values($pages);
        usort(
            $pages,
            static fn (array $a, array $b): int => $a['priority'] <=> $b['priority']
        );

        return array_slice($pages, 0, $max_pages);
    }

    /**
     * Get pages for manual mode.
     *
     * Falls back to automatic selection when no pages are configured. Entries
     * without a page ID, or whose post is missing or unpublished, are skipped.
     *
     * @return array List of page descriptors (`id`, `type`, `priority`).
     */
    private function get_manual_pages(): array {
        if (empty($this->settings['pages'])) {
            return $this->get_automatic_pages(); // Fallback to automatic
        }

        $pages = [];

        foreach ($this->settings['pages'] as $page_config) {
            if (!is_array($page_config) || empty($page_config['page_id'])) {
                continue;
            }

            $post = get_post((int) $page_config['page_id']);
            if (!$post || $post->post_status !== 'publish') {
                continue;
            }

            // Cast so a non-string stored type cannot trip strict typing.
            $type = (string) ($page_config['type'] ?? 'content');

            $pages[] = [
                'id' => $post->ID,
                'type' => $type,
                'priority' => $this->get_type_priority($type),
            ];
        }

        return $pages;
    }

    /**
     * Group pages by type.
     *
     * @param array $pages Page descriptors (`id`, `type`, `priority`).
     * @return array Lists of page data (`id`, `title`, `url`, `excerpt`) keyed
     *               by `essential`, `content`, `products` and `optional`.
     */
    private function group_pages(array $pages): array {
        $grouped = [
            'essential' => [],
            'content' => [],
            'products' => [],
            'optional' => [],
        ];

        $essential_types = ['essential', 'about', 'contact', 'terms', 'privacy', 'shop'];

        foreach ($pages as $page) {
            $post = get_post($page['id']);
            if (!$post) {
                continue;
            }

            $page_data = [
                'id' => $post->ID,
                'title' => $post->post_title,
                'url' => get_permalink($post),
                'excerpt' => $this->get_excerpt($post),
            ];

            $type = $page['type'] ?? 'content';

            if (in_array($type, $essential_types, true)) {
                $group = 'essential';
            } elseif ($type === 'products' || $post->post_type === 'product') {
                $group = 'products';
            } elseif (($page['priority'] ?? 3) > 3) {
                $group = 'optional';
            } else {
                $group = 'content';
            }

            $grouped[$group][] = $page_data;
        }

        return $grouped;
    }

    /**
     * Format page link for llms.txt.
     *
     * Produces a single-line Markdown list item. Whitespace runs in the title
     * and excerpt are collapsed so the item stays on one line, and square
     * brackets in the title are escaped so they cannot end the link text
     * early. An empty title falls back to the URL.
     *
     * @param array $page Page data (`title`, `url`, optional `excerpt`).
     * @return string
     */
    private function format_page_link(array $page): string {
        $url = (string) ($page['url'] ?? '');

        $title = $this->collapse_whitespace((string) ($page['title'] ?? ''));
        if ('' === $title) {
            $title = $url;
        }
        $title = str_replace(['[', ']'], ['\\[', '\\]'], $title);

        $link = '- [' . $title . '](' . $url . ')';

        $excerpt = $this->collapse_whitespace((string) ($page['excerpt'] ?? ''));
        if ('' !== $excerpt) {
            $link .= ': ' . $excerpt;
        }

        return $link;
    }

    /**
     * Get excerpt from post.
     *
     * Uses the manual excerpt when present; otherwise derives one from the
     * post content with shortcodes and tags removed, truncated on a character
     * boundary. Password-protected posts yield an empty excerpt.
     *
     * @param \WP_Post $post Post object
     * @return string Single-line plain-text excerpt, possibly empty.
     */
    private function get_excerpt(\WP_Post $post): string {
        if ('' !== (string) $post->post_password) {
            return '';
        }

        if (!empty($post->post_excerpt)) {
            return $this->collapse_whitespace(wp_strip_all_tags($post->post_excerpt));
        }

        $content = $this->collapse_whitespace(
            wp_strip_all_tags(strip_shortcodes((string) $post->post_content))
        );

        // Reserve three characters for the ellipsis.
        return $this->truncate($content, self::MAX_EXCERPT_LENGTH, self::MAX_EXCERPT_LENGTH - 3);
    }

    /**
     * Collapse every run of whitespace into a single space and trim the ends.
     *
     * @param string $text Input text
     * @return string
     */
    private function collapse_whitespace(string $text): string {
        $collapsed = preg_replace('/\s+/', ' ', $text);

        // preg_replace() returns null on a PCRE error; keep the input then.
        return trim($collapsed ?? $text);
    }

    /**
     * Shorten text that exceeds a character limit, appending an ellipsis.
     *
     * Lengths are measured in UTF-8 characters rather than bytes so that
     * multibyte characters are never split.
     *
     * @param string $text        Input text
     * @param int    $max_length  Longest length returned unchanged
     * @param int    $keep_length Characters kept before the '...' suffix
     * @return string
     */
    private function truncate(string $text, int $max_length, int $keep_length): string {
        if (mb_strlen($text, 'UTF-8') <= $max_length) {
            return $text;
        }

        return mb_substr($text, 0, $keep_length, 'UTF-8') . '...';
    }

    /**
     * Convert HTML to Markdown.
     *
     * Strips shortcodes, renders blocks when the content contains any, and
     * then converts the resulting HTML.
     *
     * @param string $html HTML content
     * @return string
     */
    private function html_to_markdown(string $html): string {
        // Remove shortcodes
        $html = strip_shortcodes($html);

        // Convert blocks to HTML if Gutenberg
        if (has_blocks($html)) {
            $html = do_blocks($html);
        }

        return $this->convert_html($html);
    }

    /**
     * Regex-based HTML to Markdown conversion.
     *
     * Every tag pattern ends the tag name with a word boundary so that, for
     * example, the `<b>` rule cannot match `<br>` and the `<i>` rule cannot
     * match `<img>`. Headings and list items are terminated with newlines so
     * adjacent elements do not run together.
     *
     * @param string $html Rendered HTML
     * @return string
     */
    private function convert_html(string $html): string {
        $block_break = "\n\n";

        // Applied in order.
        $replacements = [
            // Drop script/style elements together with their contents
            '/<(script|style)\b[^>]*>.*?<\/\1\s*>/is' => '',
            '/<!--.*?-->/s' => '',

            // Headers
            '/<h1\b[^>]*>(.*?)<\/h1>/is' => $block_break . '# $1' . $block_break,
            '/<h2\b[^>]*>(.*?)<\/h2>/is' => $block_break . '## $1' . $block_break,
            '/<h3\b[^>]*>(.*?)<\/h3>/is' => $block_break . '### $1' . $block_break,
            '/<h4\b[^>]*>(.*?)<\/h4>/is' => $block_break . '#### $1' . $block_break,
            '/<h5\b[^>]*>(.*?)<\/h5>/is' => $block_break . '##### $1' . $block_break,
            '/<h6\b[^>]*>(.*?)<\/h6>/is' => $block_break . '###### $1' . $block_break,

            // Text formatting
            '/<strong\b[^>]*>(.*?)<\/strong>/is' => '**$1**',
            '/<b\b[^>]*>(.*?)<\/b>/is' => '**$1**',
            '/<em\b[^>]*>(.*?)<\/em>/is' => '_$1_',
            '/<i\b[^>]*>(.*?)<\/i>/is' => '_$1_',

            // Links (double- or single-quoted href)
            '/<a\b[^>]*?\shref\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)<\/a>/is' => '[$3]($2)',

            // Lists
            '/<ul\b[^>]*>/is' => '',
            '/<\/ul>/is' => "\n",
            '/<ol\b[^>]*>/is' => '',
            '/<\/ol>/is' => "\n",
            '/<li\b[^>]*>(.*?)<\/li>/is' => '- $1' . "\n",

            // Paragraphs and breaks
            '/<p\b[^>]*>/is' => '',
            '/<\/p>/is' => $block_break,
            '/<br\b[^>]*>/is' => "\n",

            // Remove remaining HTML
            '/<[^>]+>/is' => '',
        ];

        foreach ($replacements as $pattern => $replacement) {
            $result = preg_replace($pattern, $replacement, $html);

            // A null result signals a PCRE error; skip that rule instead of
            // losing the whole document.
            if (null !== $result) {
                $html = $result;
            }
        }

        // Clean up whitespace
        $html = trim($html);
        $html = preg_replace('/\n{3,}/', "\n\n", $html) ?? $html;

        // Decode HTML entities
        return html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Get language name from locale.
     *
     * @param string $locale Locale code
     * @return string Display name, or the locale code itself when unknown.
     */
    private function get_language_name(string $locale): string {
        $languages = [
            'en_US' => 'English (US)',
            'en_GB' => 'English (UK)',
            'es_ES' => 'Spanish',
            'fr_FR' => 'French',
            'de_DE' => 'German',
            'it_IT' => 'Italian',
            'pt_BR' => 'Portuguese (Brazil)',
            'ru_RU' => 'Russian',
            'ja' => 'Japanese',
            'zh_CN' => 'Chinese (Simplified)',
        ];

        return $languages[$locale] ?? $locale;
    }

    /**
     * Get type priority.
     *
     * @param string $type Page type
     * @return int Priority (lower sorts first); 3 for unknown types.
     */
    private function get_type_priority(string $type): int {
        $priorities = [
            'about' => 1,
            'contact' => 1,
            'terms' => 2,
            'privacy' => 2,
            'shop' => 2,
            'essential' => 2,
            'content' => 3,
            'products' => 3,
            'optional' => 4,
        ];

        return $priorities[$type] ?? 3;
    }

    /**
     * Get module settings.
     *
     * Reads the `prorank_llms_txt_settings` option, merges it over the
     * defaults and normalises the value types.
     *
     * @return array
     */
    private function get_settings(): array {
        $defaults = [
            'enabled' => false,
            'mode' => 'automatic',
            'pages' => [],
            'include_posts' => true,
            'include_pages' => true,
            'include_products' => true,
            'max_pages' => self::DEFAULT_MAX_PAGES,
        ];

        $settings = get_option('prorank_llms_txt_settings', []);

        return $this->normalize_settings(wp_parse_args($settings, $defaults));
    }

    /**
     * Coerce stored settings into the types the rest of the class relies on.
     *
     * `max_pages` becomes a positive integer (stored options may hold it as a
     * string, which strict typing would reject in array_slice()); anything
     * non-numeric or below 1 falls back to the default. `pages` becomes an
     * array.
     *
     * @param array $settings Settings merged with defaults
     * @return array
     */
    private function normalize_settings(array $settings): array {
        $max_pages = $settings['max_pages'] ?? null;
        $max_pages = is_numeric($max_pages) ? (int) $max_pages : self::DEFAULT_MAX_PAGES;
        $settings['max_pages'] = $max_pages > 0 ? $max_pages : self::DEFAULT_MAX_PAGES;

        if (!is_array($settings['pages'] ?? null)) {
            $settings['pages'] = [];
        }

        return $settings;
    }
}