<?php
/**
 * Internal Linking Module
 *
 * Provides basic internal linking suggestions and editor integration.
 *
 * @package ProRank\SEO\Modules\Content
 * @since   1.0.0
 */

declare(strict_types=1);

namespace ProRank\SEO\Modules\Content;

defined( 'ABSPATH' ) || exit;

use ProRank\SEO\Core\RestApi\LinkingController;
use ProRank\SEO\Modules\BaseModule;

/**
 * Internal linking module.
 *
 * Registers the linking REST controller and, when auto-linking is enabled in
 * the module settings, rewrites post content on save so that configured
 * keywords become links to their target URLs.
 */
class InternalLinkingModule extends BaseModule {
    protected string $slug = 'internal-linking';

    protected string $name = 'Internal Linking';

    protected string $description = 'Get basic internal linking suggestions while editing content.';

    protected string $feature_tier = 'free';

    protected ?string $parent_slug = 'on-page-seo';

    /**
     * Registers the REST controller hook and the pre-save auto-link filter,
     * and boots the editor integration when that class is available.
     */
    public function init_hooks(): void {
        add_action('rest_api_init', [$this, 'register_rest_controller'], 5);
        add_filter('wp_insert_post_data', [$this, 'maybe_auto_link_pre_save'], 20, 2);

        if (class_exists('\\ProRank\\SEO\\Admin\\EditorIntegration')) {
            \ProRank\SEO\Admin\EditorIntegration::init();
        }
    }

    /**
     * Registers the linking REST routes if the controller class can be loaded.
     */
    public function register_rest_controller(): void {
        if (class_exists(LinkingController::class)) {
            (new LinkingController())->register_routes();
        }
    }

    /**
     * Filter callback for `wp_insert_post_data`: applies the auto-link rules
     * to the post content before it is written.
     *
     * The data is returned untouched for autosaves and revisions, statuses
     * outside publish/draft/pending/future/private, users without edit
     * capability, disabled settings, post types that are not allowed, empty or
     * too-short content, and when no enabled rule exists.
     *
     * @param array $data    Slashed post data about to be saved.
     * @param array $postarr Raw post array passed to the insert call.
     * @return array Post data, with `post_content` replaced when links were added.
     */
    public function maybe_auto_link_pre_save(array $data, array $postarr): array {
        static $processing = [];

        if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) {
            return $data;
        }

        $post_id = isset($postarr['ID']) ? (int) $postarr['ID'] : 0;
        if ($post_id && (wp_is_post_autosave($post_id) || wp_is_post_revision($post_id))) {
            return $data;
        }

        // New posts have no ID yet, so the content hash serves as the re-entrancy key.
        $processing_key = $post_id ?: md5((string) ($data['post_content'] ?? ''));
        if (!empty($processing[$processing_key])) {
            return $data;
        }

        $post_status = $data['post_status'] ?? '';
        if (!in_array($post_status, ['publish', 'draft', 'pending', 'future', 'private'], true)) {
            return $data;
        }

        if ($post_id > 0) {
            if (!current_user_can('edit_post', $post_id)) {
                return $data;
            }
        } else {
            if (!current_user_can('edit_posts')) {
                return $data;
            }
        }

        $settings = function_exists('prorank_get_internal_linking_settings')
            ? \prorank_get_internal_linking_settings()
            : [];

        if (empty($settings['enabled']) || empty($settings['auto_linking'])) {
            return $data;
        }

        $post_type = $data['post_type'] ?? '';
        $allowed_post_types = $settings['link_post_types'] ?? ['post', 'page'];
        if (!is_array($allowed_post_types)) {
            $allowed_post_types = array_filter((array) $allowed_post_types);
        }
        if (!empty($allowed_post_types) && $post_type !== '' && !in_array($post_type, $allowed_post_types, true)) {
            return $data;
        }

        $slashed_content = (string) ($data['post_content'] ?? '');
        if ($slashed_content === '') {
            return $data;
        }

        // The filter receives slashed data; parsing it as HTML while still
        // slashed would mangle every quoted attribute (href=\"...\").
        $content = (string) wp_unslash($slashed_content);

        $word_count = str_word_count(wp_strip_all_tags($content));
        if ($word_count < (int) ($settings['min_word_count'] ?? 0)) {
            return $data;
        }

        $rules = get_option('prorank_auto_link_rules', []);
        if (!is_array($rules) || empty($rules)) {
            return $data;
        }

        $active_rules = array_filter($rules, static function ($rule): bool {
            return is_array($rule) && !empty($rule['enabled']);
        });

        if (empty($active_rules)) {
            return $data;
        }

        // Highest priority first. Priorities are compared numerically so that
        // string or float values stored in the option cannot break the sort.
        $priority_of = static function (array $rule): float {
            $value = $rule['priority'] ?? 0;
            return is_numeric($value) ? (float) $value : 0.0;
        };
        usort($active_rules, static function (array $a, array $b) use ($priority_of): int {
            return $priority_of($b) <=> $priority_of($a);
        });

        $processing[$processing_key] = true;

        try {
            $result = $this->apply_auto_link_rules($content, $settings, $active_rules, $post_id);
            if ($result['links_added'] > 0 && $result['content'] !== $content) {
                // Hand the content back in the slashed form the caller expects.
                $data['post_content'] = wp_slash($result['content']);

                update_option('prorank_auto_link_last_run', current_time('mysql'));
                update_option(
                    'prorank_auto_links_created',
                    (int) get_option('prorank_auto_links_created', 0) + (int) $result['links_added']
                );

                // Per-rule counter: incremented once per save in which the rule produced a link.
                foreach ($result['rules_used'] as $rule_id) {
                    $current = (int) get_option('prorank_auto_link_count_' . $rule_id, 0);
                    update_option('prorank_auto_link_count_' . $rule_id, $current + 1);
                }
            }
        } finally {
            // Always release the guard, even if rule processing throws.
            unset($processing[$processing_key]);
        }

        return $data;
    }

    /**
     * Runs the given rules, in order, against the content.
     *
     * @param string $content  Unslashed post content (HTML).
     * @param array  $settings Module settings; reads `max_links_per_post` and
     *                         `max_links_per_paragraph`.
     * @param array  $rules    Enabled rules, already sorted by priority.
     * @param int    $post_id  ID of the post being saved (0 for a new post).
     * @return array{content: string, links_added: int, rules_used: array}
     */
    private function apply_auto_link_rules(string $content, array $settings, array $rules, int $post_id): array {
        $max_links_per_post = max(1, (int) ($settings['max_links_per_post'] ?? 3));
        $max_links_per_paragraph = max(1, (int) ($settings['max_links_per_paragraph'] ?? 2));

        $links_added = 0;
        $used_anchors = [];
        $rules_used = [];
        $plain_text = wp_strip_all_tags($content);

        foreach ($rules as $rule) {
            if ($links_added >= $max_links_per_post) {
                break;
            }

            $keyword = trim((string) ($rule['keyword'] ?? ''));
            $target_url = trim((string) ($rule['target_url'] ?? ''));

            if ($keyword === '' || $target_url === '') {
                continue;
            }

            if (!empty($rule['exclude_posts'])) {
                // Accept either a comma-separated string or an array of IDs.
                $raw_excluded = is_array($rule['exclude_posts'])
                    ? $rule['exclude_posts']
                    : explode(',', (string) $rule['exclude_posts']);
                $excluded = array_map(
                    static function ($id): string {
                        return is_scalar($id) ? trim((string) $id) : '';
                    },
                    $raw_excluded
                );
                if (in_array((string) $post_id, $excluded, true)) {
                    continue;
                }
            }

            // Cheap pre-check before the DOM pass.
            if (stripos($plain_text, $keyword) === false) {
                continue;
            }

            if (in_array(strtolower($keyword), $used_anchors, true)) {
                continue;
            }

            $max_per_rule = max(1, (int) ($rule['max_links_per_post'] ?? 1));

            $link_html = $this->build_auto_link_html($target_url, $keyword, $rule, $settings);
            if ($link_html === '') {
                // The target URL was rejected by esc_url(); never emit an empty href.
                continue;
            }

            $result = $this->safe_link_insert(
                $content,
                $keyword,
                $link_html,
                min($max_per_rule, $max_links_per_post - $links_added),
                $max_links_per_paragraph,
                $used_anchors
            );

            if ($result['count'] > 0) {
                $content = $result['content'];
                $plain_text = wp_strip_all_tags($content);
                $links_added += $result['count'];
                if (!empty($rule['id'])) {
                    $rules_used[$rule['id']] = true;
                }
            }
        }

        return [
            'content' => $content,
            'links_added' => $links_added,
            'rules_used' => array_keys($rules_used),
        ];
    }

    /**
     * Builds the anchor template for a rule. The link text slot is the literal
     * `$0`, which safe_link_insert() replaces with the matched text.
     *
     * @param string $target_url Link destination.
     * @param string $keyword    Rule keyword; used as the title fallback.
     * @param array  $rule       Rule definition; reads `target_title`.
     * @param array  $settings   Module settings; reads the new-tab, nofollow and
     *                           title-attribute flags.
     * @return string Anchor template, or an empty string when esc_url() rejects the URL.
     */
    private function build_auto_link_html(string $target_url, string $keyword, array $rule, array $settings): string {
        $href = esc_url($target_url);
        if ($href === '') {
            return '';
        }

        $target_host = wp_parse_url($target_url, PHP_URL_HOST);
        $site_host = wp_parse_url(home_url(), PHP_URL_HOST);
        // Host names are case-insensitive; a URL without a host is treated as internal.
        $is_external = !empty($target_host)
            && !empty($site_host)
            && strcasecmp((string) $site_host, (string) $target_host) !== 0;

        $open_in_new_tab = $is_external
            ? (!empty($settings['open_external_new_tab']) || !empty($settings['open_in_new_tab']))
            : (!empty($settings['open_internal_new_tab']) || !empty($settings['open_in_new_tab']));

        $rel_parts = [];
        if ($open_in_new_tab) {
            $rel_parts[] = 'noopener';
            $rel_parts[] = 'noreferrer';
        }
        if ($is_external && !empty($settings['add_nofollow'])) {
            $rel_parts[] = 'nofollow';
        }

        $rel_attr = $rel_parts
            ? ' rel="' . esc_attr(implode(' ', array_unique($rel_parts))) . '"'
            : '';
        $target_attr = $open_in_new_tab ? ' target="_blank"' : '';
        $title_attr = '';

        if (!empty($settings['add_title_attribute'])) {
            $candidate = $rule['target_title'] ?? '';
            $title_value = is_scalar($candidate) ? (string) $candidate : '';
            if ($title_value === '') {
                $title_value = $keyword;
            }
            $title_attr = ' title="' . esc_attr($title_value) . '"';
        }

        return sprintf(
            '<a href="%s"%s%s%s>$0</a>',
            $href,
            $target_attr,
            $rel_attr,
            $title_attr
        );
    }

    /**
     * Wraps an occurrence of the keyword in the given anchor template, working
     * on DOM text nodes so that markup is never matched. Text inside headings,
     * navigation, existing links, code and similar elements is left alone, as
     * is text whose direct parent holds fewer than 50 bytes of text.
     *
     * NOTE(review): the anchor is recorded in $used_anchors after the first
     * insertion and checked for every text node, so a single call inserts at
     * most one link and a per-rule limit above 1 has no effect. Confirm whether
     * that is the intended policy.
     *
     * @param string $content           Unslashed HTML content.
     * @param string $keyword           Keyword to link (matched case-insensitively).
     * @param string $link_html         Anchor template from build_auto_link_html().
     * @param int    $max_per_post      Upper bound on links inserted by this call.
     * @param int    $max_per_paragraph Upper bound on links per parent element.
     * @param array  $used_anchors      Lower-cased keywords already linked; updated in place.
     * @return array{content: string, count: int}
     */
    private function safe_link_insert(
        string $content,
        string $keyword,
        string $link_html,
        int $max_per_post,
        int $max_per_paragraph = 2,
        array &$used_anchors = []
    ): array {
        $skip_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'nav', 'footer', 'header', 'button', 'a', 'script', 'style', 'code', 'pre'];
        // Unicode-aware boundaries: \b only knows ASCII word characters here, so
        // it fails for keywords that start or end with non-ASCII letters or symbols.
        $pattern = '/(?<![\p{L}\p{N}_])(' . preg_quote($keyword, '/') . ')(?![\p{L}\p{N}_])/iu';
        $anchor_lower = strtolower($keyword);
        $unchanged = ['content' => $content, 'count' => 0];

        if ($max_per_post < 1 || in_array($anchor_lower, $used_anchors, true)) {
            return $unchanged;
        }

        $escape_text = static function (string $text): string {
            return htmlspecialchars($text, ENT_NOQUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
        };

        $total_count = 0;
        $paragraph_link_counts = [];

        $previous_libxml_setting = libxml_use_internal_errors(true);

        try {
            $dom = new \DOMDocument();

            // The wrapper div gives the fragment a single root; the XML
            // declaration makes libxml read the markup as UTF-8.
            $wrapped = '<div>' . wp_check_invalid_utf8($content) . '</div>';
            if (!$dom->loadHTML('<?xml encoding="utf-8" ?>' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD)) {
                return $unchanged;
            }

            $root = $dom->documentElement;
            if (!$root) {
                return $unchanged;
            }

            $xpath = new \DOMXPath($dom);
            $text_nodes = $xpath->query('//text()');
            if ($text_nodes === false) {
                return $unchanged;
            }

            foreach ($text_nodes as $text_node) {
                if ($total_count >= $max_per_post) {
                    break;
                }

                if (in_array($anchor_lower, $used_anchors, true)) {
                    break;
                }

                $parent = $text_node->parentNode;
                if (!$parent) {
                    continue;
                }

                // Walk up to the wrapper root, not merely to the nearest <div>, so
                // that e.g. <nav><div>text</div></nav> is still recognised as skipped.
                $should_skip = false;
                for (
                    $current = $parent;
                    $current instanceof \DOMElement && !$current->isSameNode($root);
                    $current = $current->parentNode
                ) {
                    if (in_array(strtolower($current->nodeName), $skip_tags, true)) {
                        $should_skip = true;
                        break;
                    }
                }
                if ($should_skip) {
                    continue;
                }

                // The node path is a stable key; object ids of transient DOM
                // wrappers can be reused once the wrapper is released.
                $paragraph_id = $parent->getNodePath() ?? (string) spl_object_id($parent);
                if (!isset($paragraph_link_counts[$paragraph_id])) {
                    $paragraph_link_counts[$paragraph_id] = 0;
                }
                if ($paragraph_link_counts[$paragraph_id] >= $max_per_paragraph) {
                    continue;
                }

                $paragraph_text = $parent->textContent ?? '';
                if (strlen(trim($paragraph_text)) < 50) {
                    continue;
                }

                $text = (string) $text_node->nodeValue;
                if (preg_match($pattern, $text, $match, PREG_OFFSET_CAPTURE) !== 1) {
                    continue;
                }

                // Offsets from PREG_OFFSET_CAPTURE are byte offsets, matching substr().
                [$matched, $offset] = $match[1];
                $before = substr($text, 0, $offset);
                $after = substr($text, $offset + strlen($matched));

                // nodeValue is decoded text, so '&' and '<' must be re-escaped
                // before the string is parsed as XML. Only the template's text
                // slot is filled, so a literal "$0" inside the URL stays intact.
                $xml = $escape_text($before)
                    . str_replace('>$0</a>', '>' . $escape_text($matched) . '</a>', $link_html)
                    . $escape_text($after);

                $fragment = $dom->createDocumentFragment();
                if (!$fragment->appendXML($xml) || !$fragment->hasChildNodes()) {
                    // Unparseable replacement: keep the original text instead of
                    // swapping it for an empty fragment.
                    continue;
                }

                $parent->replaceChild($fragment, $text_node);

                $total_count++;
                $paragraph_link_counts[$paragraph_id]++;
                // Record the keyword form so it compares equal to the lookups above.
                $used_anchors[] = $anchor_lower;
            }

            if ($total_count === 0) {
                return $unchanged;
            }

            $html = $dom->saveHTML();
            $inner = is_string($html)
                ? preg_replace('/^.*?<div>(.*)<\/div>.*$/s', '$1', $html)
                : null;
            if (!is_string($inner)) {
                return $unchanged;
            }

            return [
                'content' => $inner,
                'count' => $total_count,
            ];
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous_libxml_setting);
        }
    }
}
