<?php
/**
 * Headline Analyzer - Google-Aligned Title Analysis
 *
 * Analyzes titles for rewrite risk and relevance based on Google Search Central guidance.
 * NOT CTR heuristics - focuses on what Google documents as rewrite/relevance signals.
 *
 * @package ProRank\SEO\Modules\Content
 * @since   1.0.0
 * @see     https://developers.google.com/search/docs/appearance/title-link
 * @see     https://developers.google.com/search/docs/fundamentals/creating-helpful-content
 */

declare(strict_types=1);

namespace ProRank\SEO\Modules\Content;

defined( 'ABSPATH' ) || exit;

/**
 * HeadlineAnalyzer class
 *
 * Provides Google-aligned headline analysis with 5 checks:
 * 1. Rewrite Risk - H1 mismatch, length issues
 * 2. Content Match - Title/content entity coverage
 * 3. Uniqueness - Duplicate/boilerplate detection
 * 4. Stuffing - Keyword repetition, separator abuse
 * 5. Schema Alignment - Article headline consistency
 */
class HeadlineAnalyzer {

    /**
     * English stopwords removed during tokenization.
     *
     * tokenize() drops every token that appears in this list. Each word is
     * listed exactly once.
     *
     * @var array<string>
     */
    private const STOPWORDS = [
        'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for',
        'from', 'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on',
        'that', 'the', 'to', 'was', 'were', 'will', 'with', 'this',
        'but', 'they', 'have', 'had', 'what', 'when', 'where', 'who',
        'which', 'why', 'how', 'all', 'each', 'every', 'both', 'few',
        'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not',
        'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just',
        'can', 'should', 'now', 'i', 'me', 'my', 'we', 'our', 'you',
        'your', 'his', 'her', 'their', 'been', 'being', 'do',
        'does', 'did', 'doing', 'would', 'could', 'might', 'must',
        'shall', 'into', 'through', 'during', 'before', 'after',
        'above', 'below', 'between', 'under', 'again', 'further',
        'then', 'once', 'here', 'there', 'any', 'about', 'out', 'up',
    ];

    /**
     * Per-character pixel widths for SERP truncation estimation
     * Based on Arial 16px (Google SERP font approximation)
     *
     * Keys are single characters (the en dash and em dash keys are multi-byte
     * UTF-8 strings); values are estimated widths in pixels. Characters that
     * are not listed here are handled by the caller, which substitutes a
     * default width (see calculate_pixel_width()).
     *
     * @var array<string, float>
     */
    private const CHAR_WIDTHS = [
        // Wide characters (12-13px)
        'w' => 12.0, 'm' => 12.0, 'W' => 13.0, 'M' => 13.0,
        // Regular lowercase letters (8-9px)
        'o' => 9.0, 'e' => 9.0, 'a' => 9.0, 'd' => 9.0, 'g' => 9.0,
        'h' => 9.0, 'n' => 9.0, 'p' => 9.0, 'q' => 9.0, 'u' => 9.0,
        'v' => 8.0, 'x' => 8.0, 'y' => 8.0, 'z' => 8.0, 'b' => 9.0,
        'c' => 8.0, 'k' => 8.0, 's' => 8.0,
        // Regular uppercase letters (mostly 10px; F, J and L are narrower)
        'O' => 10.0, 'Q' => 10.0, 'D' => 10.0, 'G' => 10.0, 'H' => 10.0,
        'N' => 10.0, 'U' => 10.0, 'A' => 10.0, 'V' => 10.0, 'X' => 10.0,
        'Y' => 10.0, 'Z' => 10.0, 'B' => 10.0, 'C' => 10.0, 'E' => 10.0,
        'K' => 10.0, 'P' => 10.0, 'R' => 10.0, 'S' => 10.0, 'T' => 10.0,
        'F' => 9.0, 'J' => 7.0, 'L' => 8.0,
        // Narrow characters (4-6px)
        'i' => 4.0, 'l' => 4.0, 'I' => 5.0, 'j' => 4.0, 't' => 5.0,
        'f' => 5.0, 'r' => 6.0,
        // Digits (8px each)
        '0' => 8.0, '1' => 8.0, '2' => 8.0, '3' => 8.0, '4' => 8.0,
        '5' => 8.0, '6' => 8.0, '7' => 8.0, '8' => 8.0, '9' => 8.0,
        // Whitespace and punctuation (3-12px; the em dash is the widest)
        ' ' => 4.0, '.' => 4.0, ',' => 4.0, ':' => 4.0, ';' => 4.0,
        '-' => 5.0, '–' => 8.0, '—' => 12.0, '|' => 4.0,
        '!' => 4.0, '?' => 8.0, "'" => 3.0, '"' => 6.0,
    ];

    /**
     * SERP title display width limit in pixels (desktop)
     *
     * analyze() reports a title as truncated when its estimated pixel width
     * exceeds this value.
     */
    private const SERP_WIDTH_LIMIT = 580;

    /**
     * Overall score weights
     *
     * One weight per check; the five values add up to 1.0. When the schema
     * check is not applicable, calculate_overall_score() leaves WEIGHT_SCHEMA
     * out and rescales the remaining four proportionally.
     */
    private const WEIGHT_REWRITE = 0.40;
    private const WEIGHT_CONTENT = 0.30;
    private const WEIGHT_UNIQUE = 0.15;
    private const WEIGHT_STUFFING = 0.10;
    private const WEIGHT_SCHEMA = 0.05;

    /**
     * Analyze a headline/title
     *
     * Collects page context for the post (H1, intro text, top entities, schema
     * headline), runs the five checks, combines their scores into a weighted
     * overall score and returns everything together with length statistics.
     * When $post_id is 0 the context getters return empty values, so the
     * context-dependent checks run without page data.
     *
     * @param string $title   The title to analyze.
     * @param int    $post_id Optional post ID for context.
     * @return array{
     *   title: string,
     *   overall_score: int,
     *   rewrite_risk: array,
     *   content_match: array,
     *   uniqueness: array,
     *   stuffing: array,
     *   schema: array,
     *   length: array,
     *   issues: array,
     *   suggestions: array,
     *   language_warning: bool
     * }
     */
    public static function analyze( string $title, int $post_id = 0 ): array {
        $h1 = self::get_h1( $post_id );
        $intro = self::get_intro( $post_id, 200 );
        $entities = self::extract_entities( $post_id );
        $schema_headline = self::get_schema_headline( $post_id );
        $is_non_latin = self::is_non_latin( $title );

        // Run all 5 Google-aligned checks
        $rewrite_risk = self::analyze_rewrite_risk( $title, $h1, $post_id );
        $content_match = self::analyze_content_match( $title, $h1, $intro, $entities );
        $uniqueness = self::analyze_uniqueness( $title, $post_id );
        $stuffing = self::analyze_stuffing( $title );
        $schema = self::analyze_schema( $title, $schema_headline );

        // Calculate overall score with weights
        $overall = self::calculate_overall_score(
            $rewrite_risk['score'],
            $content_match['score'],
            $uniqueness['score'],
            $stuffing['score'],
            $schema['score'],
            $schema['applicable']
        );

        // Compile all issues and suggestions
        $issues = self::compile_issues( $rewrite_risk, $content_match, $uniqueness, $stuffing, $schema );
        $suggestions = self::generate_suggestions( $issues, $title, $h1 );

        $pixel_width = self::calculate_pixel_width( $title );

        // str_word_count() only recognises ASCII letters (plus ' and -), so it
        // skips numbers ("Top 10 Tips" => 2) and miscounts non-ASCII titles.
        // Count runs of Unicode letters/marks/digits instead, still treating an
        // inner apostrophe or hyphen as part of the word. preg_match_all()
        // returns false on invalid UTF-8, which the cast turns into 0.
        $word_count = (int) preg_match_all(
            '/[\p{L}\p{M}\p{N}]+(?:[\'’\-][\p{L}\p{M}\p{N}]+)*/u',
            $title
        );

        return [
            'title'            => $title,
            'overall_score'    => $overall,
            'rewrite_risk'     => $rewrite_risk,
            'content_match'    => $content_match,
            'uniqueness'       => $uniqueness,
            'stuffing'         => $stuffing,
            'schema'           => $schema,
            'length'           => [
                'chars'       => mb_strlen( $title ),
                'words'       => $word_count,
                'pixel_width' => $pixel_width,
                'truncated'   => $pixel_width > self::SERP_WIDTH_LIMIT,
            ],
            'issues'           => $issues,
            'suggestions'      => $suggestions,
            'language_warning' => $is_non_latin,
        ];
    }

    /**
     * 1. Title Link Rewrite Risk (0-100)
     *
     * Starts from 100 and subtracts a penalty for each signal that makes a
     * rewrite by Google more likely: weak token overlap with the H1, and a
     * title that is too short or too long (by characters or estimated pixels).
     * Duplicate/boilerplate penalties are NOT applied here; the Uniqueness
     * check owns those.
     *
     * @param string      $title   Title to analyze.
     * @param string|null $h1      H1 heading text.
     * @param int         $post_id Post ID (not used by this check).
     * @return array{score: int, level: string, issues: array}
     */
    private static function analyze_rewrite_risk( string $title, ?string $h1, int $post_id ): array {
        $remaining = 100;
        $found     = [];

        // Only compare against the H1 when one was actually found.
        if ( null !== $h1 && '' !== $h1 ) {
            $overlap      = self::jaccard_similarity( $title, $h1 );
            $overlap_data = [ 'similarity' => round( $overlap, 2 ) ];

            if ( $overlap < 0.2 ) {
                $remaining -= 30;
                $found[]    = [
                    'type'     => 'h1_mismatch',
                    'severity' => 'high',
                    'message'  => __( 'Title significantly different from H1 heading', 'prorank-seo' ),
                    'data'     => $overlap_data,
                ];
            } elseif ( $overlap < 0.4 ) {
                $remaining -= 15;
                $found[]    = [
                    'type'     => 'h1_mismatch',
                    'severity' => 'medium',
                    'message'  => __( 'Title doesn\'t align well with H1 heading', 'prorank-seo' ),
                    'data'     => $overlap_data,
                ];
            }
        }

        // Length is judged on both the character count and the pixel estimate;
        // either one crossing a threshold is enough.
        $pixels      = self::calculate_pixel_width( $title );
        $chars       = mb_strlen( $title );
        $length_data = [ 'chars' => $chars, 'pixels' => $pixels ];

        $is_short     = $chars < 20 || $pixels < 200;
        $is_very_long = $chars > 120 || $pixels > 900;
        $is_long      = $chars > 80 || $pixels > 600;

        if ( $is_short ) {
            $remaining -= 10;
            $found[]    = [
                'type'     => 'too_short',
                'severity' => 'medium',
                'message'  => __( 'Title too short - Google may supplement with other content', 'prorank-seo' ),
                'data'     => $length_data,
            ];
        } elseif ( $is_very_long ) {
            $remaining -= 20;
            $found[]    = [
                'type'     => 'very_long',
                'severity' => 'high',
                'message'  => __( 'Title very long - will be truncated in search results', 'prorank-seo' ),
                'data'     => $length_data,
            ];
        } elseif ( $is_long ) {
            $remaining -= 10;
            $found[]    = [
                'type'     => 'long',
                'severity' => 'low',
                'message'  => __( 'Title may be truncated in search results', 'prorank-seo' ),
                'data'     => $length_data,
            ];
        }

        // Below 60 => high risk, 60-79 => medium, 80 and above => low.
        $risk = $remaining < 60 ? 'high' : ( $remaining < 80 ? 'medium' : 'low' );

        return [
            'score'  => max( 0, $remaining ),
            'level'  => $risk,
            'issues' => $found,
        ];
    }

    /**
     * 2. Content Match / Entity Coverage (0-100)
     *
     * Ensures title reflects actual page content (Helpful Content guidance).
     * Formula: score = 100 × (0.7 × precision + 0.3 × recall)
     *
     * - precision: share of distinct title tokens that also occur in the H1,
     *   the intro or the entity list.
     * - recall: share of the top 5 entities that occur in the title (1.0 when
     *   there are no entities).
     *
     * Both ratios are computed on de-duplicated sets, so they always stay in
     * the 0-1 range and the score stays in 0-100 even when the title repeats
     * a word.
     *
     * NOTE(review): when no page content is available (null H1, empty intro,
     * no entities) precision is 0 and the title is reported as a content
     * mismatch - confirm that this is what callers expect for unsaved posts.
     *
     * @param string      $title    Title to analyze.
     * @param string|null $h1       H1 heading text.
     * @param string      $intro    First 200 words of content.
     * @param array       $entities Top entities from content.
     * @return array{score: int, precision: float, recall: float, matched_terms: array, issues: array}
     */
    private static function analyze_content_match( string $title, ?string $h1, string $intro, array $entities ): array {
        // Distinct tokens only: a repeated title word must not be counted twice.
        $title_tokens = array_values( array_unique( self::tokenize( $title ) ) );

        if ( empty( $title_tokens ) ) {
            return [
                'score'         => 0,
                'precision'     => 0.0,
                'recall'        => 0.0,
                'matched_terms' => [],
                'issues'        => [],
            ];
        }

        // Entities may arrive as integers (numeric tokens become int array keys
        // upstream), so cast before lowercasing; mb_strtolower() also handles
        // non-ASCII letters, which strtolower() does not.
        $entities_lower = array_map(
            static function ( $entity ): string {
                return mb_strtolower( (string) $entity );
            },
            array_values( $entities )
        );

        $content_tokens = array_unique( array_merge(
            self::tokenize( $h1 ?? '' ),
            self::tokenize( $intro ),
            $entities_lower
        ) );

        // Precision: how many title words appear in content?
        $matches = array_values( array_intersect( $title_tokens, $content_tokens ) );
        $precision = count( $matches ) / count( $title_tokens );

        // Recall: how many top entities appear in title? Intersect from the
        // entity side so the numerator can never exceed the denominator.
        $top_entities = array_values( array_unique( array_slice( $entities_lower, 0, 5 ) ) );
        $recall = ! empty( $top_entities )
            ? count( array_intersect( $top_entities, $title_tokens ) ) / count( $top_entities )
            : 1.0;

        $score = (int) round( 100 * ( 0.7 * $precision + 0.3 * $recall ) );

        $issues = [];

        // Low precision: title terms not in content
        if ( $precision < 0.5 ) {
            $issues[] = [
                'type'     => 'content_mismatch',
                'severity' => 'high',
                'message'  => __( 'Title contains terms not supported by page content', 'prorank-seo' ),
                'data'     => [ 'precision' => round( $precision, 2 ) ],
            ];
        } elseif ( $precision < 0.7 ) {
            $issues[] = [
                'type'     => 'weak_content_match',
                'severity' => 'medium',
                'message'  => __( 'Some title terms are weakly supported by content', 'prorank-seo' ),
                'data'     => [ 'precision' => round( $precision, 2 ) ],
            ];
        }

        // Low recall: top content entities missing from title
        if ( ! empty( $top_entities ) && $recall < 0.4 ) {
            $issues[] = [
                'type'     => 'missing_entities',
                'severity' => 'medium',
                'message'  => __( 'Title misses key topics from your content', 'prorank-seo' ),
                'data'     => [ 'recall' => round( $recall, 2 ) ],
            ];
        }

        // Overall low score warning (only when nothing more specific was reported)
        if ( $score < 60 && empty( $issues ) ) {
            $issues[] = [
                'type'     => 'low_content_match',
                'severity' => 'medium',
                'message'  => __( 'Title-content alignment could be improved', 'prorank-seo' ),
                'data'     => [ 'score' => $score ],
            ];
        }

        return [
            'score'         => $score,
            'precision'     => round( $precision, 2 ),
            'recall'        => round( $recall, 2 ),
            'matched_terms' => $matches,
            'issues'        => $issues,
        ];
    }

    /**
     * 3. Uniqueness Scan (0-100)
     *
     * Looks for other published posts with the same title and for boilerplate
     * title patterns. Each duplicate costs 10 points (capped at 40) and a
     * boilerplate match costs a further 10. This is the only check that
     * applies the boilerplate penalty.
     *
     * @param string $title   Title to analyze.
     * @param int    $post_id Post ID to exclude from duplicate search.
     * @return array{score: int, is_unique: bool, duplicates: array, is_template: bool, issues: array}
     */
    private static function analyze_uniqueness( string $title, int $post_id ): array {
        $clashes     = self::find_duplicates( $title, $post_id );
        $templated   = self::is_boilerplate_template( $title, $post_id );
        $clash_count = count( $clashes );

        $deduction = 0;
        $found     = [];

        if ( $clash_count > 0 ) {
            // 10 points per duplicate page, never more than 40 in total.
            $deduction += min( 40, 10 * $clash_count );
            $found[]    = [
                'type'     => 'duplicate',
                'severity' => $clash_count > 2 ? 'high' : 'medium',
                'message'  => sprintf(
                    /* translators: %d: number of duplicate pages */
                    __( 'Title is duplicated on %d other page(s)', 'prorank-seo' ),
                    $clash_count
                ),
                'data'     => [ 'count' => $clash_count, 'pages' => array_slice( $clashes, 0, 5 ) ],
            ];
        }

        if ( $templated ) {
            $deduction += 10;
            $found[]    = [
                'type'     => 'boilerplate',
                'severity' => 'low',
                'message'  => __( 'Title follows a boilerplate template pattern', 'prorank-seo' ),
                'data'     => [],
            ];
        }

        return [
            'score'       => max( 0, 100 - $deduction ),
            'is_unique'   => 0 === $clash_count && ! $templated,
            'duplicates'  => $clashes,
            'is_template' => $templated,
            'issues'      => $found,
        ];
    }

    /**
     * 4. Stuffing Guardrail (0-100)
     *
     * Detects keyword stuffing patterns (Spam Policies). Three independent
     * signals each subtract from a starting score of 100:
     *
     * - Repetition: the most frequent non-stopword occurs at least twice and
     *   makes up more than 25% (-10) or 35% (-20) of the title tokens.
     * - Separators: more than 2 (-10) or more than 4 (-20) of | – — : -
     * - Stacking: the same whole word (3+ chars) appears on both sides of a
     *   separator (-15).
     *
     * @param string $title Title to analyze.
     * @return array{score: int, is_stuffed: bool, issues: array}
     */
    private static function analyze_stuffing( string $title ): array {
        $score = 100;
        $issues = [];
        $tokens = self::tokenize( $title );
        $total = count( $tokens );

        if ( $total > 0 ) {
            // Repetition ratio check
            $counts = array_count_values( $tokens );
            $max_freq = max( $counts );
            $ratio = $max_freq / $total;

            // A word that occurs only once is not "repeated": without the
            // $max_freq guard every title with three or fewer tokens would be
            // flagged, because 1/3 already exceeds the 0.25 threshold.
            if ( $max_freq > 1 && $ratio > 0.25 ) {
                // array_count_values() turns numeric tokens into int keys, so
                // normalise back to a string for the message and the data.
                $repeated = (string) array_search( $max_freq, $counts, true );

                if ( $ratio > 0.35 ) {
                    $score -= 20;
                    $issues[] = [
                        'type'     => 'repetition',
                        'severity' => 'high',
                        'message'  => sprintf(
                            /* translators: 1: repeated word, 2: count, 3: ratio */
                            __( '"%1$s" repeated %2$d times (ratio: %3$s)', 'prorank-seo' ),
                            $repeated,
                            $max_freq,
                            number_format( $ratio, 2 )
                        ),
                        'data'     => [ 'word' => $repeated, 'count' => $max_freq, 'ratio' => round( $ratio, 2 ) ],
                    ];
                } else {
                    $score -= 10;
                    $issues[] = [
                        'type'     => 'repetition',
                        'severity' => 'medium',
                        'message'  => sprintf(
                            /* translators: 1: repeated word, 2: ratio */
                            __( '"%1$s" appears frequently (ratio: %2$s)', 'prorank-seo' ),
                            $repeated,
                            number_format( $ratio, 2 )
                        ),
                        'data'     => [ 'word' => $repeated, 'count' => $max_freq, 'ratio' => round( $ratio, 2 ) ],
                    ];
                }
            }
        }

        // Separator abuse check. The "u" modifier is required: without it the
        // multi-byte en/em dashes in the class are matched byte by byte, so a
        // single dash counts three times and other characters that share those
        // bytes (curly quotes, ellipsis) are counted as separators too.
        // preg_match_all() returns false on invalid UTF-8; the cast makes that 0.
        $sep_count = (int) preg_match_all( '/[|–—:\-]/u', $title );
        if ( $sep_count > 4 ) {
            $score -= 20;
            $issues[] = [
                'type'     => 'separators',
                'severity' => 'high',
                'message'  => sprintf(
                    /* translators: %d: separator count */
                    __( 'Too many separators (%d) - appears spammy', 'prorank-seo' ),
                    $sep_count
                ),
                'data'     => [ 'count' => $sep_count ],
            ];
        } elseif ( $sep_count > 2 ) {
            $score -= 10;
            $issues[] = [
                'type'     => 'separators',
                'severity' => 'medium',
                'message'  => sprintf(
                    /* translators: %d: separator count */
                    __( 'Multiple separators (%d) detected', 'prorank-seo' ),
                    $sep_count
                ),
                'data'     => [ 'count' => $sep_count ],
            ];
        }

        // Keyword stacking pattern (same word repeated with separators). The
        // back-reference is wrapped in word boundaries so that "art" does not
        // match inside "party".
        if ( preg_match( '/(\b\w{3,}\b).*[|–—\-].*\b\1\b/iu', $title, $matches ) ) {
            $score -= 15;
            $issues[] = [
                'type'     => 'stacking',
                'severity' => 'medium',
                'message'  => __( 'Keyword repeated around separators (stacking pattern)', 'prorank-seo' ),
                'data'     => [ 'word' => $matches[1] ?? '' ],
            ];
        }

        return [
            'score'      => max( 0, $score ),
            'is_stuffed' => $score < 80,
            'issues'     => $issues,
        ];
    }

    /**
     * 5. Schema Headline Alignment (0-100, N/A if no Article schema)
     *
     * Compares the title with the Article schema headline using token overlap.
     * Similarity of 0.8 or more scores 100, below 0.4 scores 10, and values in
     * between are interpolated linearly. Without a schema headline the check
     * is marked not applicable and returns a neutral 100 plus a "message" key.
     *
     * @param string      $title           Title to analyze.
     * @param string|null $schema_headline Article schema headline.
     * @return array{score: int, applicable: bool, similarity: float|null, schema_headline: string|null, issues: array}
     */
    private static function analyze_schema( string $title, ?string $schema_headline ): array {
        if ( empty( $schema_headline ) ) {
            return [
                'score'           => 100,
                'applicable'      => false,
                'similarity'      => null,
                'schema_headline' => null,
                'message'         => __( 'No Article schema present', 'prorank-seo' ),
                'issues'          => [],
            ];
        }

        $overlap = self::jaccard_similarity( $title, $schema_headline );
        $found   = [];

        if ( $overlap >= 0.8 ) {
            // Close enough: full marks, nothing to report.
            $points = 100;
        } else {
            $far_off = $overlap < 0.4;

            // Floor of 10 below 0.4; otherwise a straight line from (0.4, 10) to (0.8, 100).
            $points = $far_off
                ? 10
                : (int) round( 10 + ( ( $overlap - 0.4 ) / 0.4 ) * 90 );

            $found[] = [
                'type'     => 'schema_mismatch',
                'severity' => $far_off ? 'high' : 'low',
                'message'  => $far_off
                    ? __( 'Schema headline differs significantly from SEO title', 'prorank-seo' )
                    : __( 'Schema headline could be more aligned with SEO title', 'prorank-seo' ),
                'data'     => [ 'similarity' => round( $overlap, 2 ), 'schema_headline' => $schema_headline ],
            ];
        }

        return [
            'score'           => $points,
            'applicable'      => true,
            'similarity'      => round( $overlap, 2 ),
            'schema_headline' => $schema_headline,
            'issues'          => $found,
        ];
    }

    /**
     * Combine the individual check scores into one weighted score.
     *
     * With an applicable schema check all five weights are used as declared.
     * Otherwise the schema score is ignored and the other four weights are
     * each divided by their sum, so they still add up to 1.
     *
     * @param int  $rewrite          Rewrite risk score.
     * @param int  $match            Content match score.
     * @param int  $unique           Uniqueness score.
     * @param int  $stuff            Stuffing score.
     * @param int  $schema           Schema score.
     * @param bool $schema_applicable Whether schema check applies.
     * @return int Overall score 0-100.
     */
    private static function calculate_overall_score(
        int $rewrite,
        int $match,
        int $unique,
        int $stuff,
        int $schema,
        bool $schema_applicable
    ): int {
        // [ weight, score ] pairs for the checks that always count.
        $components = [
            [ self::WEIGHT_REWRITE, $rewrite ],
            [ self::WEIGHT_CONTENT, $match ],
            [ self::WEIGHT_UNIQUE, $unique ],
            [ self::WEIGHT_STUFFING, $stuff ],
        ];

        $weighted = 0.0;

        if ( $schema_applicable ) {
            $components[] = [ self::WEIGHT_SCHEMA, $schema ];

            foreach ( $components as $component ) {
                $weighted += $component[0] * $component[1];
            }

            return (int) round( $weighted );
        }

        // Schema does not apply: rescale each remaining weight by their sum.
        $base = self::WEIGHT_REWRITE + self::WEIGHT_CONTENT + self::WEIGHT_UNIQUE + self::WEIGHT_STUFFING;

        foreach ( $components as $component ) {
            $weighted += ( $component[0] / $base ) * $component[1];
        }

        return (int) round( $weighted );
    }

    /**
     * Calculate Jaccard similarity between two texts
     *
     * Both texts are tokenized (stopwords removed) and reduced to sets of
     * distinct tokens; the result is |A ∩ B| / |A ∪ B|. De-duplicating first
     * keeps the value inside 0-1 even when a text repeats a word (otherwise
     * "seo seo tips" vs "seo tips" would yield 1.5). Two texts without any
     * tokens are treated as identical (1.0).
     *
     * @param string $a First text.
     * @param string $b Second text.
     * @return float Similarity 0-1.
     */
    private static function jaccard_similarity( string $a, string $b ): float {
        $set_a = array_unique( self::tokenize( $a ) );
        $set_b = array_unique( self::tokenize( $b ) );

        if ( empty( $set_a ) && empty( $set_b ) ) {
            return 1.0;
        }

        $intersection = count( array_intersect( $set_a, $set_b ) );
        // Inclusion-exclusion on the two distinct sets.
        $union = count( $set_a ) + count( $set_b ) - $intersection;

        return $union > 0 ? $intersection / $union : 0.0;
    }

    /**
     * Tokenize text and remove stopwords
     *
     * Steps: lowercase, turn separator punctuation into spaces, drop the
     * remaining punctuation, split on whitespace, remove STOPWORDS. Duplicate
     * tokens are kept and the original order is preserved. Input that is not
     * valid UTF-8 yields an empty list.
     *
     * @param string $text Input text.
     * @return array<string> Tokens.
     */
    private static function tokenize( string $text ): array {
        $text = mb_strtolower( $text );

        // Separators written without surrounding spaces ("SEO/SEM", "Tips|Acme")
        // must split words rather than fuse them into a single token.
        $text = preg_replace( '/[|\/:–—]+/u', ' ', $text );

        // Drop all other punctuation. Combining marks (\p{M}) are kept because
        // they are part of the word in scripts that use them.
        $text = preg_replace( '/[^\p{L}\p{M}\p{N}\s]/u', '', (string) $text );

        // The "u" modifier makes \s cover Unicode whitespace such as the
        // no-break space and keeps the split independent of the C locale.
        $words = preg_split( '/\s+/u', trim( (string) $text ), -1, PREG_SPLIT_NO_EMPTY );

        if ( ! is_array( $words ) ) {
            return [];
        }

        return array_values( array_diff( $words, self::STOPWORDS ) );
    }

    /**
     * Estimate the rendered width of a string in pixels.
     *
     * The text is split into UTF-8 characters and each one is looked up in
     * CHAR_WIDTHS; characters without an entry count as 8px. If the split
     * fails, the estimate falls back to 8px per character.
     *
     * @param string $text Input text.
     * @return int Estimated pixel width.
     */
    private static function calculate_pixel_width( string $text ): int {
        $fallback = 8.0; // Width assumed for characters missing from the table.
        $glyphs   = preg_split( '//u', $text, -1, PREG_SPLIT_NO_EMPTY );

        if ( ! is_array( $glyphs ) ) {
            return (int) ( mb_strlen( $text ) * $fallback );
        }

        $widths = array_map(
            static function ( string $glyph ) use ( $fallback ): float {
                return self::CHAR_WIDTHS[ $glyph ] ?? $fallback;
            },
            $glyphs
        );

        return (int) round( array_sum( $widths ) );
    }

    /**
     * Tell whether the text contains characters from a non-Latin script.
     *
     * Only the code point ranges listed in the pattern are tested; text in
     * any other script is reported as Latin.
     *
     * @param string $text Input text.
     * @return bool True if non-Latin script detected.
     */
    private static function is_non_latin( string $text ): bool {
        // Ranges in pattern order: Cyrillic, Arabic, Hebrew, CJK ideographs,
        // Hiragana, Katakana, Hangul syllables, Thai.
        $script_ranges = '/[\x{0400}-\x{04FF}]|[\x{0600}-\x{06FF}]|[\x{0590}-\x{05FF}]|[\x{4E00}-\x{9FFF}]|[\x{3040}-\x{309F}]|[\x{30A0}-\x{30FF}]|[\x{AC00}-\x{D7AF}]|[\x{0E00}-\x{0E7F}]/u';

        // preg_match() yields 1 on a hit; 0 and false (error) both mean "no".
        return 1 === preg_match( $script_ranges, $text );
    }

    /**
     * Get H1 heading from post content
     *
     * Returns the text of the first <h1> element found in the post content,
     * with tags removed, HTML entities decoded and whitespace collapsed. A
     * Gutenberg level-1 heading block is covered by the same pattern because
     * its markup is a regular <h1> element.
     *
     * @param int $post_id Post ID.
     * @return string|null H1 text, or null when there is no post or no non-empty H1.
     */
    private static function get_h1( int $post_id ): ?string {
        if ( $post_id <= 0 ) {
            return null;
        }

        $post = get_post( $post_id );
        if ( ! $post ) {
            return null;
        }

        // \b keeps the pattern from matching longer tag names that merely start with "h1".
        if ( ! preg_match( '/<h1\b[^>]*>(.*?)<\/h1>/is', (string) $post->post_content, $matches ) ) {
            return null;
        }

        // Decode entities so "Tom &amp; Jerry" compares as "Tom & Jerry"
        // instead of leaking an "amp" token into the similarity checks.
        $heading = html_entity_decode( wp_strip_all_tags( $matches[1] ), ENT_QUOTES | ENT_HTML5, 'UTF-8' );

        // Decoding can introduce no-break spaces (&nbsp;); fold every run of
        // whitespace into a single plain space.
        $heading = trim( (string) preg_replace( '/\s+/u', ' ', $heading ) );

        return '' === $heading ? null : $heading;
    }

    /**
     * Return the opening words of a post as plain text.
     *
     * Tags are stripped, whitespace runs are collapsed to single spaces, and
     * the first $word_limit space-separated words are joined back together.
     *
     * @param int $post_id   Post ID.
     * @param int $word_limit Word limit.
     * @return string Intro text, or an empty string when the post cannot be loaded.
     */
    private static function get_intro( int $post_id, int $word_limit = 200 ): string {
        $post = $post_id > 0 ? get_post( $post_id ) : null;
        if ( ! $post ) {
            return '';
        }

        $plain   = wp_strip_all_tags( $post->post_content );
        $one_row = trim( (string) preg_replace( '/\s+/', ' ', $plain ) );
        $opening = array_slice( explode( ' ', $one_row ), 0, $word_limit );

        return implode( ' ', $opening );
    }

    /**
     * Extract top entities/keywords from post content
     *
     * Uses plain term frequency: the content is tokenized (stopwords removed),
     * tokens are ranked by how often they occur, and up to 10 tokens that are
     * at least 3 characters long and occur at least twice are returned, most
     * frequent first.
     *
     * @param int $post_id Post ID.
     * @return array<string> Top entities.
     */
    private static function extract_entities( int $post_id ): array {
        if ( $post_id <= 0 ) {
            return [];
        }

        $post = get_post( $post_id );
        if ( ! $post ) {
            return [];
        }

        $content = wp_strip_all_tags( $post->post_content );
        $tokens = self::tokenize( $content );

        if ( empty( $tokens ) ) {
            return [];
        }

        // Simple term frequency, most frequent first
        $counts = array_count_values( $tokens );
        arsort( $counts );

        $entities = [];
        foreach ( $counts as $word => $count ) {
            // Sorted in descending order: once a count drops below 2, so do all the rest.
            if ( $count < 2 ) {
                break;
            }

            // array_count_values() converts numeric tokens such as "2024" into
            // integer keys. Under strict_types mb_strlen() rejects an int with
            // a TypeError, so cast back to string before using the token.
            $word = (string) $word;

            if ( mb_strlen( $word ) >= 3 ) {
                $entities[] = $word;
                if ( count( $entities ) >= 10 ) {
                    break;
                }
            }
        }

        return $entities;
    }

    /**
     * Get Article schema headline if present
     *
     * Checks a fixed list of post meta keys in priority order (ProRank's own
     * key first) and returns the first non-empty string value.
     *
     * @param int $post_id Post ID.
     * @return string|null Schema headline or null.
     */
    private static function get_schema_headline( int $post_id ): ?string {
        if ( $post_id <= 0 ) {
            return null;
        }

        $meta_keys = [
            // ProRank schema settings take precedence
            '_prorank_schema_headline',
            // Other common schema meta keys
            '_schema_headline',
            '_yoast_wpseo_schema_article_headline',
            'rank_math_schema_article_headline',
        ];

        foreach ( $meta_keys as $key ) {
            $value = get_post_meta( $post_id, $key, true );

            // Meta values are not guaranteed to be strings (serialized data is
            // returned as an array); returning one would violate the ?string
            // return type, so anything that is not a string is skipped.
            if ( is_string( $value ) && ! empty( $value ) ) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Count how many other posts use the given title.
     *
     * Convenience wrapper that returns the size of the find_duplicates() result.
     *
     * @param string $title      Title to check.
     * @param int    $exclude_id Post ID to exclude.
     * @return int Number of duplicates.
     */
    private static function count_duplicates( string $title, int $exclude_id ): int {
        $matching_posts = self::find_duplicates( $title, $exclude_id );

        return count( $matching_posts );
    }

    /**
     * Find published posts that share the given title.
     *
     * Two case-insensitive lookups are made, each limited to 10 rows: one
     * against the `_prorank_title` post meta (SEO title) and one against
     * `post_title` for posts, pages and products. Results are merged with SEO
     * title matches first, and a post found by both lookups is listed once.
     *
     * @param string $title      Title to check.
     * @param int    $exclude_id Post ID to exclude.
     * @return array<array{id: int, title: string, url: string}> Duplicate posts.
     */
    private static function find_duplicates( string $title, int $exclude_id ): array {
        global $wpdb;

        $needle = mb_strtolower( trim( $title ) );

        if ( empty( $needle ) ) {
            return [];
        }

        // Posts whose stored SEO title matches.
        // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
        $meta_rows = $wpdb->get_results(
            $wpdb->prepare(
                "SELECT p.ID, p.post_title, pm.meta_value as seo_title
                FROM {$wpdb->posts} p
                INNER JOIN {$wpdb->postmeta} pm ON p.ID = pm.post_id
                WHERE pm.meta_key = '_prorank_title'
                AND LOWER(pm.meta_value) = %s
                AND p.ID != %d
                AND p.post_status = 'publish'
                LIMIT 10",
                $needle,
                $exclude_id
            ),
            ARRAY_A
        );

        // Posts whose regular post title matches.
        // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
        $post_rows = $wpdb->get_results(
            $wpdb->prepare(
                "SELECT ID, post_title
                FROM {$wpdb->posts}
                WHERE LOWER(post_title) = %s
                AND ID != %d
                AND post_status = 'publish'
                AND post_type IN ('post', 'page', 'product')
                LIMIT 10",
                $needle,
                $exclude_id
            ),
            ARRAY_A
        );

        // Each source names the column that holds the title to report.
        $sources = [
            [ 'rows' => $meta_rows, 'field' => 'seo_title' ],
            [ 'rows' => $post_rows, 'field' => 'post_title' ],
        ];

        $found   = [];
        $listed  = [];

        foreach ( $sources as $source ) {
            foreach ( $source['rows'] as $row ) {
                $id = $row['ID'];

                // Already reported via an earlier row or the other lookup.
                if ( isset( $listed[ $id ] ) ) {
                    continue;
                }

                $found[] = [
                    'id'    => (int) $id,
                    'title' => $row[ $source['field'] ],
                    'url'   => get_permalink( $id ),
                ];
                $listed[ $id ] = true;
            }
        }

        return $found;
    }

    /**
     * Check if title follows a boilerplate template pattern
     *
     * A title is treated as boilerplate when either:
     * - it matches one of the fixed patterns below (site-name suffix, generic
     *   placeholder such as "Page 3" / "Untitled", or an archive-style prefix), or
     * - it is at least 20 characters long and its first 15 characters
     *   (lower-cased) start more than 30% of the published posts and pages,
     *   on a site that has more than 10 of them.
     *
     * @param string $title   Title to check.
     * @param int    $post_id Post ID; excluded from the shared-prefix count.
     * @return bool True if boilerplate pattern detected.
     */
    private static function is_boilerplate_template( string $title, int $post_id ): bool {
        global $wpdb;

        // Patterns that do not depend on site configuration
        $patterns = [
            // Generic patterns
            '/^(Page|Post|Article) \d+/i',
            '/^Untitled/i',
            // Category/tag prefixes
            '/^(Category|Tag|Archive):/i',
        ];

        // Site name suffix patterns. Skipped when the site name is empty:
        // otherwise the pattern degenerates to "title ends with a separator".
        $site_name = (string) get_bloginfo( 'name' );
        if ( '' !== $site_name ) {
            // Unicode mode makes the case-insensitive match fold non-ASCII
            // letters too; it is only safe when both strings are valid UTF-8,
            // so fall back to byte-wise matching otherwise.
            $is_utf8   = mb_check_encoding( $title, 'UTF-8' ) && mb_check_encoding( $site_name, 'UTF-8' );
            $modifiers = $is_utf8 ? 'iu' : 'i';
            $quoted    = preg_quote( $site_name, '/' );

            $patterns[] = '/ - ' . $quoted . '$/' . $modifiers;
            $patterns[] = '/ \| ' . $quoted . '$/' . $modifiers;
        }

        foreach ( $patterns as $pattern ) {
            if ( 1 === preg_match( $pattern, $title ) ) {
                return true;
            }
        }

        // Shared-prefix heuristic: only meaningful for titles of 20+ characters
        $title_lower = mb_strtolower( $title );

        if ( mb_strlen( $title_lower ) < 20 ) {
            return false;
        }

        // Count the population first so the prefix scan can be skipped on
        // sites too small (10 or fewer posts/pages) for the ratio to apply.
        // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
        $total_posts = (int) $wpdb->get_var(
            "SELECT COUNT(*) FROM {$wpdb->posts}
            WHERE post_status = 'publish'
            AND post_type IN ('post', 'page')"
        );

        if ( $total_posts <= 10 ) {
            return false;
        }

        // Check prefix (first 15 chars)
        $prefix = mb_substr( $title_lower, 0, 15 );

        // Note: this count excludes the current post, while $total_posts includes it.
        // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
        $prefix_count = (int) $wpdb->get_var(
            $wpdb->prepare(
                "SELECT COUNT(*) FROM {$wpdb->posts}
                WHERE LOWER(post_title) LIKE %s
                AND post_status = 'publish'
                AND post_type IN ('post', 'page')
                AND ID != %d",
                $wpdb->esc_like( $prefix ) . '%',
                $post_id
            )
        );

        return ( $prefix_count / $total_posts ) > 0.3;
    }

    /**
     * Merge the issues reported by every check into one severity-ordered list
     *
     * Each argument is a check result; its optional 'issues' entry is
     * appended to the combined list in argument order. The list is then
     * sorted so 'high' comes before 'medium', and 'medium' before 'low'.
     * A missing or unrecognised severity ranks the same as 'low'.
     *
     * @param array $rewrite Rewrite risk results.
     * @param array $match   Content match results.
     * @param array $unique  Uniqueness results.
     * @param array $stuff   Stuffing results.
     * @param array $schema  Schema results.
     * @return array All issues sorted by severity.
     */
    private static function compile_issues( array $rewrite, array $match, array $unique, array $stuff, array $schema ): array {
        // Collect each check's issue list, tolerating results without one
        $issue_lists = [];
        foreach ( [ $rewrite, $match, $unique, $stuff, $schema ] as $check_result ) {
            $issue_lists[] = $check_result['issues'] ?? [];
        }

        $combined = array_merge( ...$issue_lists );

        // Lower rank sorts first: high, medium, low
        $rank_of = static function ( $issue ): int {
            $ranks    = [ 'high' => 0, 'medium' => 1, 'low' => 2 ];
            $severity = $issue['severity'] ?? 'low';

            return $ranks[ $severity ] ?? 2;
        };

        usort(
            $combined,
            static function ( $left, $right ) use ( $rank_of ): int {
                return $rank_of( $left ) <=> $rank_of( $right );
            }
        );

        return $combined;
    }

    /**
     * Generate actionable suggestions based on issues
     *
     * Maps each recognised issue type to one suggestion. At most one
     * suggestion per suggestion type is returned; when several issues map to
     * the same suggestion type, the first one encountered wins, so the order
     * of $issues decides which priority is kept. Entries that are not arrays
     * or have no string 'type' are ignored, as are unrecognised types.
     *
     * @param array       $issues All detected issues.
     * @param string      $title  Current title. Not used by this method at present.
     * @param string|null $h1     H1 heading.
     * @return array<array{type: string, message: string, priority: string}> Suggestions.
     */
    private static function generate_suggestions( array $issues, string $title, ?string $h1 ): array {
        // Keyed by suggestion type so duplicates are dropped as they appear
        $suggestions = [];

        foreach ( $issues as $issue ) {
            if ( ! is_array( $issue ) ) {
                continue;
            }

            // Require a string so the loose comparison done by switch cannot
            // match a non-string type against a case label.
            $issue_type = $issue['type'] ?? null;
            if ( ! is_string( $issue_type ) ) {
                continue;
            }

            $suggestion = null;

            switch ( $issue_type ) {
                case 'h1_mismatch':
                    // Explicit emptiness test: a heading of "0" is still a heading
                    if ( null !== $h1 && '' !== $h1 ) {
                        $suggestion = [
                            'type'     => 'align_h1',
                            'message'  => sprintf(
                                /* translators: %s: H1 heading text */
                                __( 'Consider aligning title with your H1: "%s"', 'prorank-seo' ),
                                mb_substr( $h1, 0, 60 ) . ( mb_strlen( $h1 ) > 60 ? '...' : '' )
                            ),
                            'priority' => 'high',
                        ];
                    }
                    break;

                case 'too_short':
                    $suggestion = [
                        'type'     => 'extend_title',
                        'message'  => __( 'Add more descriptive words to make the title more specific', 'prorank-seo' ),
                        'priority' => 'medium',
                    ];
                    break;

                case 'very_long':
                case 'long':
                    $suggestion = [
                        'type'     => 'shorten_title',
                        'message'  => __( 'Shorten title to under 60 characters to avoid truncation', 'prorank-seo' ),
                        'priority' => 'very_long' === $issue_type ? 'high' : 'medium',
                    ];
                    break;

                case 'content_mismatch':
                    $suggestion = [
                        'type'     => 'align_content',
                        'message'  => __( 'Update title to better reflect the main topics of your content', 'prorank-seo' ),
                        'priority' => 'medium',
                    ];
                    break;

                case 'duplicate':
                    $suggestion = [
                        'type'     => 'make_unique',
                        'message'  => __( 'Make this title unique by adding specific details', 'prorank-seo' ),
                        'priority' => 'high',
                    ];
                    break;

                case 'boilerplate':
                    $suggestion = [
                        'type'     => 'remove_template',
                        'message'  => __( 'Remove templated prefix/suffix to create a unique title', 'prorank-seo' ),
                        'priority' => 'low',
                    ];
                    break;

                case 'repetition':
                case 'stacking':
                    $suggestion = [
                        'type'     => 'reduce_repetition',
                        'message'  => __( 'Use synonyms or remove repeated keywords', 'prorank-seo' ),
                        'priority' => 'high',
                    ];
                    break;

                case 'separators':
                    $suggestion = [
                        'type'     => 'reduce_separators',
                        'message'  => __( 'Use fewer separators (|, -, :) for a cleaner title', 'prorank-seo' ),
                        'priority' => 'medium',
                    ];
                    break;

                case 'schema_mismatch':
                    $suggestion = [
                        'type'     => 'align_schema',
                        'message'  => __( 'Update Article schema headline to match your SEO title', 'prorank-seo' ),
                        // A missing severity is treated as not high
                        'priority' => 'high' === ( $issue['severity'] ?? 'low' ) ? 'high' : 'low',
                    ];
                    break;
            }

            // Keep only the first suggestion of each type
            if ( null !== $suggestion && ! isset( $suggestions[ $suggestion['type'] ] ) ) {
                $suggestions[ $suggestion['type'] ] = $suggestion;
            }
        }

        return array_values( $suggestions );
    }
}
