<?php
/**
 * Internal linking suggestions using OpenAI.
 *
 * Analyzes post content and suggests relevant internal links from existing
 * site content. Pro feature only.
 *
 * @package TopRanker_AI
 * @since   1.0.0
 */

// Exit if accessed directly.
if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

/**
 * TopRanker Internal Links class.
 *
 * @since 1.0.0
 */
class TopRanker_Internal_Links {

	/**
	 * Maximum candidate posts to send to AI.
	 *
	 * Upper bound on the list returned by get_candidate_posts(); also used
	 * there to compute how many slots each lookup step may still fill.
	 *
	 * @since 1.0.0
	 * @var   int
	 */
	const MAX_CANDIDATES = 50;

	/**
	 * Maximum links to suggest.
	 *
	 * Upper end of the suggestion range requested in the prompt built by
	 * build_prompt().
	 *
	 * @since 1.0.0
	 * @var   int
	 */
	const MAX_SUGGESTIONS = 5;

	/**
	 * Minimum links to suggest.
	 *
	 * Lower end of the suggestion range requested in the prompt built by
	 * build_prompt().
	 *
	 * @since 1.0.0
	 * @var   int
	 */
	const MIN_SUGGESTIONS = 3;

	/**
	 * Maximum excerpt length for candidates.
	 *
	 * Measured in characters; format_candidates() truncates longer excerpts
	 * to this length and appends an ellipsis.
	 *
	 * @since 1.0.0
	 * @var   int
	 */
	const EXCERPT_LENGTH = 100;

	/**
	 * API instance.
	 *
	 * Stays null until get_api() is first called.
	 *
	 * @since 1.0.0
	 * @var   TopRanker_API|null
	 */
	private $api = null;

	/**
	 * Optimizer instance.
	 *
	 * Stays null until get_optimizer() is first called.
	 *
	 * @since 1.0.0
	 * @var   TopRanker_Optimizer|null
	 */
	private $optimizer = null;

	/**
	 * Constructor.
	 *
	 * Intentionally does no work: the API client and the optimizer are
	 * instantiated on demand by get_api() and get_optimizer().
	 *
	 * @since 1.0.0
	 */
	public function __construct() {
		// Classes will be initialized lazily when needed.
	}

	/**
	 * Return this instance's API client, instantiating it on first access.
	 *
	 * @since  1.0.0
	 * @return TopRanker_API
	 */
	private function get_api() {
		if ( null !== $this->api ) {
			return $this->api;
		}

		$this->api = new TopRanker_API();

		return $this->api;
	}

	/**
	 * Return this instance's optimizer, instantiating it on first access.
	 *
	 * @since  1.0.0
	 * @return TopRanker_Optimizer
	 */
	private function get_optimizer() {
		if ( null !== $this->optimizer ) {
			return $this->optimizer;
		}

		$this->optimizer = new TopRanker_Optimizer();

		return $this->optimizer;
	}

	/**
	 * Produce internal link suggestions for a post via the AI API.
	 *
	 * Resolves the post, collects candidate link targets, sends a prompt to
	 * the chat completion endpoint and parses the reply. If the reply cannot
	 * be parsed, the request is sent one more time before giving up. Parsed
	 * suggestions are stored in post meta before being returned.
	 *
	 * @since 1.0.0
	 * @param int|WP_Post $post            Post ID or WP_Post object.
	 * @param string      $focus_keyphrase Optional. Focus keyphrase for relevance.
	 * @return array|WP_Error {
	 *     Result on success, WP_Error on failure.
	 *
	 *     @type int   $post_id     ID of the analyzed post.
	 *     @type array $suggestions Parsed link suggestions.
	 *     @type int   $count       Number of suggestions.
	 *     @type int   $candidates  Number of candidate posts offered to the AI.
	 * }
	 */
	public function generate_suggestions( $post, $focus_keyphrase = '' ) {
		$post = get_post( $post );

		if ( ! $post ) {
			return new WP_Error( 'invalid_post', __( 'Invalid post.', 'topranker-ai' ) );
		}

		// Prepared article text and the system-message context.
		$optimizer      = $this->get_optimizer();
		$content        = $optimizer->prepare_content( $post, 4000 );
		$context_prefix = $optimizer->build_context_prefix( $post );

		// Nothing to analyze when both the body and the title are empty.
		if ( empty( $content ) && empty( $post->post_title ) ) {
			return new WP_Error( 'no_content', __( 'Post has no content to analyze.', 'topranker-ai' ) );
		}

		// Possible link targets.
		$candidates = $this->get_candidate_posts( $post );

		if ( empty( $candidates ) ) {
			return new WP_Error(
				'no_candidates',
				__( 'No candidate posts found for internal linking. Your site needs more published content.', 'topranker-ai' )
			);
		}

		$prompt = $this->build_prompt( $post, $content, $candidates, $focus_keyphrase );

		$api      = $this->get_api();
		$messages = array(
			array(
				'role'    => 'system',
				'content' => $context_prefix,
			),
			array(
				'role'    => 'user',
				'content' => $prompt,
			),
		);

		// One initial request plus a single retry when the reply cannot be parsed.
		// A transport-level error aborts immediately without retrying.
		$max_attempts = 2;
		$suggestions  = null;

		for ( $attempt = 1; $attempt <= $max_attempts; $attempt++ ) {
			$response = $api->chat_completion( $messages );

			if ( is_wp_error( $response ) ) {
				return $response;
			}

			$suggestions = $this->parse_response( $response['content'], $candidates );

			if ( ! is_wp_error( $suggestions ) ) {
				break;
			}
		}

		if ( is_wp_error( $suggestions ) ) {
			return $suggestions;
		}

		// Persist the result for later retrieval.
		$this->cache_suggestions( $post->ID, $suggestions );

		return array(
			'post_id'     => $post->ID,
			'suggestions' => $suggestions,
			'count'       => count( $suggestions ),
			'candidates'  => count( $candidates ),
		);
	}

	/**
	 * Get candidate posts for internal linking.
	 *
	 * Collects up to MAX_CANDIDATES published posts, in priority order:
	 *
	 * 1. Posts sharing a category with the current post (up to 30).
	 * 2. Posts sharing a tag with the current post (up to 20).
	 * 3. Recent posts of the same post type.
	 * 4. Recent posts of the other enabled post types.
	 *
	 * Every step excludes the current post and anything an earlier step
	 * already selected. All steps order by date, newest first.
	 *
	 * @since 1.0.0
	 * @param WP_Post $post Current post being optimized.
	 * @return array Array of candidate posts with title, URL, and excerpt.
	 */
	private function get_candidate_posts( $post ) {
		$post_type = $post->post_type;

		// Post types the plugin is enabled for; fall back to the defaults when the option is malformed.
		$enabled_types = get_option( 'topranker_post_types', array( 'post', 'page' ) );
		if ( ! is_array( $enabled_types ) ) {
			$enabled_types = array( 'post', 'page' );
		}

		$self_id       = array( $post->ID );
		$candidate_ids = array();

		// Step 1: Posts in the same categories.
		// Note: WP_Query's 'relevance' ordering only applies to search queries (those with
		// an `s` term); without one it silently falls back to date order, so ask for that
		// explicitly.
		$categories = wp_get_post_categories( $post->ID, array( 'fields' => 'ids' ) );

		if ( ! empty( $categories ) && ! is_wp_error( $categories ) ) {
			$candidate_ids = $this->query_candidate_ids(
				array(
					'post_type'    => $enabled_types,
					'category__in' => $categories,
				),
				30,
				$self_id
			);
		}

		// Step 2: Posts with the same tags.
		$tags = wp_get_post_tags( $post->ID, array( 'fields' => 'ids' ) );

		if ( ! empty( $tags ) && ! is_wp_error( $tags ) && count( $candidate_ids ) < self::MAX_CANDIDATES ) {
			$remaining     = self::MAX_CANDIDATES - count( $candidate_ids );
			$candidate_ids = array_merge(
				$candidate_ids,
				$this->query_candidate_ids(
					array(
						'post_type' => $enabled_types,
						'tag__in'   => $tags,
					),
					min( 20, $remaining ),
					array_merge( $self_id, $candidate_ids )
				)
			);
		}

		// Step 3: Fill remaining slots with recent posts from the same post type.
		if ( count( $candidate_ids ) < self::MAX_CANDIDATES ) {
			$candidate_ids = array_merge(
				$candidate_ids,
				$this->query_candidate_ids(
					array( 'post_type' => $post_type ),
					self::MAX_CANDIDATES - count( $candidate_ids ),
					array_merge( $self_id, $candidate_ids )
				)
			);
		}

		// Step 4: Fill any remaining slots with the other enabled post types.
		if ( count( $candidate_ids ) < self::MAX_CANDIDATES ) {
			$other_types = array_values( array_diff( $enabled_types, array( $post_type ) ) );

			if ( ! empty( $other_types ) ) {
				$candidate_ids = array_merge(
					$candidate_ids,
					$this->query_candidate_ids(
						array( 'post_type' => $other_types ),
						self::MAX_CANDIDATES - count( $candidate_ids ),
						array_merge( $self_id, $candidate_ids )
					)
				);
			}
		}

		return array_slice( $this->format_candidates( $candidate_ids ), 0, self::MAX_CANDIDATES );
	}

	/**
	 * Query published post IDs, newest first, for one candidate lookup step.
	 *
	 * @since 1.0.0
	 * @param array $query_args   Step-specific get_posts() arguments (post type, taxonomy filter).
	 * @param int   $limit        Maximum number of IDs to return.
	 * @param array $excluded_ids Post IDs that must not be returned.
	 * @return array Array of post IDs; empty when $limit is below 1 or nothing matches.
	 */
	private function query_candidate_ids( array $query_args, $limit, array $excluded_ids ) {
		if ( $limit < 1 ) {
			return array();
		}

		$shared_args = array(
			'post_status'    => 'publish',
			'posts_per_page' => $limit,
			'post__not_in'   => $excluded_ids,
			'orderby'        => 'date',
			'order'          => 'DESC',
			'fields'         => 'ids',
		);

		$ids = get_posts( array_merge( $shared_args, $query_args ) );

		return is_array( $ids ) ? $ids : array();
	}

	/**
	 * Format post IDs into candidate array.
	 *
	 * Each candidate carries the post ID, title, permalink and a short plain
	 * text excerpt. The manual excerpt is used when present; otherwise one is
	 * generated from the first 15 words of the content. Either way the text is
	 * stripped of markup and capped at EXCERPT_LENGTH characters.
	 *
	 * IDs that do not resolve to a post, or whose permalink cannot be
	 * determined, are skipped.
	 *
	 * @since 1.0.0
	 * @param array $post_ids Array of post IDs.
	 * @return array Array of formatted candidates.
	 */
	private function format_candidates( $post_ids ) {
		$candidates = array();

		foreach ( (array) $post_ids as $post_id ) {
			$candidate_post = get_post( $post_id );

			if ( ! $candidate_post ) {
				continue;
			}

			// A candidate without a URL cannot be linked to or validated later.
			$permalink = get_permalink( $post_id );

			if ( ! $permalink ) {
				continue;
			}

			$excerpt = $candidate_post->post_excerpt;

			if ( empty( $excerpt ) ) {
				// Remove shortcodes before trimming so their markup does not
				// consume the word budget or leak into the AI prompt.
				$excerpt = strip_shortcodes( $candidate_post->post_content );
				$excerpt = wp_strip_all_tags( $excerpt );
				$excerpt = wp_trim_words( $excerpt, 15, '...' );
			} else {
				// Manual excerpts may contain HTML; the prompt wants plain text.
				$excerpt = wp_strip_all_tags( $excerpt );
			}

			// Truncate to max length.
			if ( mb_strlen( $excerpt ) > self::EXCERPT_LENGTH ) {
				$excerpt = mb_substr( $excerpt, 0, self::EXCERPT_LENGTH ) . '...';
			}

			$candidates[] = array(
				'id'      => $post_id,
				'title'   => $candidate_post->post_title,
				'url'     => $permalink,
				'excerpt' => $excerpt,
			);
		}

		return $candidates;
	}

	/**
	 * Assemble the user prompt sent to the AI for link suggestions.
	 *
	 * The prompt contains the article title and content, an optional focus
	 * keyphrase line, a numbered list of candidate pages (title, URL and
	 * excerpt) and the JSON response format the AI must follow.
	 *
	 * @since 1.0.0
	 * @param WP_Post $post            Current post.
	 * @param string  $content         Prepared post content.
	 * @param array   $candidates      Array of candidate posts.
	 * @param string  $focus_keyphrase Focus keyphrase if available.
	 * @return string The formatted prompt.
	 */
	private function build_prompt( $post, $content, $candidates, $focus_keyphrase ) {
		// Numbered (1-based) list of candidate pages.
		$entries = array();
		foreach ( $candidates as $position => $item ) {
			$entries[] = sprintf(
				"%d. %s\n   URL: %s\n   %s\n\n",
				$position + 1,
				$item['title'],
				$item['url'],
				$item['excerpt']
			);
		}
		$candidates_text = implode( '', $entries );

		// Optional keyphrase line; left blank when no keyphrase was given.
		$keyphrase_instruction = empty( $focus_keyphrase )
			? ''
			: sprintf(
				/* translators: %s: focus keyphrase */
				__( 'The post\'s focus keyphrase is: "%s"', 'topranker-ai' ),
				$focus_keyphrase
			);

		/* translators: 1: Min suggestions, 2: Max suggestions, 3: Post title, 4: Post content, 5: Keyphrase instruction, 6: Candidates list */
		$template = __(
			'Analyze this article and suggest %1$d-%2$d internal links to other pages on the same website.

CURRENT ARTICLE:
Title: %3$s
Content:
%4$s

%5$s

AVAILABLE PAGES TO LINK TO:
%6$s

REQUIREMENTS:
1. Suggest %1$d-%2$d internal links that are genuinely relevant and add value
2. For each link, identify a specific paragraph or sentence where it should be inserted
3. Provide natural anchor text (2-6 words) that fits the context
4. Only suggest links that make sense contextually - relevance is key
5. Prefer linking to related topics, supporting information, or deeper dives
6. Do NOT suggest linking to unrelated content just to fill the quota
7. The suggested paragraph snippet should be from the current article content

Respond with valid JSON only, in this exact format:
{
  "suggestions": [
    {
      "target_url": "URL of the page to link to",
      "target_title": "Title of the target page",
      "anchor_text": "The text to use as the link anchor",
      "paragraph_snippet": "A brief excerpt from the current article showing where to insert the link (max 100 chars)",
      "reason": "Brief explanation of why this link is relevant (max 50 chars)"
    }
  ]
}',
			'topranker-ai'
		);

		return sprintf(
			$template,
			self::MIN_SUGGESTIONS,
			self::MAX_SUGGESTIONS,
			$post->post_title,
			$content,
			$keyphrase_instruction,
			$candidates_text
		);
	}

	/**
	 * Parse the AI response.
	 *
	 * Decodes the JSON reply, drops structurally invalid suggestions, and
	 * keeps only those whose target URL identifies one of the candidates that
	 * were offered to the AI. URLs are compared after normalization (scheme,
	 * fragment, trailing slash and HTML entities are ignored), and the match
	 * must be exact: substring matching would map a site-root URL, or a URL
	 * that merely extends a candidate URL, to the wrong post.
	 *
	 * The returned target_url is always the candidate's own URL, never the
	 * AI's spelling of it. At most MAX_SUGGESTIONS suggestions are returned.
	 *
	 * @since 1.0.0
	 * @param string $content    Raw response content.
	 * @param array  $candidates Original candidates for URL validation.
	 * @return array|WP_Error Parsed suggestions or WP_Error.
	 */
	private function parse_response( $content, $candidates ) {
		$api    = $this->get_api();
		$parsed = $api->parse_json_response( $content );

		if ( is_wp_error( $parsed ) ) {
			return $parsed;
		}

		if ( ! isset( $parsed['suggestions'] ) || ! is_array( $parsed['suggestions'] ) ) {
			return new WP_Error(
				'invalid_response',
				__( 'Invalid response format from AI.', 'topranker-ai' )
			);
		}

		// Index candidates by normalized URL; the first candidate wins on a collision.
		$candidates_by_url = array();
		foreach ( $candidates as $candidate ) {
			if ( empty( $candidate['url'] ) ) {
				continue;
			}

			$key = $this->normalize_url_for_matching( $candidate['url'] );

			if ( '' !== $key && ! isset( $candidates_by_url[ $key ] ) ) {
				$candidates_by_url[ $key ] = $candidate;
			}
		}

		// Validate and sanitize suggestions.
		$suggestions = array();

		foreach ( $parsed['suggestions'] as $suggestion ) {
			// Guard against non-array elements before they are indexed like arrays.
			if ( ! is_array( $suggestion ) || ! $this->validate_suggestion( $suggestion ) ) {
				continue;
			}

			$key = $this->normalize_url_for_matching( $suggestion['target_url'] );

			if ( '' === $key || ! isset( $candidates_by_url[ $key ] ) ) {
				continue; // Not one of the pages we offered.
			}

			$candidate = $candidates_by_url[ $key ];

			$suggestions[] = array(
				'target_url'        => $candidate['url'],
				'target_title'      => sanitize_text_field( $suggestion['target_title'] ),
				'anchor_text'       => sanitize_text_field( $suggestion['anchor_text'] ),
				'paragraph_snippet' => sanitize_text_field( $suggestion['paragraph_snippet'] ),
				'reason'            => sanitize_text_field( $suggestion['reason'] ),
				'target_id'         => isset( $candidate['id'] ) ? $candidate['id'] : 0,
			);

			// The prompt asks for at most MAX_SUGGESTIONS; enforce it in case the AI over-delivers.
			if ( count( $suggestions ) >= self::MAX_SUGGESTIONS ) {
				break;
			}
		}

		if ( empty( $suggestions ) ) {
			return new WP_Error(
				'no_valid_suggestions',
				__( 'No valid internal link suggestions could be generated.', 'topranker-ai' )
			);
		}

		return $suggestions;
	}

	/**
	 * Reduce a URL to a canonical key for comparing AI output against candidates.
	 *
	 * Decodes HTML entities, removes any fragment, removes an http/https
	 * scheme (or a protocol-relative prefix) and strips trailing slashes.
	 * The path and query string are left case-sensitive.
	 *
	 * @since 1.0.0
	 * @param mixed $url URL to normalize.
	 * @return string Normalized key; empty string when nothing usable remains.
	 */
	private function normalize_url_for_matching( $url ) {
		if ( ! is_scalar( $url ) ) {
			return '';
		}

		$url = html_entity_decode( trim( (string) $url ), ENT_QUOTES, 'UTF-8' );

		$fragment_start = strpos( $url, '#' );
		if ( false !== $fragment_start ) {
			$url = substr( $url, 0, $fragment_start );
		}

		$url = preg_replace( '#^(?:https?:)?//#i', '', $url );

		return rtrim( (string) $url, '/' );
	}

	/**
	 * Validate a single suggestion structure.
	 *
	 * A suggestion is valid when it is an array in which every required
	 * field is a non-blank string and the anchor text has between 1 and 10
	 * whitespace-separated words.
	 *
	 * Words are counted by splitting on Unicode whitespace rather than with
	 * str_word_count(), which only recognizes ASCII letters and therefore
	 * reports zero words for non-Latin or purely numeric anchor text.
	 *
	 * @since 1.0.0
	 * @param array $suggestion Suggestion array.
	 * @return bool True if valid.
	 */
	private function validate_suggestion( $suggestion ) {
		if ( ! is_array( $suggestion ) ) {
			return false;
		}

		$required = array( 'target_url', 'target_title', 'anchor_text', 'paragraph_snippet', 'reason' );

		foreach ( $required as $field ) {
			// Compare against '' explicitly: empty() would also reject the string "0".
			if ( ! isset( $suggestion[ $field ] ) || ! is_string( $suggestion[ $field ] ) || '' === trim( $suggestion[ $field ] ) ) {
				return false;
			}
		}

		// Validate anchor text length (2-6 words typically, 1-10 tolerated).
		$anchor_text = trim( $suggestion['anchor_text'] );
		$words       = preg_split( '/\s+/u', $anchor_text, -1, PREG_SPLIT_NO_EMPTY );

		if ( false === $words ) {
			// The /u modifier fails on invalid UTF-8; fall back to a byte-wise split.
			$words = preg_split( '/\s+/', $anchor_text, -1, PREG_SPLIT_NO_EMPTY );
		}

		$word_count = is_array( $words ) ? count( $words ) : 0;

		if ( $word_count < 1 || $word_count > 10 ) {
			return false;
		}

		return true;
	}

	/**
	 * Store suggestions, together with the current time, in post meta.
	 *
	 * @since 1.0.0
	 * @param int   $post_id     Post ID.
	 * @param array $suggestions Array of suggestions.
	 * @return bool Boolean cast of the update_post_meta() result.
	 */
	private function cache_suggestions( $post_id, $suggestions ) {
		$stored = update_post_meta(
			$post_id,
			'_topranker_internal_links_cache',
			array(
				'suggestions' => $suggestions,
				'timestamp'   => time(),
			)
		);

		return (bool) $stored;
	}

	/**
	 * Read the cached suggestions for a post from post meta.
	 *
	 * @since 1.0.0
	 * @param int $post_id Post ID.
	 * @return array|null The stored cache entry ('suggestions' and 'timestamp'),
	 *                    or null when nothing usable is stored.
	 */
	public function get_cached_suggestions( $post_id ) {
		$entry = get_post_meta( $post_id, '_topranker_internal_links_cache', true );

		// Usable only when it is an array holding a non-empty suggestions list.
		$is_usable = is_array( $entry ) && ! empty( $entry['suggestions'] );

		return $is_usable ? $entry : null;
	}

	/**
	 * Remove the cached suggestions stored in post meta for a post.
	 *
	 * @since 1.0.0
	 * @param int $post_id Post ID.
	 * @return bool Result of delete_post_meta().
	 */
	public function clear_cache( $post_id ) {
		$meta_key = '_topranker_internal_links_cache';

		return delete_post_meta( $post_id, $meta_key );
	}

	/**
	 * Count internal links in post content.
	 *
	 * Scans the raw post content for anchor tags with a quoted href and counts
	 * those pointing at this site. A link is internal when:
	 *
	 * - its host equals the host of home_url() (compared case-insensitively,
	 *   ignoring a leading "www." and the scheme), or
	 * - it has no host and is a root-relative path ("/page/").
	 *
	 * Fragment-only links ("#section"), other schemes without a host
	 * (mailto:, tel:, javascript:) and protocol-relative links to other hosts
	 * ("//other.example/") are not counted.
	 *
	 * @since 1.0.0
	 * @param int|WP_Post $post Post ID or WP_Post object.
	 * @return int Number of internal links found.
	 */
	public function count_internal_links( $post ) {
		$post = get_post( $post );

		if ( ! $post ) {
			return 0;
		}

		// Find all links.
		if ( ! preg_match_all( '/<a[^>]+href=["\']([^"\']+)["\'][^>]*>/i', $post->post_content, $matches ) ) {
			return 0;
		}

		$home_host      = $this->normalize_link_host( wp_parse_url( home_url(), PHP_URL_HOST ) );
		$internal_count = 0;

		foreach ( $matches[1] as $url ) {
			// Attribute values are HTML-encoded in content (e.g. &amp; in query strings).
			$url = trim( html_entity_decode( $url, ENT_QUOTES, 'UTF-8' ) );

			// Skip empty values and in-page anchor links.
			if ( '' === $url || '#' === $url[0] ) {
				continue;
			}

			$host = wp_parse_url( $url, PHP_URL_HOST );

			if ( ! empty( $host ) ) {
				// Absolute or protocol-relative URL: internal only when the host is ours.
				if ( '' !== $home_host && $this->normalize_link_host( $host ) === $home_host ) {
					++$internal_count;
				}
				continue;
			}

			// No host: count root-relative paths, but never anything starting with "//".
			if ( '/' === $url[0] && ( strlen( $url ) < 2 || '/' !== $url[1] ) ) {
				++$internal_count;
			}
		}

		return $internal_count;
	}

	/**
	 * Lower-case a host name and drop a leading "www." for comparison.
	 *
	 * @since 1.0.0
	 * @param mixed $host Host name as returned by wp_parse_url(), or a non-string on failure.
	 * @return string Normalized host, or an empty string when no host is available.
	 */
	private function normalize_link_host( $host ) {
		if ( ! is_string( $host ) || '' === $host ) {
			return '';
		}

		return (string) preg_replace( '/^www\./', '', strtolower( $host ) );
	}

	/**
	 * Tell whether a post's internal link count reaches a minimum.
	 *
	 * @since 1.0.0
	 * @param int|WP_Post $post      Post ID or WP_Post object.
	 * @param int         $min_links Minimum links required. Default 2.
	 * @return bool True when count_internal_links() is at least $min_links.
	 */
	public function has_enough_internal_links( $post, $min_links = 2 ) {
		$found = $this->count_internal_links( $post );

		return $found >= $min_links;
	}

	/**
	 * Find published posts whose content contains the permalink of a post.
	 *
	 * Runs a direct LIKE query against the posts table for the target post's
	 * permalink, excluding the target itself, then resolves each match into
	 * an id/title/url record.
	 *
	 * @since 1.0.0
	 * @param int $post_id Post ID to find backlinks for.
	 * @return array Array of posts linking to this post; empty when the
	 *               permalink cannot be determined or nothing matches.
	 */
	public function get_backlinks( $post_id ) {
		global $wpdb;

		$backlinks = array();
		$permalink = get_permalink( $post_id );

		if ( ! $permalink ) {
			return $backlinks;
		}

		// Search for posts containing this URL.
		// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery,WordPress.DB.DirectDatabaseQuery.NoCaching
		$linking_ids = $wpdb->get_col(
			$wpdb->prepare(
				"SELECT ID FROM {$wpdb->posts}
				WHERE post_status = 'publish'
				AND post_content LIKE %s
				AND ID != %d",
				'%' . $wpdb->esc_like( $permalink ) . '%',
				$post_id
			)
		);

		if ( empty( $linking_ids ) ) {
			return $backlinks;
		}

		foreach ( $linking_ids as $linking_id ) {
			$linking_post = get_post( $linking_id );

			// Skip IDs that no longer resolve to a post.
			if ( ! $linking_post ) {
				continue;
			}

			$backlinks[] = array(
				'id'    => $linking_id,
				'title' => $linking_post->post_title,
				'url'   => get_permalink( $linking_id ),
			);
		}

		return $backlinks;
	}

	/**
	 * Get orphaned posts (no incoming internal links).
	 *
	 * Walks the most recent published posts of the given type, newest first,
	 * and collects those for which get_backlinks() finds nothing, stopping as
	 * soon as $limit posts have been collected. The scan covers the 100 most
	 * recent posts, or $limit posts when $limit is larger, so older orphaned
	 * posts may not be reported.
	 *
	 * @since 1.0.0
	 * @param string $post_type Post type to check. Default 'post'.
	 * @param int    $limit     Maximum posts to return. Default 10. Values below 1
	 *                          yield an empty array.
	 * @return array Array of orphaned posts, each with 'id', 'title' and 'url'.
	 */
	public function get_orphaned_posts( $post_type = 'post', $limit = 10 ) {
		$limit = (int) $limit;

		// Checking the limit only after appending would return one post even for a limit of 0.
		if ( $limit < 1 ) {
			return array();
		}

		$posts = get_posts(
			array(
				'post_type'      => $post_type,
				'post_status'    => 'publish',
				// Check more than limit, but never fewer posts than were asked for.
				'posts_per_page' => max( 100, $limit ),
				'orderby'        => 'date',
				'order'          => 'DESC',
			)
		);

		$orphaned = array();

		foreach ( $posts as $post ) {
			$backlinks = $this->get_backlinks( $post->ID );

			if ( ! empty( $backlinks ) ) {
				continue;
			}

			$orphaned[] = array(
				'id'    => $post->ID,
				'title' => $post->post_title,
				'url'   => get_permalink( $post->ID ),
			);

			if ( count( $orphaned ) >= $limit ) {
				break;
			}
		}

		return $orphaned;
	}

	/**
	 * Apply a single link suggestion to post content.
	 *
	 * Wraps the first occurrence of the anchor text in a link to the target
	 * URL and saves the post. Only occurrences in text are considered:
	 * matches inside an HTML tag (for example an attribute value) or inside
	 * an existing link are skipped so the markup is not broken. Matching is
	 * case-sensitive and the anchor must not span HTML tags.
	 *
	 * Note: This is a helper method. Actual insertion should be handled
	 * by the editor UI, as automatic insertion can be error-prone.
	 *
	 * @since 1.0.0
	 * @param int    $post_id    Post ID.
	 * @param string $anchor     Anchor text to find and convert to link.
	 * @param string $target_url URL to link to.
	 * @return bool|WP_Error True on success, false if the anchor is empty, not
	 *                       found in linkable text, or already linked; WP_Error
	 *                       when the post is invalid or cannot be saved.
	 */
	public function apply_link_to_content( $post_id, $anchor, $target_url ) {
		$post = get_post( $post_id );

		if ( ! $post ) {
			return new WP_Error(
				'invalid_post',
				__( 'Invalid post.', 'topranker-ai' )
			);
		}

		$anchor = (string) $anchor;

		// An empty needle matches at offset 0 and would insert an empty link.
		if ( '' === trim( $anchor ) ) {
			return false;
		}

		$content = $post->post_content;

		// Check if anchor text exists in content at all.
		if ( false === strpos( $content, $anchor ) ) {
			return false;
		}

		// Check if anchor is already linked.
		if ( preg_match( '/<a[^>]*>' . preg_quote( $anchor, '/' ) . '<\/a>/i', $content ) ) {
			return false;
		}

		// Create the link HTML.
		$link_html = sprintf(
			'<a href="%s">%s</a>',
			esc_url( $target_url ),
			esc_html( $anchor )
		);

		// Replace first linkable occurrence only.
		$new_content = $this->replace_first_text_occurrence( $content, $anchor, $link_html );

		if ( null === $new_content || $new_content === $content ) {
			return false;
		}

		// wp_update_post() expects slashed data and unslashes it before saving;
		// without wp_slash() any backslashes in the content would be stripped.
		$result = wp_update_post(
			array(
				'ID'           => $post->ID,
				'post_content' => wp_slash( $new_content ),
			),
			true
		);

		if ( is_wp_error( $result ) ) {
			return $result;
		}

		return true;
	}

	/**
	 * Replace the first occurrence of a string that lies in plain text.
	 *
	 * Splits the HTML into tags and text runs, skips tags and any text nested
	 * in an <a> element, and substitutes by offset so that "$" or "\" in the
	 * replacement is never interpreted as a regex backreference.
	 *
	 * @since 1.0.0
	 * @param string $html        HTML to search.
	 * @param string $search      Literal text to find (case-sensitive).
	 * @param string $replacement Literal replacement.
	 * @return string|null Updated HTML, or null when no eligible occurrence exists.
	 */
	private function replace_first_text_occurrence( $html, $search, $replacement ) {
		// With PREG_SPLIT_DELIM_CAPTURE, even indexes are text runs and odd indexes are tags.
		$parts = preg_split( '/(<[^>]*>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE );

		if ( false === $parts ) {
			return null;
		}

		$link_depth = 0;

		foreach ( $parts as $index => $part ) {
			if ( 1 === $index % 2 ) {
				// Track whether we are inside an existing link.
				if ( preg_match( '/^<a[\s>]/i', $part ) ) {
					++$link_depth;
				} elseif ( preg_match( '/^<\/a\s*>/i', $part ) ) {
					$link_depth = max( 0, $link_depth - 1 );
				}
				continue;
			}

			if ( $link_depth > 0 || '' === $part ) {
				continue;
			}

			$offset = strpos( $part, $search );

			if ( false === $offset ) {
				continue;
			}

			$parts[ $index ] = substr_replace( $part, $replacement, $offset, strlen( $search ) );

			return implode( '', $parts );
		}

		return null;
	}
}
