HEX
Server: Apache
System: Linux pdx1-shared-a1-38 6.6.104-grsec-jammy+ #3 SMP Tue Sep 16 00:28:11 UTC 2025 x86_64
User: mmickelson (3396398)
PHP: 8.1.31
Disabled: NONE
Upload Files
File: //usr/local/wp/vendor/wp-cli/import-command/src/Import_Command.php
<?php

class Import_Command extends WP_CLI_Command {

	/**
	 * Posts processed so far during this command run.
	 *
	 * Seeded into each new WP_Import instance before a file is imported and
	 * merged back afterwards, so state carries over between files.
	 *
	 * @var array
	 */
	public $processed_posts = array();

	/**
	 * Lazily populated cache of get_users(), filled by suggest_user().
	 *
	 * Declared explicitly because creating properties dynamically is
	 * deprecated as of PHP 8.2. Left unset (null) until first use.
	 *
	 * @var array|null
	 */
	public $blog_users;

	/**
	 * Imports content from a given WXR file.
	 *
	 * Provides a command line interface to the WordPress Importer plugin, for
	 * performing data migrations.
	 *
	 * Use `define( 'IMPORT_DEBUG', true );` for more verbosity during importing.
	 *
	 * ## OPTIONS
	 *
	 * <file>...
	 * : Path to one or more valid WXR files for importing. Directories are also accepted.
	 *
	 * --authors=<authors>
	 * : How the author mapping should be handled. Options are 'create', 'mapping.csv', or 'skip'. The first will create any non-existent users from the WXR file. The second will read author mapping associations from a CSV, or create a CSV for editing if the file path doesn't exist. The CSV requires two columns, and a header row like "old_user_login,new_user_login". The last option will skip any author mapping.
	 *
	 * [--skip=<data-type>]
	 * : Skip importing specific data. Supported options are: 'attachment' and 'image_resize' (skip time-consuming thumbnail generation).
	 *
	 * ## EXAMPLES
	 *
	 *     # Import content from a WXR file
	 *     $ wp import example.wordpress.2016-06-21.xml --authors=create
	 *     Starting the import process...
	 *     Processing post #1 ("Hello world!") (post_type: post)
	 *     -- 1 of 1
	 *     -- Tue, 21 Jun 2016 05:31:12 +0000
	 *     -- Imported post as post_id #1
	 *     Success: Finished importing from 'example.wordpress.2016-06-21.xml' file.
	 */
	public function __invoke( $args, $assoc_args ) {
		$defaults   = array(
			'authors' => null,
			'skip'    => array(),
		);
		$assoc_args = wp_parse_args( $assoc_args, $defaults );

		// `--skip` arrives as a comma-separated string; normalise it to a list
		// and tolerate whitespace around the separators.
		if ( ! is_array( $assoc_args['skip'] ) ) {
			$assoc_args['skip'] = array_map( 'trim', explode( ',', $assoc_args['skip'] ) );
		}

		$importer = $this->is_importer_available();
		if ( is_wp_error( $importer ) ) {
			WP_CLI::error( $importer );
		}

		$this->add_wxr_filters();

		WP_CLI::log( 'Starting the import process...' );

		// Expand directories into the import files they contain and drop
		// anything that is missing or unreadable.
		$new_args = array();
		foreach ( $args as $arg ) {
			if ( is_dir( $arg ) ) {
				$dir   = WP_CLI\Utils\trailingslashit( $arg );
				$found = array();

				foreach ( array( '*.wxr', '*.xml' ) as $pattern ) {
					// glob() may return false on error, hence empty() rather than count().
					$matches = glob( $dir . $pattern );
					if ( ! empty( $matches ) ) {
						$found = array_merge( $found, $matches );
					}
				}

				// Only warn when neither pattern matched; previously a directory
				// holding just .wxr files was reported as empty.
				if ( empty( $found ) ) {
					WP_CLI::warning( "No files found in the import directory '$arg'." );
					continue;
				}

				$new_args = array_merge( $new_args, $found );
				continue;
			}

			if ( ! file_exists( $arg ) ) {
				WP_CLI::warning( "File '$arg' doesn't exist." );
				continue;
			}

			if ( is_readable( $arg ) ) {
				$new_args[] = $arg;
				continue;
			}

			WP_CLI::warning( "Cannot read file '$arg'." );
		}

		if ( empty( $new_args ) ) {
			WP_CLI::error( 'Import failed due to missing or unreadable file/s.' );
		}

		$args = $new_args;

		foreach ( $args as $file ) {

			$ret = $this->import_wxr( $file, $assoc_args );

			if ( is_wp_error( $ret ) ) {
				WP_CLI::error( $ret );
			} else {
				WP_CLI::log( '' ); // WXR import ends with HTML, so make sure message is on next line
				WP_CLI::success( "Finished importing from '$file' file." );
			}
		}
	}

	/**
	 * Runs the WordPress Importer against a single WXR file.
	 *
	 * Parses the file, builds the author mapping requested via the 'authors'
	 * argument, primes the request superglobals the importer reads, and then
	 * drives the import.
	 *
	 * @param string $file Path to the WXR file.
	 * @param array  $args Associative arguments; the 'authors' and 'skip' entries are read.
	 * @return true|WP_Error True once the importer has run, WP_Error when parsing
	 *                       or author mapping failed.
	 */
	private function import_wxr( $file, $args ) {

		$importer                  = new WP_Import();
		$importer->processed_posts = $this->processed_posts;

		$parsed = $importer->parse( $file );
		if ( is_wp_error( $parsed ) ) {
			return $parsed;
		}

		// Populates $importer->authors, which feeds process_author_mapping().
		$importer->get_authors_from_import( $parsed );

		// The parsed document is not needed any more; release the memory.
		unset( $parsed );

		// WXR key => user property. Only author_login is guaranteed to be in
		// the WXR; the others are copied when present.
		$optional_fields = array(
			'author_email'        => 'user_email',
			'author_display_name' => 'display_name',
			'author_first_name'   => 'first_name',
			'author_last_name'    => 'last_name',
		);

		$author_data = array();
		foreach ( $importer->authors as $wxr_author ) {
			$author             = new \stdClass();
			$author->user_login = $wxr_author['author_login'];

			foreach ( $optional_fields as $wxr_key => $property ) {
				if ( isset( $wxr_author[ $wxr_key ] ) ) {
					$author->$property = $wxr_author[ $wxr_key ];
				}
			}

			$author_data[] = $author;
		}

		// Build the author mapping
		$author_mapping = $this->process_author_mapping( $args['authors'], $author_data );
		if ( is_wp_error( $author_mapping ) ) {
			return $author_mapping;
		}

		$old_logins = wp_list_pluck( $author_mapping, 'old_user_login' );
		$new_logins = wp_list_pluck( $author_mapping, 'new_user_login' );
		unset( $author_mapping, $author_data );

		// The importer wants user IDs, so resolve every target login.
		$user_ids       = array();
		$unknown_logins = array();
		foreach ( $new_logins as $login ) {
			$user = get_user_by( 'login', $login );
			if ( ! $user ) {
				$unknown_logins[] = $login;
				continue;
			}
			$user_ids[] = $user->ID;
		}

		if ( ! empty( $unknown_logins ) ) {
			return new WP_Error( 'invalid-author-mapping', sprintf( 'These user_logins are invalid: %s', implode( ',', $unknown_logins ) ) );
		}

		unset( $new_logins );

		// Drive the import
		$skipped                     = $args['skip'];
		$importer->fetch_attachments = ! in_array( 'attachment', $skipped, true );

		$_GET = array(
			'import' => 'wordpress',
			'step'   => 2,
		);

		$_POST = array(
			'imported_authors'  => $old_logins,
			'user_map'          => $user_ids,
			'fetch_attachments' => $importer->fetch_attachments,
		);

		if ( in_array( 'image_resize', $skipped, true ) ) {
			add_filter( 'intermediate_image_sizes_advanced', array( $this, 'filter_set_image_sizes' ) );
		}

		// Read by the progress callback registered in add_wxr_filters().
		$GLOBALS['wpcli_import_current_file'] = basename( $file );

		$importer->import( $file );

		// Array union: keeps entries already recorded and adds the new ones.
		$this->processed_posts += $importer->processed_posts;

		return true;
	}

	/**
	 * Filter callback for 'intermediate_image_sizes_advanced' that requests no
	 * intermediate image sizes at all.
	 *
	 * Hooked by import_wxr() when 'image_resize' is listed in `--skip`. Returns
	 * an empty array rather than null so the filtered value keeps its array
	 * type for any callback hooked later on the same filter.
	 *
	 * @param array $sizes Image sizes that were about to be generated (ignored).
	 * @return array Always an empty array.
	 */
	public function filter_set_image_sizes( $sizes ) {
		return array();
	}

	/**
	 * Hooks progress logging into the WXR importer.
	 *
	 * Each callback either resets/advances the counters kept in the global
	 * $wpcli_import_counts or writes a progress line through WP_CLI::log().
	 * None of them alter the data passing through the filters.
	 */
	private function add_wxr_filters() {

		// Reset the post counters when the importer hands over its post list.
		$reset_post_counts = static function ( $all_posts ) {
			global $wpcli_import_counts;
			$wpcli_import_counts['current_post'] = 0;
			$wpcli_import_counts['total_posts']  = count( $all_posts );
			return $all_posts;
		};
		add_filter( 'wp_import_posts', $reset_post_counts );

		// Reset the comment counters for the comments of the current post.
		$reset_comment_counts = static function ( $post_comments ) {
			global $wpcli_import_counts;
			$wpcli_import_counts['current_comment'] = 0;
			$wpcli_import_counts['total_comments']  = count( $post_comments );
			return $post_comments;
		};
		add_filter( 'wp_import_post_comments', $reset_comment_counts );

		// Announce each post before it is processed.
		$announce_post = static function ( $raw_post ) {
			global $wpcli_import_counts, $wpcli_import_current_file;

			++$wpcli_import_counts['current_post'];

			$position = number_format( $wpcli_import_counts['current_post'] );
			$total    = number_format( $wpcli_import_counts['total_posts'] );

			WP_CLI::log( '' );
			WP_CLI::log( '' );
			WP_CLI::log( sprintf( 'Processing post #%d ("%s") (post_type: %s)', $raw_post['post_id'], $raw_post['post_title'], $raw_post['post_type'] ) );
			WP_CLI::log( sprintf( '-- %s of %s (in file %s)', $position, $total, $wpcli_import_current_file ) );
			WP_CLI::log( '-- ' . date( 'r' ) ); // phpcs:ignore WordPress.DateTime.RestrictedFunctions.date_date

			return $raw_post;
		};
		add_filter( 'wp_import_post_data_raw', $announce_post );

		// Report the outcome of each insert and clear the object cache every 500 posts.
		$report_insert = static function ( $inserted ) {
			global $wpcli_import_counts;

			if ( ! is_wp_error( $inserted ) ) {
				WP_CLI::log( '-- Imported post as post_id #' . $inserted );
			} else {
				WP_CLI::warning( '-- Error importing post: ' . $inserted->get_error_code() );
			}

			if ( 0 !== ( $wpcli_import_counts['current_post'] % 500 ) ) {
				return;
			}

			WP_CLI\Utils\wp_clear_object_cache();
			WP_CLI::log( '-- Cleared object cache.' );
		};
		add_action( 'wp_import_insert_post', $report_insert );

		$report_term = static function ( $term, $import_term ) {
			WP_CLI::log( '-- Created term "' . $import_term['name'] . '"' );
		};
		add_action( 'wp_import_insert_term', $report_term, 10, 2 );

		$report_post_terms = static function ( $tt_ids, $term_ids, $taxonomy ) {
			WP_CLI::log( sprintf( '-- Added terms (%s) for taxonomy "%s"', implode( ',', $term_ids ), $taxonomy ) );
		};
		add_action( 'wp_import_set_post_terms', $report_post_terms, 10, 3 );

		$report_comment = static function ( $comment_id ) {
			global $wpcli_import_counts;

			++$wpcli_import_counts['current_comment'];

			$position = number_format( $wpcli_import_counts['current_comment'] );
			$total    = number_format( $wpcli_import_counts['total_comments'] );

			WP_CLI::log( sprintf( '-- Added comment #%d (%s of %s)', $comment_id, $position, $total ) );
		};
		add_action( 'wp_import_insert_comment', $report_comment );

		$report_meta = static function ( $post_id, $key ) {
			WP_CLI::log( '-- Added post_meta ' . $key );
		};
		add_action( 'import_post_meta', $report_meta, 10, 2 );
	}

	/**
	 * Checks that the WordPress Importer's WP_Import class is loaded.
	 *
	 * @return true|WP_Error True when WP_Import exists; otherwise a WP_Error
	 *                       whose message says whether the plugin needs to be
	 *                       activated or installed.
	 */
	private function is_importer_available() {
		require_once ABSPATH . 'wp-admin/includes/plugin.php';

		if ( ! class_exists( 'WP_Import' ) ) {
			// Present among the installed plugins means it only lacks activation.
			$is_installed = array_key_exists( 'wordpress-importer/wordpress-importer.php', get_plugins() );

			$error_msg = $is_installed
				? "WordPress Importer needs to be activated. Try 'wp plugin activate wordpress-importer'."
				: "WordPress Importer needs to be installed. Try 'wp plugin install wordpress-importer --activate'.";

			return new WP_Error( 'importer-missing', $error_msg );
		}

		return true;
	}

	/**
	 * Processes how the authors should be mapped.
	 *
	 * Resolution order: an existing file is read as a mapping CSV; a value
	 * containing '.csv' that does not exist yet is created as a mapping
	 * template; otherwise the value must be the keyword 'create' or 'skip'.
	 *
	 * @param string|null $authors_arg The `--authors` argument originally passed to the command
	 * @param array       $author_data An array of WP_User-esque author objects
	 * @return array|WP_Error Author mapping array if successful, WP_Error if something bad happened
	 */
	private function process_author_mapping( $authors_arg, $author_data ) {

		// The argument defaults to null when omitted. Reject anything that is not
		// a non-empty string up front: it avoids passing null to file_exists() /
		// stripos() (deprecated on PHP 8.1+) and stops a boolean flag value from
		// loosely matching the 'create' keyword.
		if ( ! is_string( $authors_arg ) || '' === $authors_arg ) {
			return new WP_Error( 'invalid-argument', "'authors' argument is invalid." );
		}

		// Provided an author mapping file (method checks validity)
		if ( file_exists( $authors_arg ) ) {
			return $this->read_author_mapping_file( $authors_arg );
		}

		// Provided a file reference, but the file doesn't yet exist
		if ( false !== stripos( $authors_arg, '.csv' ) ) {
			return $this->create_author_mapping_file( $authors_arg, $author_data );
		}

		// Create authors if they don't yet exist; maybe match on email or user_login
		if ( 'create' === $authors_arg ) {
			return $this->create_authors_for_mapping( $author_data );
		}

		// Skip any sort of author mapping
		if ( 'skip' === $authors_arg ) {
			return array();
		}

		return new WP_Error( 'invalid-argument', "'authors' argument is invalid." );
	}

	/**
	 * Loads an author mapping from a CSV file.
	 *
	 * Every row must provide both the 'old_user_login' and 'new_user_login'
	 * columns; the first row missing either one aborts the read.
	 *
	 * @param string $file Path to the mapping CSV.
	 * @return array|WP_Error List of mapping rows, or WP_Error when a row lacks a required column.
	 */
	private function read_author_mapping_file( $file ) {
		$required_columns = array( 'old_user_login', 'new_user_login' );
		$rows             = array();

		foreach ( new \WP_CLI\Iterators\CSV( $file ) as $row ) {
			foreach ( $required_columns as $column ) {
				if ( ! array_key_exists( $column, $row ) ) {
					return new WP_Error( 'invalid-author-mapping', "Author mapping file isn't properly formatted." );
				}
			}

			$rows[] = $row;
		}

		return $rows;
	}

	/**
	 * Creates an author mapping file, based on provided author data.
	 *
	 * Writes one row per author with the original login and a suggested
	 * existing login (possibly empty) for the user to review.
	 *
	 * @param string $file        Path of the CSV file to create.
	 * @param array  $author_data An array of WP_User-esque author objects.
	 * @return WP_Error Always an error: either the file could not be created, or
	 *                  it was just now created and needs editing before the
	 *                  import can continue.
	 */
	private function create_author_mapping_file( $file, $author_data ) {

		if ( ! touch( $file ) ) {
			return new WP_Error( 'author-mapping-error', "Couldn't create author mapping file." );
		}

		$author_mapping = array();
		foreach ( $author_data as $author ) {
			// user_email is only set when the WXR provided one.
			$author_email = isset( $author->user_email ) ? $author->user_email : '';

			$author_mapping[] = array(
				'old_user_login' => $author->user_login,
				'new_user_login' => $this->suggest_user( $author->user_login, $author_email ),
			);
		}

		$file_resource = fopen( $file, 'w' );
		if ( false === $file_resource ) {
			return new WP_Error( 'author-mapping-error', "Couldn't create author mapping file." );
		}

		\WP_CLI\Utils\write_csv( $file_resource, $author_mapping, array( 'old_user_login', 'new_user_login' ) );

		// Close explicitly so the rows are flushed and the handle is released.
		fclose( $file_resource );

		return new WP_Error( 'author-mapping-error', sprintf( 'Please update author mapping file before continuing: %s', $file ) );
	}

	/**
	 * Maps every imported author onto a site user, creating users as needed.
	 *
	 * An author is matched by email first (when the WXR provided one), then by
	 * login. When neither lookup finds a user, a new one is inserted from the
	 * author data with a generated password.
	 *
	 * @param array $author_data An array of WP_User-esque author objects.
	 * @return array|WP_Error List of old_user_login/new_user_login pairs, or the
	 *                        WP_Error returned by wp_insert_user().
	 */
	private function create_authors_for_mapping( $author_data ) {

		$mapping = array();

		foreach ( $author_data as $author ) {
			$matched = false;

			if ( isset( $author->user_email ) ) {
				$matched = get_user_by( 'email', $author->user_email );
			}

			if ( ! ( $matched instanceof WP_User ) ) {
				$matched = get_user_by( 'login', $author->user_login );
			}

			if ( ! ( $matched instanceof WP_User ) ) {
				// No existing user: insert one, letting the author data override the blank defaults.
				$new_user = array_merge(
					array(
						'user_login' => '',
						'user_email' => '',
						'user_pass'  => wp_generate_password(),
					),
					(array) $author
				);

				$user_id = wp_insert_user( $new_user );
				if ( is_wp_error( $user_id ) ) {
					return $user_id;
				}

				$matched = get_user_by( 'id', $user_id );
			}

			$mapping[] = array(
				'old_user_login' => $author->user_login,
				'new_user_login' => $matched->user_login,
			);
		}

		return $mapping;
	}

	/**
	 * Suggests a blog user based on the levenshtein distance.
	 *
	 * A user whose email equals the author's email is returned straight away.
	 * Otherwise the author login is compared against each user's display name,
	 * login, email and email local part, and the nearest user within the
	 * distance threshold is suggested.
	 *
	 * @param string $author_user_login Login of the imported author.
	 * @param string $author_user_email Optional. Email of the imported author.
	 * @return string Suggested user_login, or an empty string when there is no
	 *                good candidate.
	 */
	private function suggest_user( $author_user_login, $author_user_email = '' ) {

		if ( ! isset( $this->blog_users ) ) {
			$this->blog_users = get_users();
		}

		// Nothing to compare against. Returning here also keeps the average
		// computed below from dividing by zero.
		if ( empty( $this->blog_users ) ) {
			return '';
		}

		$shortest  = -1;
		$closest   = '';
		$distances = array();

		$threshold = floor( ( strlen( $author_user_login ) / 100 ) * 10 ); // 10 % of the strlen are valid

		foreach ( $this->blog_users as $user ) {
			// Before we resort to an algorithm, let's try for an exact match
			if ( $author_user_email && $user->user_email === $author_user_email ) {
				return $user->user_login;
			}

			$email_parts = explode( '@', $user->user_email );

			// Best (smallest) distance across all of the user's identifiers.
			$lev = min(
				levenshtein( $author_user_login, $user->display_name ),
				levenshtein( $author_user_login, $user->user_login ),
				levenshtein( $author_user_login, $user->user_email ),
				levenshtein( $author_user_login, $email_parts[0] )
			);

			// A perfect match cannot be beaten. Return directly instead of
			// breaking out to the average check, which would divide by zero
			// when this is the first user compared.
			if ( 0 === $lev ) {
				return $user->user_login;
			}

			if ( ( $lev <= $shortest || $shortest < 0 ) && $lev <= $threshold ) {
				$closest  = $user->user_login;
				$shortest = $lev;
			}

			$distances[] = $lev;
		}

		// in case all usernames have a common pattern
		if ( $shortest > ( array_sum( $distances ) / count( $distances ) ) ) {
			return '';
		}

		return $closest;
	}
}