Rebuilding WebCenter Collaboration Index By Project

Another tip of the hat to Brian Hak for this pretty awesome Hak (see what I did there?).

Last year, Brian was faced with a problem: Some documents in WebCenter Interaction Collaboration Server weren’t properly indexed, and his only option was to rebuild the ENTIRE index (a pain we’re all pretty familiar with). With many thousands of documents, the job would never finish, and end users would be frustrated with incomplete results while the job toiled away after wiping everything out.
collab-search-service

So he took it upon himself to write CollabSearchRebuilder. CollabSearchRebuilder allows you to rebuild specific subsets of documents without having to wipe out the whole index and start all over again.

Feel free to download the source and build files and check it out!

import java.sql.*;
import java.io.*;
import java.text.*;
import java.util.*;

import com.plumtree.collaboration.cssearch.*;
import com.bea.alui.search.indexmanager.impl.*;
import com.plumtree.core.util.*;
import com.plumtree.core.pdo.*;
import com.plumtree.core.pdo.query.*;
import com.plumtree.core.pdo.runtime.*;


/**
 * CollabSearchRebuilder hijacks the Collab IndexManager object to give you
 * more granular control over the rebuilding of your Collab search index.
 * Out of the box, Collab only allows you to do a complete delete/rebuild
 * of the Collab search index.  This is painful in large deployments where
 * the process may die before completion and you have to start over from 
 * scratch.
 *
 * CollabSearchRebuilder allows you, via command line params, to delete or
 * index documents from a specific project, a specific file, and/or over a
 * specific date range.
 *
 *
 * USAGE: CollabSearchRebuilder <DELETE|REINDEX> <JDBC URL> <JDBC USER> 
 * 	<JDBC PASSWORD> <Options>
 * Options (Must include at least one option):
 *		-mr <MOST_RECENT_DATE>
 *		-od (<OLDEST_DATE>|0)
 *		-p <PROJECT_ID>
 *		-p <FILE_ID>
 *
 * CollabSearchRebuilder will dump out a pipe delimited file listing all the 
 * documents it's going to take action and then ask user to confirm that they
 * want to continue.  Once user confirms that they want to continue, the 
 * programs submits files in batch to the Collab search for delete or index
 *
 * This utility rides on top of the Collab libraries and configuration files
 * and should be deployed on a box where you have Collab installed.  
 * Note, however, that Collab does not need to be running for the utility to
 * work correctly.  Code has been tested against Collab 10.3.0
 *
 * @Since: JDK 1.5
 * @Author: Bian Hak
 *
 */
public class CollabSearchRebuilder extends Object
{
	private String opType = null;
	private String mostRecentDate = null;
	private String oldestDate = null;
	private String projectId = null;
	private String fileId = null;
	private String jdbcURL = null;
	private String jdbcUser = null;
	private String jdbcPassword = null;

	//This is the hook we use into the Collab code
	private CollabIndexManager indexManager = null;

	//Make the object based on command line parameters
	public CollabSearchRebuilder(String opType, String jdbcURL, 
		String jdbcUser, String jdbcPassword, String mostRecentDate, 
		String oldestDate, String projectId, String fileId)
	{
		this.opType = opType;
		this.mostRecentDate = mostRecentDate;
		this.oldestDate = oldestDate;
		this.projectId = projectId;
		this.fileId = fileId;
		this.jdbcURL = jdbcURL;
		this.jdbcUser = jdbcUser;
		this.jdbcPassword = jdbcPassword;
	}

	/**
	 * Submit the list of files you want to delete to the Collab index Manager
	 */
	private void deleteFromIndex(ArrayList fileList) throws Exception
	{
		if(fileList==null || fileList.size() == 0)
		{
			System.err.println("No files to remove from index");
			return;
		}
		//Queue it up
		indexManager.multiEnqueueForDeleting((List)fileList, 
			com.plumtree.collaboration.docman.DMFile.class);
	}

	/**
	 * This is the real meat of CollabSearchRebuilder.  It just runs a
	 * query against the Collab database using the command line parameters
	 * you specific to create a list of files. Then, it queues the list of
	 * files up for deletion or re-indexing
	 */ 
	private void doIndex() throws Exception
	{
		//The list of files we're going to operate on
		ArrayList fileList = new ArrayList();
		//Hard-coded for Oracle here.  If you're motivated, could move this
		//to a config file or command line param to support other DBs
 		Class.forName ("oracle.jdbc.driver.OracleDriver");

        Connection con = null;
	   	Statement s = null;
		ResultSet rs = null;
		int counter = 0;
		String fileName = "";
		try
		{
			//Create the connection
			con = DriverManager.getConnection(jdbcURL, jdbcUser, jdbcPassword);
			s = con.createStatement();
			
			//Build the db query to figure out what files we're working on
			String sql = "select f.objectid as fileid, "
				+ "f.name as filename, "
				+ "f.modified as lastmodified, "
				+ "p.name as projectname, "
				+ "p.objectid as projectid "
				+ "from csfiles f, csprojects p "
				+ "where f.projectid=p.objectid ";

			//Tailor the where clause based on command line options
			if(mostRecentDate != null)
			{
				sql +=  " and f.modified < '" + mostRecentDate + "'";
			}
			if(oldestDate != null)
			{
				sql += " and f.modified > '" + oldestDate + "'";
			}
			if(fileId != null)
			{
				sql += " and f.objectid = " + fileId;
			}
			if(projectId != null)
			{
				sql += " and p.objectid = " + projectId ;
			}

			//Get results
			rs = s.executeQuery(sql);

			//We're going to dump out a log of the files we're working with
			//Create a timestamp for the log
			DateFormat df = 
				new SimpleDateFormat("yyyy-MM-dd_hh_mm_ss"); 
			fileName = "collabSearchRebuilder_" + opType;
			//And make the name of the logfile descriptive so we know what
			//command line options it was created with
			if(mostRecentDate != null)
			{
				fileName += "_mr_" + mostRecentDate;
			}
			if(oldestDate != null)
			{
				fileName += "_od_" + oldestDate;
			}
			if(projectId != null)
			{
				fileName += "_projectid_" + projectId;
			}
			if(fileId != null)
			{
				fileName += "_fileid_" + fileId;
			}
			fileName += "_" +  df.format(new java.util.Date()) + ".log";

			//Yep, it the I/O steam
			PrintWriter out = new PrintWriter(new FileOutputStream(
				fileName));
			//Reformat the Datestamp to pretty print in output file
			df = new SimpleDateFormat("MM-dd-yyyy hh:mm:ss");  
			out.println("File Id|File Name|Last Modified Date|"
				+ "Project Name");
			while(rs.next())
			{
				//Add the current doc to our request list
				fileList.add(new Integer(rs.getInt("fileid")));
				//Log info about the current doc
				out.println(
					rs.getInt("fileid") + "|"
					+ rs.getString("filename") + "|"
					+ df.format(rs.getDate("lastmodified")) 
					+ "|" + rs.getString("projectname"));
				counter ++;
			}
			//clean up the output file
			out.close();

			//Let the user know exactly what query we ran
			System.err.println("Grabbed document ids with the query: "
				+ sql);
		}
		catch(Exception e)
		{
			e.printStackTrace(System.err);
		}
		finally
		{
			//Clean up db stuff
			try
			{
				rs.close();
				s.close();
				con.close();
			}
			catch(Exception e){}
		}


		//If we're deleting documents from search index
		if(opType.equals("DELETE"))
		{
			//Let user know what we're doing
			System.err.println("Getting ready to delete " + counter 
				+ " files from the index");
			System.err.println("You can see a full list of files I'm going to delete from the index in the log: " + fileName);

			//Make them manually confirm action
			System.err.println("Are you sure you want to proceed? (y/n)");
			BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
			String input = "";	
			while(true)
			{
				input = in.readLine();
				if(input.equalsIgnoreCase("y"))
				{
					break;
				}
				if(input.equalsIgnoreCase("n"))
				{
					System.err.println("Cancelled operation.  No actions were taken on your search index.  Later!");
					System.exit(0);
				}
				System.err.println("Are you sure you want to proceed? (y/n)");
			}

			//delete that Shiz
			deleteFromIndex(fileList);
			boolean controller = true;

			//Just loop while there are still pending deletes
			//Let the user know how many deletes are pending every 5 seconds
			while(controller)
			{
				System.err.println("This many pending deletes: " 
					+ CollabIndexManager.indexManager.getActionQueueSize());
				if(CollabIndexManager.indexManager.getActionQueueSize() == 0)
				{
					controller = false;
				}
				Thread.sleep(5000L);
			}

			//Based on the way the threading works inside of Index manager,
			//we need to wait a while to ensure that the last batch of requests
			//gets passed to the index.  Based on out of the box collab settings
			//we need to wait 35 (indexThreadSleepTime + maxRequestInterval) 
			//seconds
			//NOTE: If you have non-standard collab settings, for the values
			//listed above, you probably will want to change how long you sleep
			System.err.println();
			System.err.println("I'm going to go to sleep for 45 seconds now");
			System.err.println();
			System.err.println();
			System.err.println("IMPORTANT: DO NOT KILL ME. I'll come back, I promise.  I just need to take a nap for 45 seconds because Collab will wait up to 35 seconds before it runs it's last batch of index requests and we want to make sure everything gets processed.");
			Thread.sleep(45000L);
			System.err.println();
			System.err.println("OK, I'm back...boy do I feel refreshed: power nap and all that 🙂  Anyhow....");
			System.err.println("Stopping Index Manager Thread");

			//Stop the indexing thread nicely so it can complete it's last batch
			IndexManager iManager = 
				(IndexManager)CollabIndexManager.indexManager;
			iManager.stop();

			//Feeback
			System.err.println("Successfully deleted " + counter
				+ " files from index");
			System.err.println();
			System.err.println("And....I'm out.  peace!");
			//Get out
			System.exit(0);
		}
		//Submitting files for indexing
		if(opType.equals("REINDEX"))
		{
			//Let the user know what's up
			System.err.println("Getting ready to add " + counter 
				+ " files to the index");
			System.err.println("You can see a full list of files I'm going to add to the index in the log: " + fileName);

			//And make them manually confirm 
			System.err.println("Are you sure you want to proceed? (y/n)");
			BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
			String input = "";	
			while(true)
			{
				input = in.readLine();
				if(input.equalsIgnoreCase("y"))
				{
					break;
				}
				if(input.equalsIgnoreCase("n"))
				{
					System.err.println("Cancelled operation.  No actions were taken on your search index.  Later!");
					System.exit(0);
				}
				System.err.println("Are you sure you want to proceed? (y/n)");
			}

			//Queue up the requests
			indexManager.multiEnqueueForAdding((List)fileList, com.plumtree.collaboration.docman.DMFile.class);

			//Now just loop until the request queue is empty and let user
			//know how many requests are still left every 5 seconds
			boolean controller = true;
			while(controller)
			{
				System.err.println("This many pending adds: " 
					+ CollabIndexManager.indexManager.getActionQueueSize());
				if(CollabIndexManager.indexManager.getActionQueueSize() == 0)
				{
					controller = false;
				}
				Thread.sleep(5000L);
			}


			//Based on the way the threading works inside of Index manager,
			//we need to wait a while to ensure that the last batch of requests
			//gets passed to the index.  Based on out of the box collab settings
			//we need to wait 35 (indexThreadSleepTime + maxRequestInterval) 
			//seconds
			//NOTE: If you have non-standard collab settings, for the values
			//listed above, you probably will want to change how long you sleep
			System.err.println();
			System.err.println("I'm going to go to sleep for 45 seconds now");
			System.err.println();
			System.err.println();
			System.err.println("IMPORTANT: DO NOT KILL ME. I'll come back, I promise.  I just need to take a nap for 45 seconds because Collab will wait up to 35 seconds before it runs it's last batch of index requests and we want to make sure everything gets processed.");
			Thread.sleep(45000L);
			System.err.println();
			System.err.println("OK, I'm back...boy do I feel refreshed: power nap and all that 🙂  Anyhow....");
			System.err.println("Stopping IndexManager Thread");

			//Stop indexing thread nicely so it can finish it's last run
			IndexManager iManager = (IndexManager)CollabIndexManager.indexManager;
			iManager.stop();

			//Feedback
			System.err.println("Successfully added " + counter 
				+ " files to the index");
			System.err.println();
			System.err.println("And....I'm out.  peace!");

			//Get out
			System.exit(0);
		}
	}

	/**
	 * initIndexManager needs to be called before running any index operations
	 * it just mimics the same behavior that collab follows to get us set up
	 * for indexing/deleting
	 */
	private void initIndexManager() throws Exception
	{
		ApplicationManager am  = ApplicationManager.getInstance();
		Config plumtreeConfig;
		plumtreeConfig = Config.instanceOf();
		Class.forName("com.plumtree.core.pdo.PDOManager");
           PDOClassHelper.instanceOf(com.plumtree.collaboration.project.Project.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.project.Role.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.project.Announcement.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.calendar.CalendarItem.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.calendar.Event.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.tasklist.Milestone.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.tasklist.Task.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.tasklist.TaskList.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.docman.DMFile.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.docman.DMFolder.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.discussion.forum.pdo.Forum.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.discussion.forum.pdo.ForumMessage.class);
            PDOClassHelper.instanceOf(com.plumtree.collaboration.project.ProjectFolder.class);
		QueryManager.instanceOf();

		indexManager = new CollabIndexManager();
		indexManager.initialize();	
	}

	/**
	 * Parse command line args and run code
	 */
	public static void main(String[] argv)
	{
		String usage = "USAGE: CollabSearchRebuilder <DELETE|REINDEX> <JDBC URL> <JDBC USER> <JDBC PASSWORD> <Options>";
		usage += "\nOptions (Must include at least one option):\n";
		usage += "\t-mr <MOST_RECENT_DATE>\n";
		usage += "\t-od (<OLDEST_DATE>|0)\n";
		usage += "\t-p <PROJECT_ID>\n";
		usage += "\t-p <FILE_ID>\n";

		String mostRecentDate = null;
		String oldestDate = null;
		String projectId = null;
		String fileId = null;
		boolean validArgs = false;

		//parse the command line args
		//Note that we don't do any validation of data passed on command line
		//here.  Make sure dates are well formated for your db and that numbers
		//for projectids and fileids are actually valid
		if(argv.length < 6)
		{
			System.err.println(usage);
			return;
		}
		if(!(argv[0].equals("DELETE") || argv[0].equals("REINDEX")))
		{
			System.err.println("First argument must be either DELETEor REINDEX");
			System.err.println(usage);
			return;
		}
		for(int i=4; i < argv.length; i++)
		{
			if(argv[i].equalsIgnoreCase("-mr"))
			{
				if(argv.length < (i+2))
				{
					System.err.println(usage);
					return;
				}	
				i++;
				mostRecentDate = argv[i];
				validArgs = true;
			}
			if(argv[i].equalsIgnoreCase("-od"))
			{
				if(argv.length < (i+2))
				{
					System.err.println(usage);
					return;
				}	
				i++;
				oldestDate = argv[i];
				validArgs = true;
			}
			if(argv[i].equalsIgnoreCase("-p"))
			{
				if(argv.length < (i+2))
				{
					System.err.println(usage);
					return;
				}	
				i++;
				projectId = argv[i];
				validArgs = true;
			}
			if(argv[i].equalsIgnoreCase("-f"))
			{
				if(argv.length < (i+2))
				{
					System.err.println(usage);
					return;
				}	
				i++;
				fileId = argv[i];
				validArgs = true;
			}
		}
		if(!validArgs)
		{
			System.err.println(usage);
			return;
		}
	
		//build the object	
		CollabSearchRebuilder builder = 
			new CollabSearchRebuilder(argv[0], argv[1], argv[2], argv[3], 
			mostRecentDate, oldestDate, projectId, fileId);
		try
		{
			//initialization
			builder.initIndexManager();
		}
		catch(Exception e)
		{
			System.err.println("Unable to initialize Collab Index Manager "
				+ " because: " + e.getMessage());	
			e.printStackTrace(System.err);
			return;
		}
		try
		{
			//run the operation
			builder.doIndex();
		}
		catch(Exception e)
		{
			System.err.println("Unable to build this portion of the index "
				+ " because: "  + e.getMessage());	
			e.printStackTrace(System.err);
			return;
		}
		System.err.println("Completed Collab " + argv[0] + " of files with dates  between " + argv[1] + " to " + argv[2]);
		return;
	}
}

Tags: , , , , ,

Leave a Reply