Commit c8111366 authored by Mahadev Konar's avatar Mahadev Konar
Browse files

ZOOKEEPER-1333. NPE in FileTxnSnapLog when restarting a cluster. (Patrick Hunt...

ZOOKEEPER-1333. NPE in FileTxnSnapLog when restarting a cluster. (Patrick Hunt and Andrew Mc Nair via mahadev)

git-svn-id: https://svn.apache.org/repos/asf/zookeeper/branches/branch-3.4@1221837 13f79535-47bb-0310-9956-ffa450edef68
parent be70165c
Release 3.4.2 - TBD
Release 3.4.2 - 2011-12-21
Backward compatible changes:
......@@ -7,6 +7,8 @@ BUGFIXES:
ZOOKEEPER-1323. c client doesn't compile on freebsd
(michi mutsuzaki via phunt)
ZOOKEEPER-1333. NPE in FileTxnSnapLog when restarting a cluster.
(Patrick Hunt and Andrew Mc Nair via mahadev)
Release 3.4.1 - 2011-12-12
......
......@@ -20,6 +20,7 @@ package org.apache.zookeeper.server;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
......@@ -30,21 +31,17 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.nio.ByteBuffer;
import org.apache.jute.Index;
import org.apache.jute.InputArchive;
import org.apache.jute.OutputArchive;
import org.apache.jute.Record;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.Code;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Quotas;
import org.apache.zookeeper.StatsTrack;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.KeeperException.Code;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Watcher.Event;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
......@@ -54,20 +51,17 @@ import org.apache.zookeeper.common.PathTrie;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.data.StatPersisted;
import org.apache.zookeeper.txn.CheckVersionTxn;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.DeleteTxn;
import org.apache.zookeeper.txn.ErrorTxn;
import org.apache.zookeeper.txn.MultiTxn;
import org.apache.zookeeper.txn.SetACLTxn;
import org.apache.zookeeper.txn.SetDataTxn;
import org.apache.zookeeper.txn.CheckVersionTxn;
import org.apache.zookeeper.txn.Txn;
import org.apache.zookeeper.txn.MultiTxn;
import org.apache.zookeeper.txn.TxnHeader;
import org.apache.zookeeper.proto.CreateRequest;
import org.apache.zookeeper.proto.DeleteRequest;
import org.apache.zookeeper.proto.SetACLRequest;
import org.apache.zookeeper.proto.SetDataRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class maintains the tree data structure. It doesn't have any networking
......@@ -815,6 +809,7 @@ public class DataTree {
debug = "Set data transaction for "
+ setDataTxn.getPath()
+ " to new value=" + Arrays.toString(setDataTxn.getData());
rc.path = setDataTxn.getPath();
rc.stat = setData(setDataTxn.getPath(), setDataTxn
.getData(), setDataTxn.getVersion(), header
.getZxid(), header.getTime());
......@@ -823,6 +818,7 @@ public class DataTree {
SetACLTxn setACLTxn = (SetACLTxn) txn;
debug = "Set ACL transaction for "
+ setACLTxn.getPath();
rc.path = setACLTxn.getPath();
rc.stat = setACL(setACLTxn.getPath(), setACLTxn.getAcl(),
setACLTxn.getVersion());
break;
......@@ -928,7 +924,8 @@ public class DataTree {
if (rc.zxid > lastProcessedZxid) {
lastProcessedZxid = rc.zxid;
}
/**
/*
* Snapshots are taken lazily. It can happen that the child
* znodes of a parent are created after the parent
* is serialized. Therefore, while replaying logs during restore, a
......
......@@ -34,7 +34,6 @@ import org.apache.zookeeper.server.Request;
import org.apache.zookeeper.server.ZooTrace;
import org.apache.zookeeper.server.persistence.TxnLog.TxnIterator;
import org.apache.zookeeper.txn.CreateSessionTxn;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.TxnHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -150,7 +149,7 @@ public class FileTxnSnapLog {
processTransaction(hdr,dt,sessions, itr.getTxn());
} catch(KeeperException.NoNodeException e) {
throw new IOException("Failed to process transaction type: " +
hdr.getType() + " error: " + e.getMessage());
hdr.getType() + " error: " + e.getMessage(), e);
}
listener.onTxnLoaded(hdr, itr.getTxn());
if (!itr.next())
......@@ -197,20 +196,22 @@ public class FileTxnSnapLog {
rc = dt.processTxn(hdr, txn);
}
if(rc.err != Code.OK.intValue()) {
if(rc.err == Code.NONODE.intValue()) {
/**
* This should never happen. A NONODE can never show up in the
* transaction logs. This is more indicative of a corrupt transaction
* log. Refer ZOOKEEPER-1333 for more info.
*/
if (rc.err != Code.OK.intValue()) {
if (hdr.getType() == OpCode.create && rc.err == Code.NONODE.intValue()) {
int lastSlash = rc.path.lastIndexOf('/');
String parentName = rc.path.substring(0, lastSlash);
LOG.error("Failed to set parent cversion for: " +
parentName);
throw new KeeperException.NoNodeException(parentName);
}
else {
LOG.error("Failed to set parent cversion for {}", parentName);
throw new KeeperException.NoNodeException(parentName);
} else {
LOG.debug("Ignoring processTxn failure hdr: " + hdr.getType() +
" : error: " + rc.err);
}
}
}
}
/**
......
......@@ -20,42 +20,41 @@ package org.apache.zookeeper.test;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.jute.BinaryInputArchive;
import org.apache.jute.BinaryOutputArchive;
import org.apache.jute.Record;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.MultiTransactionRecord;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.PortAssignment;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.server.DataNode;
import org.apache.zookeeper.server.DataTree;
import org.apache.zookeeper.server.ServerCnxnFactory;
import org.apache.zookeeper.server.SyncRequestProcessor;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.persistence.FileHeader;
import org.apache.zookeeper.server.persistence.FileTxnLog;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.persistence.TxnLog.TxnIterator;
import org.apache.zookeeper.txn.TxnHeader;
import org.junit.Assert;
import org.junit.Test;
import org.apache.zookeeper.server.DataTree;
import org.apache.zookeeper.server.DataNode;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.DeleteTxn;
import org.apache.zookeeper.txn.MultiTxn;
import org.apache.zookeeper.txn.Txn;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.jute.BinaryOutputArchive;
import org.apache.jute.Record;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import org.apache.jute.BinaryInputArchive;
import org.apache.zookeeper.server.persistence.FileHeader;
import org.apache.zookeeper.txn.TxnHeader;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class LoadFromLogTest extends ZKTestCase implements Watcher {
private static String HOSTPORT = "127.0.0.1:" + PortAssignment.unique();
......@@ -100,8 +99,8 @@ public class LoadFromLogTest extends ZKTestCase implements Watcher {
ClientBase.waitForServerDown(HOSTPORT, CONNECTION_TIMEOUT));
// now verify that the FileTxnLog reads every transaction only once
File logDir = new File(tmpDir, FileTxnSnapLog.version + FileTxnSnapLog.VERSION);
FileTxnLog txnLog = new FileTxnLog(logDir);
File logDir = new File(tmpDir, FileTxnSnapLog.version + FileTxnSnapLog.VERSION);
FileTxnLog txnLog = new FileTxnLog(logDir);
TxnIterator itr = txnLog.read(0);
long expectedZxid = 0;
......@@ -368,4 +367,71 @@ public class LoadFromLogTest extends ZKTestCase implements Watcher {
(children.length == NUM_MESSAGES));
f.shutdown();
}
/**
* Test we can restore a snapshot that has errors and data ahead of the zxid
* of the snapshot file.
*/
@Test
public void testRestoreWithTransactionErrors() throws Exception {
// setup a single server cluster
File tmpDir = ClientBase.createTmpDir();
ClientBase.setupTestEnv();
ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
SyncRequestProcessor.setSnapCount(10000);
final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
f.startup(zks);
Assert.assertTrue("waiting for server being up ", ClientBase
.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
ZooKeeper zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, this);
long start = System.currentTimeMillis();
while (!connected) {
long end = System.currentTimeMillis();
if (end - start > 5000) {
Assert.assertTrue("Could not connect with server in 5 seconds",
false);
}
try {
Thread.sleep(200);
} catch (Exception e) {
LOG.warn("Intrrupted");
}
}
// generate some transactions
try {
for (int i = 0; i < NUM_MESSAGES; i++) {
try {
zk.create("/invaliddir/test-", new byte[0],
Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
} catch(NoNodeException e) {
//Expected
}
}
} finally {
zk.close();
}
// force the zxid to be behind the content
zks.getZKDatabase().setlastProcessedZxid(
zks.getZKDatabase().getDataTreeLastProcessedZxid() - 10);
LOG.info("Set lastProcessedZxid to "
+ zks.getZKDatabase().getDataTreeLastProcessedZxid());
// Force snapshot and restore
zks.takeSnapshot();
zks.shutdown();
f.shutdown();
zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
SyncRequestProcessor.setSnapCount(10000);
f = ServerCnxnFactory.createFactory(PORT, -1);
f.startup(zks);
Assert.assertTrue("waiting for server being up ", ClientBase
.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
f.shutdown();
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment