这个类是FileTxnLog和FileSnap的包装类
属性
//the directory containing the
//the transaction logs
final File dataDir;
//the directory containing the
//the snapshot directory
final File snapDir;
TxnLog txnLog;
SnapShot snapLog;
private final boolean autoCreateDB;
private final boolean trustEmptySnapshot;
public static final int VERSION = 2;
public static final String version = "version-";
private static final Logger LOG = LoggerFactory.getLogger(FileTxnSnapLog.class);
public static final String ZOOKEEPER_DATADIR_AUTOCREATE = "zookeeper.datadir.autocreate";
public static final String ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT = "true";
static final String ZOOKEEPER_DB_AUTOCREATE = "zookeeper.db.autocreate";
private static final String ZOOKEEPER_DB_AUTOCREATE_DEFAULT = "true";
public static final String ZOOKEEPER_SNAPSHOT_TRUST_EMPTY = "zookeeper.snapshot.trust.empty";
private static final String EMPTY_SNAPSHOT_WARNING = "No snapshot found, but there are log entries. ";
内部类
public interface PlayBackListener {
void onTxnLoaded(TxnHeader hdr, Record rec);
}
构造函数
/**
* the constructor which takes the datadir and
* snapdir.通过datadir(事务日志)和snapdir(快照日志)构造FileTxnSnapLog
* @param dataDir the transaction directory
* @param snapDir the snapshot directory
*/
public FileTxnSnapLog(File dataDir, File snapDir) throws IOException {
LOG.debug("Opening datadir:{} snapDir:{}", dataDir, snapDir);
this.dataDir = new File(dataDir, version + VERSION);
this.snapDir = new File(snapDir, version + VERSION);
// by default create snap/log dirs, but otherwise complain instead
// See ZOOKEEPER-1161 for more details
boolean enableAutocreate = Boolean.parseBoolean(
System.getProperty(ZOOKEEPER_DATADIR_AUTOCREATE, ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT));
trustEmptySnapshot = Boolean.getBoolean(ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
LOG.info(ZOOKEEPER_SNAPSHOT_TRUST_EMPTY + " : " + trustEmptySnapshot);
if (!this.dataDir.exists()) {
if (!enableAutocreate) {
throw new DatadirException(String.format(
"Missing data directory %s, automatic data directory creation is disabled (%s is false)."
+ " Please create this directory manually.",
this.dataDir,
ZOOKEEPER_DATADIR_AUTOCREATE));
}
if (!this.dataDir.mkdirs()) {
throw new DatadirException("Unable to create data directory " + this.dataDir);
}
}
if (!this.dataDir.canWrite()) {
throw new DatadirException("Cannot write to data directory " + this.dataDir);
}
if (!this.snapDir.exists()) {
// by default create this directory, but otherwise complain instead
// See ZOOKEEPER-1161 for more details
if (!enableAutocreate) {
throw new DatadirException(String.format(
"Missing snap directory %s, automatic data directory creation is disabled (%s is false)."
+ "Please create this directory manually.",
this.snapDir,
ZOOKEEPER_DATADIR_AUTOCREATE));
}
if (!this.snapDir.mkdirs()) {
throw new DatadirException("Unable to create snap directory " + this.snapDir);
}
}
if (!this.snapDir.canWrite()) {
throw new DatadirException("Cannot write to snap directory " + this.snapDir);
}
// check content of transaction log and snapshot dirs if they are two different directories
// See ZOOKEEPER-2967 for more details
if (!this.dataDir.getPath().equals(this.snapDir.getPath())) {
checkLogDir();
checkSnapDir();
}
txnLog = new FileTxnLog(this.dataDir);
snapLog = new FileSnap(this.snapDir);
autoCreateDB = Boolean.parseBoolean(
System.getProperty(ZOOKEEPER_DB_AUTOCREATE, ZOOKEEPER_DB_AUTOCREATE_DEFAULT));
}
/**
* this function restores the server
* database after reading from the
* snapshots and transaction logs
恢复数据datatree sessions
* @param dt the datatree to be restored
* @param sessions the sessions to be restored
* @param listener the playback listener to run on the
* database restoration
* @return the highest zxid restored
* @throws IOException
*/
public long restore(DataTree dt, Map<Long, Integer> sessions, PlayBackListener listener) throws IOException {
long snapLoadingStartTime = Time.currentElapsedTime();
long deserializeResult = snapLog.deserialize(dt, sessions);
ServerMetrics.getMetrics().STARTUP_SNAP_LOAD_TIME.add(Time.currentElapsedTime() - snapLoadingStartTime);
FileTxnLog txnLog = new FileTxnLog(dataDir);
boolean trustEmptyDB;
File initFile = new File(dataDir.getParent(), "initialize");
if (Files.deleteIfExists(initFile.toPath())) {
LOG.info("Initialize file found, an empty database will not block voting participation");
trustEmptyDB = true;
} else {
trustEmptyDB = autoCreateDB;
}
if (-1L == deserializeResult) {
/* this means that we couldn't find any snapshot, so we need to
* initialize an empty database (reported in ZOOKEEPER-2325) */
if (txnLog.getLastLoggedZxid() != -1) {
// ZOOKEEPER-3056: provides an escape hatch for users upgrading
// from old versions of zookeeper (3.4.x, pre 3.5.3).
if (!trustEmptySnapshot) {
throw new IOException(EMPTY_SNAPSHOT_WARNING + "Something is broken!");
} else {
LOG.warn(EMPTY_SNAPSHOT_WARNING + "This should only be allowed during upgrading.");
}
}
if (trustEmptyDB) {
/* TODO: (br33d) we should either put a ConcurrentHashMap on restore()
* or use Map on save() */
save(dt, (ConcurrentHashMap<Long, Integer>) sessions, false);
/* return a zxid of 0, since we know the database is empty */
return 0L;
} else {
/* return a zxid of -1, since we are possibly missing data */
LOG.warn("Unexpected empty data tree, setting zxid to -1");
dt.lastProcessedZxid = -1L;
return -1L;
}
}
long highestZxid = fastForwardFromEdits(dt, sessions, listener);
// The snapshotZxidDigest will reset after replaying the txn of the
// zxid in the snapshotZxidDigest, if it's not reset to null after
// restoring, it means either there are not enough txns to cover that
// zxid or that txn is missing
DataTree.ZxidDigest snapshotZxidDigest = dt.getDigestFromLoadedSnapshot();
if (snapshotZxidDigest != null) {
LOG.warn("Highest txn zxid 0x{} is not covering the snapshot "
+ "digest zxid 0x{}, which might lead to inconsistent state",
Long.toHexString(highestZxid),
Long.toHexString(snapshotZxidDigest.getZxid()));
}
return highestZxid;
}
/**通过事务文件恢复,并且只通过事务日志恢复
* This function will fast forward the server database to have the latest
* transactions in it. This is the same as restore, but only reads from
* the transaction logs and not restores from a snapshot.
* @param dt the datatree to write transactions to.
* @param sessions the sessions to be restored.
* @param listener the playback listener to run on the
* database transactions.
* @return the highest zxid restored.
* @throws IOException
*/
public long fastForwardFromEdits(
DataTree dt,
Map<Long, Integer> sessions,
PlayBackListener listener) throws IOException {
TxnIterator itr = txnLog.read(dt.lastProcessedZxid + 1);
long highestZxid = dt.lastProcessedZxid;
TxnHeader hdr;
int txnLoaded = 0;
long startTime = Time.currentElapsedTime();
try {
while (true) {
// iterator points to
// the first valid txn when initialized
hdr = itr.getHeader();
if (hdr == null) {
//empty logs
return dt.lastProcessedZxid;
}
if (hdr.getZxid() < highestZxid && highestZxid != 0) {
LOG.error("{}(highestZxid) > {}(next log) for type {}", highestZxid, hdr.getZxid(), hdr.getType());
} else {
highestZxid = hdr.getZxid();
}
try {
processTransaction(hdr, dt, sessions, itr.getTxn());
txnLoaded++;
} catch (KeeperException.NoNodeException e) {
throw new IOException("Failed to process transaction type: "
+ hdr.getType()
+ " error: "
+ e.getMessage(),
e);
}
listener.onTxnLoaded(hdr, itr.getTxn());
if (!itr.next()) {
break;
}
}
} finally {
if (itr != null) {
itr.close();
}
}
long loadTime = Time.currentElapsedTime() - startTime;
LOG.info("{} txns loaded in {} ms", txnLoaded, loadTime);
ServerMetrics.getMetrics().STARTUP_TXNS_LOADED.add(txnLoaded);
ServerMetrics.getMetrics().STARTUP_TXNS_LOAD_TIME.add(loadTime);
return highestZxid;
}
事务处理
/**
* process the transaction on the datatree
处理datatree上的事务
* @param hdr the hdr of the transaction
* @param dt the datatree to apply transaction to
* @param sessions the sessions to be restored
* @param txn the transaction to be applied
*/
public void processTransaction(
TxnHeader hdr,
DataTree dt,
Map<Long, Integer> sessions,
Record txn) throws KeeperException.NoNodeException {
ProcessTxnResult rc;
switch (hdr.getType()) {
case OpCode.createSession:
sessions.put(hdr.getClientId(), ((CreateSessionTxn) txn).getTimeOut());
if (LOG.isTraceEnabled()) {
ZooTrace.logTraceMessage(
LOG,
ZooTrace.SESSION_TRACE_MASK,
"playLog --- create session in log: 0x" + Long.toHexString(hdr.getClientId())
+ " with timeout: " + ((CreateSessionTxn) txn).getTimeOut());
}
// give dataTree a chance to sync its lastProcessedZxid
rc = dt.processTxn(hdr, txn);
break;
case OpCode.closeSession:
sessions.remove(hdr.getClientId());
if (LOG.isTraceEnabled()) {
ZooTrace.logTraceMessage(
LOG,
ZooTrace.SESSION_TRACE_MASK,
"playLog --- close session in log: 0x" + Long.toHexString(hdr.getClientId()));
}
rc = dt.processTxn(hdr, txn);
break;
default:
rc = dt.processTxn(hdr, txn);
}
/*
* Snapshots are lazily created. So when a snapshot is in progress,
* there is a chance for later transactions to make into the
* snapshot. Then when the snapshot is restored, NONODE/NODEEXISTS
* errors could occur. It should be safe to ignore these.
*/
if (rc.err != Code.OK.intValue()) {
LOG.debug("Ignoring processTxn failure hdr: {}, error: {}, path: {}", hdr.getType(), rc.err, rc.path);
}
}
完成之后后置监听器
private final PlayBackListener commitProposalPlaybackListener = new PlayBackListener() {
public void onTxnLoaded(TxnHeader hdr, Record txn) {
addCommittedProposal(hdr, txn);
}
};
/**
* maintains a list of last <i>committedLog</i>
* or so committed requests. This is used for
* fast follower synchronization.
* @param request committed request
*/
public void addCommittedProposal(Request request) {
WriteLock wl = logLock.writeLock();
try {
wl.lock();
if (committedLog.size() > commitLogCount) {
committedLog.remove();
minCommittedLog = committedLog.peek().packet.getZxid();
}
if (committedLog.isEmpty()) {
minCommittedLog = request.zxid;
maxCommittedLog = request.zxid;
}
byte[] data = SerializeUtils.serializeRequest(request);
QuorumPacket pp = new QuorumPacket(Leader.PROPOSAL, request.zxid, data, null);
Proposal p = new Proposal();
p.packet = pp;
p.request = request;
committedLog.add(p);
maxCommittedLog = p.packet.getZxid();
} finally {
wl.unlock();
}
}