From b7f30ca2ee4b46d496b2756261229a4b0da13a2f Mon Sep 17 00:00:00 2001 From: Mahadev Konar <mahadev@apache.org> Date: Wed, 18 Nov 2009 19:06:39 +0000 Subject: [PATCH] ZOOKEEPER-368. Observers: core functionality (henry robinson via mahadev) git-svn-id: https://svn.apache.org/repos/asf/hadoop/zookeeper/trunk@881882 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + docs/bookkeeperConfig.html | 3 + docs/bookkeeperOverview.html | 3 + docs/bookkeeperProgrammer.html | 3 + docs/bookkeeperStarted.html | 3 + docs/index.html | 6 + docs/index.pdf | 91 ++-- docs/javaExample.html | 3 + docs/linkmap.html | 9 + docs/linkmap.pdf | 32 +- docs/recipes.html | 3 + docs/releasenotes.html | 3 + docs/zookeeperAdmin.html | 3 + docs/zookeeperHierarchicalQuorums.html | 3 + docs/zookeeperInternals.html | 3 + docs/zookeeperJMX.html | 3 + docs/zookeeperObservers.html | 399 ++++++++++++++++++ docs/zookeeperObservers.pdf | 276 ++++++++++++ docs/zookeeperOver.html | 3 + docs/zookeeperProgrammers.html | 3 + docs/zookeeperQuotas.html | 3 + docs/zookeeperStarted.html | 3 + docs/zookeeperTutorial.html | 3 + .../src/documentation/content/xdocs/index.xml | 1 + .../src/documentation/content/xdocs/site.xml | 1 + .../content/xdocs/zookeeperObservers.xml | 174 ++++++++ .../apache/zookeeper/server/ObserverBean.java | 45 ++ .../server/quorum/AuthFastLeaderElection.java | 18 +- .../server/quorum/FastLeaderElection.java | 2 +- .../zookeeper/server/quorum/Leader.java | 59 ++- .../server/quorum/LeaderElection.java | 39 +- .../server/quorum/LearnerHandler.java | 21 +- .../zookeeper/server/quorum/Observer.java | 147 +++++++ .../server/quorum/ObserverMXBean.java | 37 ++ .../quorum/ObserverRequestProcessor.java | 123 ++++++ .../quorum/ObserverZooKeeperServer.java | 114 +++++ .../server/quorum/QuorumCnxManager.java | 7 +- .../zookeeper/server/quorum/QuorumPeer.java | 146 ++++++- .../server/quorum/QuorumPeerConfig.java | 46 +- .../server/quorum/QuorumPeerMain.java | 1 + 
.../zookeeper/server/quorum/QuorumStats.java | 5 +- .../zookeeper/server/quorum/ObserverTest.java | 236 +++++++++++ .../server/quorum/QuorumPeerMainTest.java | 64 +-- .../server/quorum/QuorumPeerTestBase.java | 100 +++++ .../zookeeper/test/AsyncHammerTest.java | 20 + .../test/HierarchicalQuorumTest.java | 70 ++- .../test/ObserverHierarchicalQuorumTest.java | 59 +++ .../test/ObserverQuorumHammerTest.java | 43 ++ .../apache/zookeeper/test/ObserverTest.java | 242 +++++++++++ .../org/apache/zookeeper/test/QuorumBase.java | 23 +- .../zookeeper/test/QuorumHammerTest.java | 6 +- 51 files changed, 2523 insertions(+), 189 deletions(-) create mode 100644 docs/zookeeperObservers.html create mode 100644 docs/zookeeperObservers.pdf create mode 100644 src/docs/src/documentation/content/xdocs/zookeeperObservers.xml create mode 100644 src/java/main/org/apache/zookeeper/server/ObserverBean.java create mode 100644 src/java/main/org/apache/zookeeper/server/quorum/Observer.java create mode 100644 src/java/main/org/apache/zookeeper/server/quorum/ObserverMXBean.java create mode 100644 src/java/main/org/apache/zookeeper/server/quorum/ObserverRequestProcessor.java create mode 100644 src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java create mode 100644 src/java/test/org/apache/zookeeper/server/quorum/ObserverTest.java create mode 100644 src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java create mode 100644 src/java/test/org/apache/zookeeper/test/ObserverHierarchicalQuorumTest.java create mode 100644 src/java/test/org/apache/zookeeper/test/ObserverQuorumHammerTest.java create mode 100644 src/java/test/org/apache/zookeeper/test/ObserverTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 85eb0439..a31b4d04 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -152,6 +152,8 @@ NEW FEATURES: ZOOKEEPER-550. Java Queue Recipe. (steven cheng via mahadev) + ZOOKEEPER-368. 
Observers: core functionality (henry robinson via mahadev) + Release 3.2.0 - 2009-06-30 Non-backward compatible changes: diff --git a/docs/bookkeeperConfig.html b/docs/bookkeeperConfig.html index 42c92cab..cb20eb13 100644 --- a/docs/bookkeeperConfig.html +++ b/docs/bookkeeperConfig.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/bookkeeperOverview.html b/docs/bookkeeperOverview.html index 90d6c6a5..36898b7f 100644 --- a/docs/bookkeeperOverview.html +++ b/docs/bookkeeperOverview.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/bookkeeperProgrammer.html b/docs/bookkeeperProgrammer.html index f7cbf9d4..7b0f50af 100644 --- a/docs/bookkeeperProgrammer.html +++ b/docs/bookkeeperProgrammer.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/bookkeeperStarted.html b/docs/bookkeeperStarted.html index 0698f313..ad0a354b 100644 --- a/docs/bookkeeperStarted.html +++ b/docs/bookkeeperStarted.html @@ -161,6 +161,9 @@ 
document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/index.html b/docs/index.html index 7ee1b6d2..da3c6e9d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> @@ -288,6 +291,9 @@ document.write("Last Published: " + document.lastModified); <a href="zookeeperHierarchicalQuorums.html">Hierarchical quorums</a> </li> +<li> +<a href="zookeeperObservers.html">Observers</a> - non-voting ensemble members that easily improve ZooKeeper's scalability</li> + </ul> </li> diff --git a/docs/index.pdf b/docs/index.pdf index 9628c68a..059bc981 100644 --- a/docs/index.pdf +++ b/docs/index.pdf @@ -169,10 +169,10 @@ endobj >> endobj 20 0 obj -<< /Length 1747 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Length 1867 /Filter [ /ASCII85Decode /FlateDecode ] >> stream 
-GatU48T3WI'YaHGkggH80GGg*<%il7d^3^cbY2N>5aMR_1U\3)oLO"]r;2Z2pdCOUN$9(%dC53bmr>mB/oD`/k7`N^ji[^PRt^<>p#gZcAu[Y]&W1Pk/U-&Y=+:lM^\@`tQG4u">V<`BRLd*hJ+=@=M<_8n$J!`icY!pql;uT$Go/7SHUmO=nTAi\m'+IK1Q!ah;#E<g"YWp2FN$&&ZDFA)fsLI?Jlrd5^sa"2k8jt8Ilka2YU`qq2\5o0#u7]GD%NCoNT^&u+mqYIM?Ip9WCD8uGu^ZdLIr=ONk1;T,LRb(jm'&`H)4p#KPE#=0=!9kHgrF<\JGGETN=eXNTmPjpq7E>nY_Vd$DAkAfB9ig:$Y@lMA6,=.HJ6Q<ODq6bi8]^q14?M`0t(i'9pF4hfD(+6:ESoe$>9uN^>KQ4H;a;;%OA7,RW?A-42g_Yl'kiU<%W2"U9oK6K4:K!3XR=n?S9I-.[%#pZW%'K$579T&r%<nY7XA<Ob<TbK1;5jiS]A-jrjj;4_G(l\>+;>-:8ZnQa.&3>nMr?LTGp6TgQ11r^>r7iE[ZM5nP+pAC9tkl$XXVC0s'F[eZb3:BP+0*7]Pm0fFM]nb@GF,D&>V(B>s;$1E&fU\81s)\!lDo2Mg>mc1;S"JGWZd%@1q&<*crg\Q'0AjrFlsOSt$IJ26RBW]ok.4TfWqu..%a3h981S])[t:eZQ:-U?'B,A7CfHuL"+LHjLJgg$rLilQ7)dcjiZE[e[0:JEcc[I.G<?%_Tb+kiKjp_G/-ai7G?9LWYeq_C3O!8,,]rn+plZU&A!6ut"bcP3o<Vht\.s;g_'NeeBMgfB3i8Q.KfTQp.\E4F^"FG6llK0*ITK:7Z64:2b*j#+Fiq!<&rM&Tc1'ITJ`!hTH*q=n&=XFR;_X,]&65dk3F7%\nB*[>00VFTFVoh0XITmOW7eQ.\AA57C)cVf/dPFTq"`GXVNuauN2@oA'Tl*%`D-!8.!hpW'U[/,d:LngeV?@OXY`5]@@KUN<(TDgR-gT_m+-%)imgZ;[*k<cYUBdS1X5Vi@!4+B?>Wjt`%LXr!u-ICOqT_u$4IVnI<8W"=LAX0eeqC_RmQ4c0On,m&id"NGF-o)XhJq@8Y$I0n9[lKAm'GX*>033el/&<l%K%ibARCq"eAd!$l!rOLWWmr.6,hJ0;`1+h\;htp"GuFMCDN;4c3/*KX7CV.d[[f9)%CPF%DGWrB,1#Qm1'IU#Y=Re4p?rS"4Y!l-rHhU`_jj*\Z;?CFD%*Y,F7^c<L=SRC*&VUOmc'HRL`:Y,E,<(-3+W@gY;Z8;QT@V"7nS..lf+W"uGb$)Vjb'S/uPNg!%.Am"Gnct]"N@T2bg\1CkN>)uDDN@C5hTlM:`kKIfK*E[J#=,U)eZ"]md"g]TY?`3].WuVgV4f6c\@FjF<6A/#>N<k9r.g!,f"FbILC57I;GHRLeAlUm,(R`bj;-3feTm!G=1##'h=2*RAm,'%DVsZA7P=U4("]=A6p&jB>`-QY*6!o]'Pp2&0ia(As'j#VU6/O%=MfkK[mWGu<a*h,LcaM)F?Jkc^^K$MN^3&#gK9e8KSFFdtI`LtuN+/g,WuDlfl4q0_BdQ85!4@IH#"j=BQ5=f2*m`hhS6u&'3#ViFbK&uP&n3*r.GXID75g"$a$MXpIeP2'IkT6p[XS43onbr)!iU$@"Cl%IbNAVpC#&.P$FZSf96K;!k]r8FHsOK>+TT\@e#9ZIck`H+hS^>#\bcp7*,f]~> 
+GatU5>Ar7S'Roe[d+_`s;'eiaAM6L$/mDBcD&39VG'RJ3`o\J'XsD'/^Yb_rO[kWab8Bd_1-4=enaKdSk'":[HUkc2pjoo%>Qinq&UFKUKO,L+0U^;+:P.5'm_$;@rCmQGmJZ&sIpMuZ>^Tnc+*t^%ln(PeSA(kL$t&J90Va("^ND>d(\ekB]q]edppR\<G.,r@1Q5rT`;#8K'rtA>Z!rEFfj[k7feiF7,-rC_'MLl-c.Wl2rdKsZ@99')O#T"W:8ft>)AAaM$VOtoM).;9Ki9_dQ\V#2cb(/2R&4L'A8q.o]-0!2S6)O<Dk:Mbb)MhLA^,E0"ZJ5!(n^<0M.XpLAADqW]U5tVm48!km/N=@1Zs*krQo8cQ35l/3d!<u#($o8BO8t+^V("t89]g!!`$VJVqKGeDsu,*_0N)ucjr17&$g"$nSi0;p)VGM^*V#K5O0jbb10q@b6GN<;-%;uO[&G(mX%.HCcuDi&pncFB4gGPSsO&20+ZS?)18RI^O4Sa%W97`f;(Q(Fm>g6T;RO;R3/+c7^SJ$>)Pl34IIN;jufmk#`8LBM;8^ik;uTfq,>u9Was(N;A#(`Oc]Z:.TY0,CA8_9nm8Co/BH8El_5s#NZVj*"Q=qa+%J\/3_q/'Zi$gcTenY"i\s`<GYt].b<nE1Tj;!/7]7V>8uXd.Bn&,mTnESdE1G"\nJ*d8q>Lq$_HG9#KZ_5EAL'L*#-I5S^-W_"]6jQpA4*WCG#P*!7FuB+=QgB@/*57[gV,Y.:&'dc#o"r7r43W4D=MYaSsb*qO8DWL;E'!dC,=4;$>rjLKlmKn:jmPh`9$JjM2'ol/SFCsOr.i*M=<iL2NVRc1&lo_%lKshqd/[InV4/K_qlLfE8]E0qk]YX4!md4W1^?R6IB:gi4V%cgf;0oG*875=LBa&+9.L1P65f@Ee*osan3gT/Eeg+H!.k$;-FZkPYRDP+fGhN#L=7XaU+Z[G,AC0&OKKiDT9u/(qMlZ:6LU-Be>.2qB8RFkj;'K?I?9i>-Nl4RjK%?A/5<!MAW74[!&<K/)*<q]Q4kS3(cnJ],[bHgU(Bpcp/4Wj5%ooJVk>[4?:=/,#FNG2!#8mYp#:;NBS/3QE!ZAfMWbr@P(i-p^<seL<$g>L4E$ak@B<LD2Q8Lh-qs@`=u$&%3Zo'Jcp1m=D`r6Y,+Ld%BG;jla5@M!!+Z3cXoRr0h#81\YNE-MMFsu$mDPY!P';sn>``]8t5+bV6pi7meC=UB)p<Y?3+<`(9OsM6pdI;e4'V534s;//H^Nr1Q-c<79\fHE>MQ*%X"+r.>d[k0kkk#4EEqG`Jj8H]TtYIXJcoXq1^rL4*oNF6fb8>a)2/]UcMGMpu*pW8MP,;8LKqG3b][fK\?!DE@5TtO]Zfr[;f4'L=^_%2Gb8+bnFkdO]Zfr[9s`sL$0IBfRZHMi,hK'XAt`6Xj2f=TdlXIRV)m<far+mbYp8aW]!:D7G70)`>\0-CnWAnCmo8JOkC;-$V<J$]@mrlFr"-e<Y8ol6pldSJquaYjtifJ9-j0YR]SrYKaU;%9G.d7qVMAToTgn!/ka`\8EnFM=`%-rJM&_"1Ou\<!$[9^`N1d_>1nB):;9:1&+27IP=QBQ"bC]\L63R@gj42B;-f73<'Bgbn's!s0t>^,juu==X[U@Cf3/'-O.pNaD;4eI4b%s-X/LAdU-E2%&W#[5I'YfGj+K_`V\F#2C(U/nH56/q>GnD)T^uX)Ki2#\PXSn"qcS5Ok?#+VYD+04Ku*Qcaj3Kr;r$p%,+Bt0b@5EnfY*.1?Ri>6St8/EFWu?7!"I2/"@6WhbP(b+*PC:9'cu8Ua6(Gm1ik#TjneV\j<M"*0S:$_NQ1ngc],>NI.n_npVbJ~> endstream endobj 21 0 obj @@ -193,6 +193,7 @@ endobj 27 0 R 28 0 R 29 0 R +30 0 R ] endobj 23 0 obj @@ -209,10 +210,10 @@ endobj 24 
0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 611.2 224.964 599.2 ] +/Rect [ 126.0 651.6 174.648 639.6 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (zookeeperInternals.html) +/A << /URI (zookeeperObservers.html) /S /URI >> /H /I >> @@ -220,10 +221,10 @@ endobj 25 0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 578.8 150.0 566.8 ] +/Rect [ 126.0 584.8 224.964 572.8 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://wiki.apache.org/hadoop/ZooKeeper) +/A << /URI (zookeeperInternals.html) /S /URI >> /H /I >> @@ -231,10 +232,10 @@ endobj 26 0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 565.6 150.0 553.6 ] +/Rect [ 126.0 552.4 150.0 540.4 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://wiki.apache.org/hadoop/ZooKeeper/FAQ) +/A << /URI (http://wiki.apache.org/hadoop/ZooKeeper) /S /URI >> /H /I >> @@ -242,10 +243,10 @@ endobj 27 0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 485.6 229.476 473.6 ] +/Rect [ 126.0 539.2 150.0 527.2 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (bookkeeperOverview.html) +/A << /URI (http://wiki.apache.org/hadoop/ZooKeeper/FAQ) /S /URI >> /H /I >> @@ -253,10 +254,10 @@ endobj 28 0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 472.4 257.328 460.4 ] +/Rect [ 126.0 459.2 229.476 447.2 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (bookkeeperStarted.html) +/A << /URI (bookkeeperOverview.html) /S /URI >> /H /I >> @@ -264,43 +265,54 @@ endobj 29 0 obj << /Type /Annot /Subtype /Link -/Rect [ 126.0 459.2 363.972 447.2 ] +/Rect [ 126.0 446.0 257.328 434.0 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (bookkeeperProgrammer.html) +/A << /URI (bookkeeperStarted.html) /S /URI >> /H /I >> endobj 30 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 126.0 432.8 363.972 420.8 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (bookkeeperProgrammer.html) +/S /URI >> +/H /I +>> +endobj +31 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding >> endobj -31 0 obj +32 0 obj << /Type /Font /Subtype 
/Type1 /Name /F5 /BaseFont /Times-Roman /Encoding /WinAnsiEncoding >> endobj -32 0 obj +33 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj -33 0 obj +34 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding >> endobj -34 0 obj +35 0 obj << /Type /Font /Subtype /Type1 /Name /F7 @@ -319,15 +331,15 @@ endobj endobj 3 0 obj << -/Font << /F3 30 0 R /F5 31 0 R /F1 32 0 R /F2 33 0 R /F7 34 0 R >> +/Font << /F3 31 0 R /F5 32 0 R /F1 33 0 R /F2 34 0 R /F7 35 0 R >> /ProcSet [ /PDF /ImageC /Text ] >> endobj xref -0 35 +0 36 0000000000 65535 f -0000008857 00000 n -0000008922 00000 n -0000008972 00000 n +0000009155 00000 n +0000009220 00000 n +0000009270 00000 n 0000000015 00000 n 0000000071 00000 n 0000002781 00000 n @@ -345,26 +357,27 @@ xref 0000004698 00000 n 0000004866 00000 n 0000005029 00000 n -0000006869 00000 n -0000006992 00000 n -0000007061 00000 n -0000007242 00000 n -0000007413 00000 n -0000007598 00000 n -0000007787 00000 n -0000007958 00000 n -0000008128 00000 n -0000008301 00000 n -0000008414 00000 n -0000008524 00000 n -0000008632 00000 n -0000008748 00000 n +0000006989 00000 n +0000007112 00000 n +0000007188 00000 n +0000007369 00000 n +0000007540 00000 n +0000007711 00000 n +0000007896 00000 n +0000008085 00000 n +0000008256 00000 n +0000008426 00000 n +0000008599 00000 n +0000008712 00000 n +0000008822 00000 n +0000008930 00000 n +0000009046 00000 n trailer << -/Size 35 +/Size 36 /Root 2 0 R /Info 4 0 R >> startxref -9095 +9393 %%EOF diff --git a/docs/javaExample.html b/docs/javaExample.html index 51334e91..23efc2d2 100644 --- a/docs/javaExample.html +++ b/docs/javaExample.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 
'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/linkmap.html b/docs/linkmap.html index 5aee0a26..4a5ba1ea 100644 --- a/docs/linkmap.html +++ b/docs/linkmap.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> @@ -341,6 +344,12 @@ document.write("Last Published: " + document.lastModified); <a href="zookeeperJMX.html">JMX</a> ___________________ <em>jmx</em> </li> </ul> + +<ul> +<li> +<a href="zookeeperObservers.html">Observers Guide</a> ___________________ <em>observers</em> +</li> +</ul> </ul> </ul> diff --git a/docs/linkmap.pdf b/docs/linkmap.pdf index 9c8e1390..7fb0ca0e 100644 --- a/docs/linkmap.pdf +++ b/docs/linkmap.pdf @@ -5,10 +5,10 @@ /Producer (FOP 0.20.5) >> endobj 5 0 obj -<< /Length 1148 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Length 1135 /Filter [ /ASCII85Decode /FlateDecode ] >> stream 
-Gau0D?$"IS'Re<2nC(8Vd-BBZ,DF"_?+,u.L7JL&;%b`DENC;+/c5!<QIB#PX#f:*A1#^Epn"8HHg[/@4Ic9JL*W,fR*700Yl.d"@'Z5o"PPM$nofsj#RJb>fd?NB1bFZ6T"MjQAnR/'.]W/2D#8dOR$Z[B]'.nP<+TTOlK?:@A9M^M=b:UJ*5Jg9"WYr#DK]14XU3S(*;OI!2g5\A:']I[c)'bg$anZRdWd!ZiV,aJoT5":<?'aF9VWoDjc1P?=Ut"=a8HCcm#f72fXccV#jBNJrt%i"g9iWuJA9q?g`#8J%,qtP4NF/;DN/WKR("=h!Z2-K?QdiMrp38BX$U-RAOTE&#i[#I;CrrJ"2rppL@&o4eL+MRpF/Ur@`Z6\!93jpjq'2%62%@l`CT/D+%+j&*FM>mdS&a5Za\AMU0@4e4Z,PF\K&lDW]JIOLfrWf*$WO>+/3t8I[$ZC[3s25Q'(`aRaa>S5h**@3hleoTThm!nQK8@;Ji6OYDbABKG]%BK=9PZXBp@#f6cqnZSj]SZ-+HpR+N1lll_A3j+CA_/N+QIT6^8*D,VuU9:8*N&>^Q:[_j!D9!U9'QrA"deX2PmU+IJI_[sG5'**Kj#U_il8b+D^Q=HV2iBm%-.X'[i`Y6e%0hN7V*:7LX!Oe.%#gJ'HG))DMo;Is=b!iQu5ie0>=:HDFNQhK`'*=qD3`^"oVj)smgfikt$W6Uq3C,@!UO1+q4uA]!c@J1AHp-:AS`:\YAb8q:0u00[ch=JtfIAPOSuFQ+[rkprP(s;jGL>R=52Qh@[-N[mj5I@j&7'$0(cEoIe:%-RfjI/,7;04\Wh/EmIIh8:?K<AQJ-#.G9#?BRQ/nkZG)%_e%XYd]KNdhV+gU=E1-cW:/?5Q<7N(_a'FD76M")&F7gC_9d:S<(Ue]hIF%E=DA,Q5ddIQK&&[=/BSG(1+Mg8Br7^Cb8Ifc"N_@Gr7p*!oF.WnrsG&QuC4W`o]PbSCk]`rLP6enEFl5i'U9n'Ss^,rMBR"CPKe1lIRZUE!t@WFXmDQa8KSHld-JIJH@GrGh]0al!aS,eju:p8CB5@(?C9n2=*Jf3j#(rFM/._,+'$=7Vh_XC:NKt.uQ)gS3IC+2iM.c<HjQ@6e^f3Vh8[RS=aka295.2;q3~> 
+Gau1/?#SIU'Sc)T.s/1r(ZkEDl*>$PDV;TX[=JfiLOllO8$snJ'U<e?IMCm3A.hptS2l*e9(7+4;LHe&H+KCAr/N%i!gkOAB!D0AR_`Mj6OC,L.*9):S3fJcQ7R4$?RPH^aHt"!S7_K?amV[+h2-HfYRo"+]<AAlp)@!L=deD`noqB![q(dIglhg[92iVLm;\'MU?P"Km`p[H(.3/HT^VNo^2:%7crm>R>tX-_BaVU&WjP>P*i^s+?("_dE3C9N['Lb5fj=OlN7e^GMdSIqN'fl!7f/*G/(KW55mRDPn`N(YIhWHl#imuU6hY#KMnlt=5nUmdOSoN[Ide.4;Vr2ZG,d$D;Etd()/HYVd5jP*]>1ZLNi;e*9D^j5A5NcGk7f%Vmp_=AJf&%uf5*1ic^)mRpH_<6d>SLbH(aN;1o@30_lB:IToE;Q<bbp2k<*95G;7-kE]Pc!#=:Ys;MrIMjr_aeB"C\$-8Zi[=]Tst&S"Y[U;hF.De,h^WM_hQ9,8]2+Cibn>=@$-O1u/\pXVd&\e;9k-36fi`OItMU\=J]eOGJQ:?iX%IXU)hlcE2)O9%a`Lol$LW)f(X;8\o^)m@8OI2?2bR(a!69/ltAX!O9a*uE&*W*)lLeqI=[:K=[C<jnT^(9LHF&EVuP-6-E/q*<;)BGYl?(c7;"l'#?eoarOK`3%PiU$lHNGao<^Fjm<fgY\eh`oK$=VccPNaQ3Q@W/huGoB+\9cMIq?REP!q^iYTX'Xc>0<Q'Emgg1t('<u[dbWU#]>4:!]F%dW!S33T![mRkKf4i8CZcCO/o-h0!^0oT)),LjolYM1[N>J;]Y,DPu57PkqHFh3ik(!L-+=CScUJ>i-$i>Z3C^tg+U4g,2bsJ`"B4XI^b$!4eQ`W%&R3WD_.hUebmop;>PA%9kbYmUGr]DN-4p_.12dRO;9B2M.ARj2BeBdem8S7R)b!W$E+=!IDq^)0)e8BW7Me-6f-`Yq9WWa6c+_Q_d2n-JEHW:2IOKsO#49V#t=sd=WbLpTR/:d,9YWioEE(ZOB.Y&?<3G1oY!)%[])ccp=Mp8XIpA82N6UMAOR!H^TfA7[elWUCfr`i`s>G8_\mo3MnKdft)]UtJNcuJEllV$tGemK?kJ'@upU&~> endstream endobj 6 0 obj @@ -20,10 +20,10 @@ endobj >> endobj 7 0 obj -<< /Length 480 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Length 532 /Filter [ /ASCII85Decode /FlateDecode ] >> stream -GatU/9lGV;(r#SlHCTc(lZ_<dAfd6iR?;$8\La9eCd_+8RJm!tF93@jio'GkjnX.*3XEESO[+R$7Ub@d'RKskpCbM<7nOpj@ZJ;?HG>qI.XL<M&:rZch2<js-:r;oZ>JQFJSY,Rr99mc'L'&UH&VTX@4VCJGrKE.c&/8cS8!e\#g!eM$e=,#db`gdT=XK+]5)hN&+i2\;1JMi3c4IZDm$M\5lg]._.fCk1j0;`'@4*8Da-OAJT9O@9_OJO&OenQ[Y@]f5%R7s#)!2-c,>pFlD,*s9%'_?\m4KBLC<cVdetYR3cLI_V+Ha,O.j%^s6H<rNF2o^+C:+gJI7V\'R=2,:gF3E>#VqWb\&?^i2]5g<ulMVBkpCo%i<>^VmpDI-QsUo!r:U\9_MN)D#KRPGZ!%@I^SiI.S,1?`dWQWKkWf%X_2K5aeEgRo+j.)[cq%`2G9_i99KM/fgQ:<_W9Td%aEl/!rakG!W~> 
+GatUo_+rc>&;KY!$6S6Be/mALI0A&b8mA9N84Y<I1:th@&`P)C7=G%oad0l<"_gP'1WfRp2e10*OaUD(_@S+Eb)DL+`X%PW,:r;I\]c4pJe[t?f>/2r4=!FU8K'f6r!_PbEuW`L*0.!OFm#4I<c(N%3i&oc5DS;Y.F+Z/rp9R?<DW]C(TFU1<4@SP<?]8ma5mZH%c$Ne?+R&A[0r%Bo5SYuUI^AF@JM8(3(]tJGG&7<423ulIB!3&aM#J9N<-VCB5r=u0q:YEZk)WS-j+eI_-(.@H9L)iDnbW45=%_h$^!JNUZ/]fM$B*t8jq%IM..GAR^)"lGHPRV$B3-4a/\Urm3]pip81I^$=sjAL3U8pBCjG<p+WSAl!%XsJW`D0D,W\k)Fqs:4?Mg%QG(5$&W^^&Mu]ugPX"LqouC4)N,JARG,L"`DnhNV%jfUI2U"Xie$/Z&q"8J_kimQg=")`9+f0LBMH`"C`uDX\`ttH9Snp4+/Abo1ZCML`bfZr#$3osB(;&-TJ(tC.g&2$E-6P)~> endstream endobj 8 0 obj @@ -87,19 +87,19 @@ endobj xref 0 14 0000000000 65535 f -0000002651 00000 n -0000002715 00000 n -0000002765 00000 n +0000002690 00000 n +0000002754 00000 n +0000002804 00000 n 0000000015 00000 n 0000000071 00000 n -0000001311 00000 n -0000001417 00000 n -0000001988 00000 n -0000002094 00000 n -0000002206 00000 n -0000002316 00000 n -0000002427 00000 n -0000002535 00000 n +0000001298 00000 n +0000001404 00000 n +0000002027 00000 n +0000002133 00000 n +0000002245 00000 n +0000002355 00000 n +0000002466 00000 n +0000002574 00000 n trailer << /Size 14 @@ -107,5 +107,5 @@ trailer /Info 4 0 R >> startxref -2887 +2926 %%EOF diff --git a/docs/recipes.html b/docs/recipes.html index bb427748..176ef826 100644 --- a/docs/recipes.html +++ b/docs/recipes.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/releasenotes.html b/docs/releasenotes.html index efd035a1..5913c8c1 100644 --- a/docs/releasenotes.html +++ b/docs/releasenotes.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> 
+<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperAdmin.html b/docs/zookeeperAdmin.html index 1aebefe4..9fc2ce3d 100644 --- a/docs/zookeeperAdmin.html +++ b/docs/zookeeperAdmin.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperHierarchicalQuorums.html b/docs/zookeeperHierarchicalQuorums.html index e4f53cff..82442893 100644 --- a/docs/zookeeperHierarchicalQuorums.html +++ b/docs/zookeeperHierarchicalQuorums.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperInternals.html b/docs/zookeeperInternals.html index 4470d3b5..41a5cee8 100644 --- a/docs/zookeeperInternals.html +++ b/docs/zookeeperInternals.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_selected_1.5', 'skin/')" id="menu_selected_1.5Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Contributor</div> <div 
id="menu_selected_1.5" class="selectedmenuitemgroup" style="display: block;"> diff --git a/docs/zookeeperJMX.html b/docs/zookeeperJMX.html index d0388343..d44dbb51 100644 --- a/docs/zookeeperJMX.html +++ b/docs/zookeeperJMX.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menupage"> <div class="menupagetitle">JMX</div> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperObservers.html b/docs/zookeeperObservers.html new file mode 100644 index 00000000..ae6c9f16 --- /dev/null +++ b/docs/zookeeperObservers.html @@ -0,0 +1,399 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<META http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta content="Apache Forrest" name="Generator"> +<meta name="Forrest-version" content="0.8"> +<meta name="Forrest-skin-name" content="pelt"> +<title>ZooKeeper Observers</title> +<link type="text/css" href="skin/basic.css" rel="stylesheet"> +<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> +<link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> +<link type="text/css" href="skin/profile.css" rel="stylesheet"> +<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> +<link rel="shortcut icon" href="images/favicon.ico"> +</head> +<body onload="init()"> +<script type="text/javascript">ndeSetTextSize();</script> +<div id="top"> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> +<a href="http://www.apache.org/">Apache</a> > <a 
href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/zookeeper/">ZooKeeper</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> +</div> +<!--+ + |header + +--> +<div class="header"> +<!--+ + |start group logo + +--> +<div class="grouplogo"> +<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> +</div> +<!--+ + |end group logo + +--> +<!--+ + |start Project Logo + +--> +<div class="projectlogo"> +<a href="http://hadoop.apache.org/zookeeper/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a> +</div> +<!--+ + |end Project Logo + +--> +<!--+ + |start Search + +--> +<div class="searchbox"> +<form action="http://www.google.com/search" method="get" class="roundtopsmall"> +<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> + <input name="Search" value="Search" type="submit"> +</form> +</div> +<!--+ + |end search + +--> +<!--+ + |start Tabs + +--> +<ul id="tabs"> +<li> +<a class="unselected" href="http://hadoop.apache.org/zookeeper/">Project</a> +</li> +<li> +<a class="unselected" href="http://wiki.apache.org/hadoop/ZooKeeper">Wiki</a> +</li> +<li class="current"> +<a class="selected" href="index.html">ZooKeeper 3.3 Documentation</a> +</li> +</ul> +<!--+ + |end Tabs + +--> +</div> +</div> +<div id="main"> +<div id="publishedStrip"> +<!--+ + |start Subtabs + +--> +<div id="level2tabs"></div> +<!--+ + |end Endtabs + +--> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> + + + </div> +<!--+ + |start Menu, mainarea + +--> +<!--+ + |start Menu + +--> +<div id="menu"> +<div 
onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Overview</div> +<div id="menu_1.1" class="menuitemgroup"> +<div class="menuitem"> +<a href="index.html">Welcome</a> +</div> +<div class="menuitem"> +<a href="zookeeperOver.html">Overview</a> +</div> +<div class="menuitem"> +<a href="zookeeperStarted.html">Getting Started</a> +</div> +<div class="menuitem"> +<a href="releasenotes.html">Release Notes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Developer</div> +<div id="menu_1.2" class="menuitemgroup"> +<div class="menuitem"> +<a href="api/index.html">API Docs</a> +</div> +<div class="menuitem"> +<a href="zookeeperProgrammers.html">Programmer's Guide</a> +</div> +<div class="menuitem"> +<a href="javaExample.html">Java Example</a> +</div> +<div class="menuitem"> +<a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a> +</div> +<div class="menuitem"> +<a href="recipes.html">Recipes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">BookKeeper</div> +<div id="menu_1.3" class="menuitemgroup"> +<div class="menuitem"> +<a href="bookkeeperStarted.html">Getting started</a> +</div> +<div class="menuitem"> +<a href="bookkeeperOverview.html">Overview</a> +</div> +<div class="menuitem"> +<a href="bookkeeperConfig.html">Setup guide</a> +</div> +<div class="menuitem"> +<a href="bookkeeperProgrammer.html">Programmer's guide</a> +</div> +</div> +<div onclick="SwitchMenu('menu_selected_1.4', 'skin/')" id="menu_selected_1.4Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Admin & Ops</div> +<div id="menu_selected_1.4" class="selectedmenuitemgroup" style="display: block;"> +<div class="menuitem"> +<a href="zookeeperAdmin.html">Administrator's Guide</a> +</div> +<div class="menuitem"> +<a href="zookeeperQuotas.html">Quota Guide</a> +</div> +<div class="menuitem"> +<a href="zookeeperJMX.html">JMX</a> 
+</div> +<div class="menupage"> +<div class="menupagetitle">Observers Guide</div> +</div> +</div> +<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> +<div id="menu_1.5" class="menuitemgroup"> +<div class="menuitem"> +<a href="zookeeperInternals.html">ZooKeeper Internals</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.6', 'skin/')" id="menu_1.6Title" class="menutitle">Miscellaneous</div> +<div id="menu_1.6" class="menuitemgroup"> +<div class="menuitem"> +<a href="http://wiki.apache.org/hadoop/ZooKeeper">Wiki</a> +</div> +<div class="menuitem"> +<a href="http://wiki.apache.org/hadoop/ZooKeeper/FAQ">FAQ</a> +</div> +<div class="menuitem"> +<a href="http://hadoop.apache.org/zookeeper/mailing_lists.html">Mailing Lists</a> +</div> +</div> +<div id="credit"></div> +<div id="roundbottom"> +<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> +<!--+ + |alternative credits + +--> +<div id="credit2"></div> +</div> +<!--+ + |end Menu + +--> +<!--+ + |start content + +--> +<div id="content"> +<div title="Portable Document Format" class="pdflink"> +<a class="dida" href="zookeeperObservers.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> + PDF</a> +</div> +<h1>ZooKeeper Observers</h1> +<div id="minitoc-area"> +<ul class="minitoc"> +<li> +<a href="#ch_Introduction">Observers: Scaling ZooKeeper Without Hurting Write Performance + </a> +</li> +<li> +<a href="#sc_UsingObservers">How to use Observers</a> +</li> +<li> +<a href="#ch_UseCases">Example use cases</a> +</li> +</ul> +</div> + + + + + +<a name="N10009"></a><a name="ch_Introduction"></a> +<h2 class="h3">Observers: Scaling ZooKeeper Without Hurting Write Performance + </h2> +<div class="section"> +<p> + +<em> + Please note: the Observers feature currently only works with the basic + leader election protocol, not fast leader election or authenticated fast + leader election. 
This will be remedied when a bug in the leader election + protocol code is fixed in the near future. An exception will be thrown + if you try to start a cluster containing Observers without + electionAlg=0. See below for more details. + </em> + +</p> +<p> + Although ZooKeeper performs very well by having clients connect directly + to voting members of the ensemble, this architecture makes it hard to + scale out to huge numbers of clients. The problem is that as we add more + voting members, the write performance drops. This is due to the fact that + a write operation requires the agreement of (in general) at least half the + nodes in an ensemble and therefore the cost of a vote can increase + significantly as more voters are added. + </p> +<p> + We have introduced a new type of ZooKeeper node called + an <em>Observer</em> which helps address this problem and + further improves ZooKeeper's scalability. Observers are non-voting members + of an ensemble which only hear the results of votes, not the agreement + protocol that leads up to them. Other than this simple distinction, + Observers function exactly the same as Followers - clients may connect to + them and send read and write requests to them. Observers forward these + requests to the Leader like Followers do, but they then simply wait to + hear the result of the vote. Because of this, we can increase the number + of Observers as much as we like without harming the performance of votes. + </p> +<p> + Observers have other advantages. Because they do not vote, they are not a + critical part of the ZooKeeper ensemble. Therefore they can fail, or be + disconnected from the cluster, without harming the availability of the + ZooKeeper service. The benefit to the user is that Observers may connect + over less reliable network links than Followers. In fact, Observers may be + used to talk to a ZooKeeper server from another data center. 
Clients of + the Observer will see fast reads, as all reads are served locally, and + writes result in minimal network traffic as the number of messages + required in the absence of the vote protocol is smaller. + </p> +</div> + +<a name="N10022"></a><a name="sc_UsingObservers"></a> +<h2 class="h3">How to use Observers</h2> +<div class="section"> +<p> + +<em> + Note that + until <a href="https://issues.apache.org/jira/browse/ZOOKEEPER-578">ZOOKEEPER-578</a> + is resolved, you must set electionAlg=0 in every server configuration + file. Otherwise an exception will be thrown when you try to start your + ensemble. + </em> + +</p> +<p> + +<em> + The reason: because Observers do not participate in leader elections, + they rely on voting Followers to inform them of changes to the + Leader. Currently, only the basic leader election algorithm starts a + thread that responds to requests from Observers to identify the current + Leader. Work is in progress on other JIRAs to bring this functionality + to all leader election protocols. + </em> + +</p> +<p>Setting up a ZooKeeper ensemble that uses Observers is very simple, + and requires just two changes to your config files. Firstly, in the config + file of every node that is to be an Observer, you must place this line: + </p> +<pre class="code"> + peerType=observer + </pre> +<p> + This line tells ZooKeeper that the server is to be an Observer. Secondly, + in every server config file, you must add :observer to the server + definition line of each Observer. For example: + </p> +<pre class="code"> + server.1:localhost:2181:3181:observer + </pre> +<p> + This tells every other server that server.1 is an Observer, and that they + should not expect it to vote. This is all the configuration you need to do + to add an Observer to your ZooKeeper cluster. Now you can connect to it as + though it were an ordinary Follower. 
Try it out, by running:</p> +<pre class="code"> + bin/zkCli.sh -server localhost:2181 + </pre> +<p> + where localhost:2181 is the hostname and port number of the Observer as + specified in every config file. You should see a command line prompt + through which you can issue commands like <em>ls</em> to query + the ZooKeeper service. + </p> +</div> + + +<a name="N10051"></a><a name="ch_UseCases"></a> +<h2 class="h3">Example use cases</h2> +<div class="section"> +<p> + Two example use cases for Observers are listed below. In fact, wherever + you wish to scale the number of clients of your ZooKeeper ensemble, or + where you wish to insulate the critical part of an ensemble from the load + of dealing with client requests, Observers are a good architectural + choice. + </p> +<ul> + +<li> + +<p> As a datacenter bridge: Forming a ZK ensemble between two + datacenters is a problematic endeavour as the high variance in latency + between the datacenters could lead to false positive failure detection + and partitioning. However if the ensemble runs entirely in one + datacenter, and the second datacenter runs only Observers, partitions + aren't problematic as the ensemble remains connected. Clients of the + Observers may still see and issue proposals.</p> + +</li> + +<li> + +<p>As a link to a message bus: Some companies have expressed an + interest in using ZK as a component of a persistent reliable message + bus. Observers would give a natural integration point for this work: a + plug-in mechanism could be used to attach the stream of proposals an + Observer sees to a publish-subscribe system, again without loading the + core ensemble. 
+ </p> + +</li> + +</ul> +</div> + +<p align="right"> +<font size="-2"></font> +</p> +</div> +<!--+ + |end content + +--> +<div class="clearboth"> </div> +</div> +<div id="footer"> +<!--+ + |start bottomstrip + +--> +<div class="lastmodified"> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<div class="copyright"> + Copyright © + 2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> +</div> +<!--+ + |end bottomstrip + +--> +</div> +</body> +</html> diff --git a/docs/zookeeperObservers.pdf b/docs/zookeeperObservers.pdf new file mode 100644 index 00000000..8b045956 --- /dev/null +++ b/docs/zookeeperObservers.pdf @@ -0,0 +1,276 @@ +%PDF-1.3 +%���� +4 0 obj +<< /Type /Info +/Producer (FOP 0.20.5) >> +endobj +5 0 obj +<< /Length 521 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +GauHHa_nsL&A@6Wjp6aR?F09).M->*D+,_G/r'uXL]fGT9IYHuP*FUuWktI0F,4K,4>ZG?4G2:u*&4*+@>H]M=238&Yj_qV_abiLHp5T,q'![lGj[^DFXQ9?l98J"+?UY<mUrBGmo#L#G_Pq$NYs\t4)or"OO(Z#bk<VL!5a5;F+H"g%6RBt.p8\_ePT+RG+YeOaMQTr!XS56,FDL4.7&oN-6R<eoGbHWhgr6VYAE2iR]Q'h@3$A*)V6f)9=6Y.1#?Zo\MVjIEL8m[SLM6IMUZX??e5<K!fC*fTLVfDJ/KP$@e@#;/psaD[Oe3?ITnl+3ZW30rI;:+-2auJJtJe@%loN-eC[>=3q2)uZJN)Sfhikd9)7B2>7G8q1]rGte1gUQlBbd@/V4'=#iKj-(f:<;6KUFWK5)UW<\97s)B9%;JO"lROY7hd8-Fn`2eo\Nq-n()7!PDW=GpW.=t<oOCKte@e?pYR1V6%:Srq_Bo>K^i5gHdfar5Ejd]mB>HuihtIff32&j-~> +endstream +endobj +6 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 5 0 R +/Annots 7 0 R +>> +endobj +7 0 obj +[ +8 0 R +10 0 R +12 0 R +] +endobj +8 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 529.541 435.26 517.541 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 9 0 R +/H /I +>> +endobj +10 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 511.341 216.308 499.341 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 11 0 R +/H /I +>> +endobj +12 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 493.141 199.976 481.141 ] +/C [ 0 0 0 
] +/Border [ 0 0 0 ] +/A 13 0 R +/H /I +>> +endobj +14 0 obj +<< /Length 2546 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +GatU59on'f'#"%Ccn,PXUZbos"+.6S44tm8^$u(,ZJG[fU:Q2Da:9LjHiEu0>$k8BfcKTk/jm$7Z6!AdH6hB1m!a>K3e$.9c9o/6SE%n=4S\N=]a)lH.!5DAE?8lXhr">!&'fS7pWpU(IqZ?@ijsEC4StMP9$$&`i%LpUj8@-oo&u9N:TA2s#JYAl,p.Q`U?,&>"4sBt23U$_r/^I=lgF/XmVeJ(5LZ6K_^r^Hc'$#%Y+OL)C$Z%ak0@E0YMG[3_Skts+[R)Qi4DiI"S\VLX?kl=eT)@.HfBe<QU`?7D5YcCH)ENdD=tP(;A-Gk8jB$[/c^iA\-$M7^,08m"br7C4E#q:,<ReL06;b#_R!:Fb#NRSJj^cdVL?Q7F#`Abf"m;V.rs53i`j#_[!F\<Jk8rc(_@s]f=V[n:tqAtII)8ibSQW]RO.7#6^/84K)F4dQp=(JY6rb]%]:"J0K@W\-"go8ZKm@$0RA_mj'QhUVDB,U1Vf1LDDo@`TkmM.?-r>?>lK4R*I:R-p9n`3^M0*RN@<sif<cC1>fi9<8J9mA9p`7[9YeEGNrSS[(*>Q^'4.Yr1%Z`4P;2\Be$`::AIAuhWu?3Kil?8J=Z57fSRpi(e^*lD;V*%;k<[!^J.O?!!`"!#IQ=Y\JlQ+`8@+$N.KLG%K[A)K/5,Z3#-g$n"<'<M7Et-#=V"MA^*(M,CuF-,hr3HIkF'UkT]\<Ma-o/3oTEm-`4E2"%*AtCKCAuXGNt2JR6!\dL@ClN9VO7n9OnJ&#Up8rfB+&k:CO-&jtu@T'duOD_J+_d?\c&!ZRinG<m*;%Q89ho3mXn);1%Q/#ul6@s$auQ8<]I+%+B0LK6>$<B$@3(C&W;],]50nYbU?Rrj&@n3rT)Hkj":'keHul"8\TiUqT*2JVj*JX;E<cg#t(TlS3(_emOaaZNU;3O=iCD#n+g&\S<\3,$][Xp_,d&OL_7NV@c<I8uU=jT_*=`@$*#nBu<r6>"hkTo>8S[..LoKq1QH^Z,;i*[TXDON_:^V=7Q`NlhScL#T;,oOoc)e;NW-`/H8r+(hSW:pEC6d<5*6LN<lmKNYD!:jWnkG2,:eF2Zu7a!:t4ropM&%C@+KX#g5gog*_i3W8W@Vbhj;>n'JsYG1f/\U4"rqJ'7+M-1C6?-47XnGA\1cgKCo#OS\8VED\!T!8=Sj/OT6-DJUBiCJ(tAK7a^R+:,V_@="E"B%*<QEa.QMTZ[WD&OVDCdD/pqi!RlIdTdgCh9@f/41R,Zr!Q`^$OfVEC(-2l^trOmhPgiE)]A5GL>/[F)L,b?elZ&fU#EKbb#hbP>a;>'0(B-AkLSWD=De]VpP[W)h7le0F5kHRN4Lip?gUlDnVDWO5[>eFqM1+%S1ZO*^m.UG]eX#;@bVSRJ<;G/CIh-??BS.^,XlC]iAp3B9*sf\!QB>0H'tJ9rb<_'6^Z@7e$d.L[5N^C8<UrXd7)i,N5.'t`%R96)suQQm#4@oN07>9%,oT;Xf)r=iX)+Y47_Ob)GuM0Lc7_Bq^LCT^09&,b:XM"(q^=kjYPSoDHLL9!t*VT=U%MRPpX(R0UOmP?^Q0dCckRVG`R0D\im&73G$U<[Lcj%J7JFO$o.r'/GA-_YLjN4G;KIVaArCP4eY9hD0r>dq"E,_/#7*V'_XB7Xk7JVp(gGH]>gcsU9I^[[RbNe(bmG%\,A$'Q=Te^Vhdkt.l9TST7eJ8Hb3OH\K<4l`j;sY^DJ[,0rlA?i3TU>.rJ:#1:\$TK!dR6U"FFGK)]Ojs/c]69_cJGI&>?MF/%*gkmOf9P4c6`<t-_7%/6j&G9_]]SJ+WF9(ffUBADmoD\,+[C5B?cOIW&bP.%&fX'&TbF(<e&^b[*p/Cs/g]rQ_Y]t@
:5!h8nM*,dT7.r.H)<-]bC)fJG%DV]6loQ#2cghP1I_o0g4@#-O&AG\gS%4,IG.9^L)fj7,N60?0q0O707rKl>/"Fe#=[/<2kVaAtV<`F>9TbHV8T:MOF"E#&fcroT#TsNGZim@A2j[nQYDAK.q`VDJeB`)$USU!Cl05*t-]mp(:gUA9]rcoT`ff2+./dgTK*!gs<.%=3?l%%D^NgK&AT5ldDG5>ud(oJa]A4o[;pA?EP_b$J`V)WYa'<"fj;L[G8>m.#*fpCF<i(ZPN5&I26'`ZX-4+(RteB]m/SK^K4`UG4MI6>4=5^1AP;=YH^A`_n[<QJq:HJ9`dEFtToGSP)i/WJ9\6Hpf8Cd^B:V[n4r\`"7*@9\Di(:#mGA9?><b*>=G-^+7iI2]80dp4-*JC;$Nr]ffQrQ&?OP>t*V-0J72W.iD4@4p4_;cSG94tBc<_V]6?WA-)gLb(m?h^t-P4N=GQ=nr.IF!G`)(8l&;`_HjbkQcHeNDEJsT-f+Xhg:n5hufSWr=%*aAi]:E]pt>C?]Q/DG[V@KYhdXRES(e<\=K!W%+)Y8Dl=);d6S\i87)6SG5"SpA._5f%pf.0cHfZ6:Y0/KZQ>"M60p8,!!IR]NJTeT?CtA\gE0GK?:ONA4F*hc=g7^NG5Q1'[@qR\]0U9t(`mn;m_:)8kh?~> +endstream +endobj +15 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 14 0 R +/Annots 16 0 R +>> +endobj +16 0 obj +[ +17 0 R +] +endobj +17 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 162.348 223.332 253.008 211.332 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (https://issues.apache.org/jira/browse/ZOOKEEPER-578) +/S /URI >> +/H /I +>> +endobj +18 0 obj +<< /Length 2247 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream 
+Gatm=gN)%,&:N/3n?'fkNUT]c1OVeLELP)C&o4AngB0i1&o%\K#h"V,oj(I[\1<-9dq1^^"Hg4\fs>=8iE*A-S(Zlo,k.,hn*OZN1Z(jS'Rn(oqtW8[6^G\t-+s'7QcZ;]jMc!$"X-!s/LGp*c%hq,gJ:OAQMRjF>C1i^q^?\<.e_Z0%_K-?rG&A0@3dH$XG>,kat,&Od8P[hRX-9[@JKiXVjHVlg'jV:Q./J!BSABM1qKW^d9Oe>kFU"b)@!4JAJbe$](s[c^V&7s'+j/Z`<QsiRbAHF=k:@ai\igV8#HC9iF1H2W,$GOAqNi:I/Mff%mk*RdBjYS\J!P:?DEde5X>6(\"\0[Q$#.mSfi9Z>]5k:67:1B0W;qQ7TOd(^Di(hY.)CWQTo'\_<tdOohp`nFCuM&B4:F!,(p_0$&dGZ.k=i)h3FI@5blLq+j)AJ\.fKkb#+\j(bZ'4n(\RX/s)61_X[e"JAX'X9ebEP?B>iZ;^eWa9ttujp-).0bYL?nAO2Q7mJe&Oj*B2')h7YIXg]+(,gQ^h#cB5qM,$aX^o':*_H'g&b\)4LLPHGkCpt#6S)4"^U*PR/k(c)2ZSV37ks?ikck.\U?%`cEZi"0;,!+Qe?s,M4`tMno0IQou_"%R-0&iT7/eE`'-us51oNbL(KVfFkWT@Foh4>bZ_i*&6Qu7o<l6'a(_.rLo[lX.UW=2bN_lr6D4H3I3m>ofP'&4m6E4%I0^'TES\;ApH:ZZejAh9"6r'"Q]$"H^Cj-`+<fagSq1OMTUS>(KK7$tnCdjXjk2X"02AHO@q*MB:j46Jl!`8JZWRKlb!6lV!t0a=OqL[$f5VrT-iUP1,\\W4<]mdE_T\WD22DF81#^`a1iI))FP&Cr+YIPfdk@g[T'Y[.tJi6u(HE(iRek]5d6D_%h"2)J;^2907@2+j8g3VEkGa6T1k7c^'-S</Ca$P/=uNRgofI7njA6Q[HN&+\XK/=c-h24+0Y5PM@HA@=o\ofIN'O/2!ST]5L/>XV`7ij:Xl]-8je?-/MYE'Uf!C;)$$6tG/A$FrSalT$:J<dD"93U\\t/?")(8/u3$[!;$/.#hAs?:]^)>/0#=3#*_iYe<"[en\_Y9kmMVY;\lF.N05[Q,KY>4:n?sNB#\E3fQl9jr]O_nm3=#Oe0rF#_KB#p&^cdBKt3+SBSd;n@9_<LA9Cu?+!FoKc&O8bR7krK-@UrBtE1T6=,#Fcgnsid(!Hh]/F8WE72Tm?l_Yn+^fpBZ8;m?O`1Z*3Qn-\q/2>bi[KXCda(nUoZ&D;7&^=f7PpSB:dtT*4%,>;0<c48nP19dY\YgA+s;'*"^qrpJq\KeP2:ZIi%Ea>_-SN=FD]hE8(60(fY1QYAmt=,,jgH^8SoEcCXNJsG?%JC!X)7$ZB.L<!ZR.'&"A:5pUoBiJ2W@&XUHl=99t/[dd_6e]fJT-(q7buYdHp04iT<(DG+J3VtJ/r"hjTA.TWj0&cjI-<'h<M6c(l$M'OKIP$l&QT4+*l;BA'k-VXE>nWaSFFieF_DKUs71Tt?"Cg2Ia[&DOY7snP3iKDsXU>?6.mg--!%%E:mRNso/b%iCVM\D>%>Jia=KEfaeO>,Xg$!Q<so=ulu2/`/.k%sjsMqo]NnBt)'$CWI!9/(S*/\&*aK$p,,%#V>F('<q*7gbT__&P$qICNC#f,tWf?*ahi7O%X1'U,n4+0.lVh%&Njm](@\/"q-(:lQO$dN;Fu5]XMM,5lV106!??oS>usiuOoob(11j!rX.R.=-ZB6&qaKN0"N+@7dF)LUoZ:PfR.60;T(inVH%-F516U')-UWR)?0%<L1hT/3+%JJ-%"XppP?6hh!+ddF[Sd4!D5;DkR-oI[i6[aF%DJ?Xh]'/<[b0KBm_0-9#1&>b%lT%RB6pgUhV`d^.)^LB??PiFlmU&R)9=S6G-.4W]9M4XPiJ*m;!J[6TGS@%sVs8PVh?_MDJCeWq]@P,K)K?h6H%\h&^C&SQ`"T&)#7N5Xm+AKng
eh4!X.`XRH5QIjaqCXM",9=r2M-cY)]Xg)C1)8*,oh&I0/\e9Bb^!lR9oPt_>Ka0H_:k8;20.Hdk@@_"E8A'KLFK"3nn`=C79DS_HB495Smu'R/*]Q*L`[oI44b)nuJ(uYC-U'BQm@@T"-g7a95ltMW-=.b2nb_g)D)T!512l?eXS"D<HkHj_d.d-?1tr,A+2$A/W+^Qk(A5,>b4a`JRRWVa;&Gb1(Ua47gY8I-k^Y2:!8P?L*i\l~> +endstream +endobj +19 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 18 0 R +>> +endobj +20 0 obj +<< /Length 456 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gar&:bAMqd&A7ljHq^BVZ&KiECoX@1HYBI#lJl7b#ET4g2DiS6f0COl&Mi'C4?MoR3X60Up]1M`r]Gc@,7K8;rlt4?0,&mRZl0_<%BOWTg2KT]s,c[MSk.Ku";75b9J2FB%-@fi\`g-ZN)P_JJP/$5>GsI0087mK=52)A>"<d`'CeEB%7O4-C?^JiD/ESBLjsHY528W7;UpohZ0j+uoV'YL[B&%;d]B0_>@ThmLAB<kQ7P>gZDDH04qruGkGtWmKrMQ*qf?1#/l<fQPU&PJAi&nnQtNP5H%.8?-3XjU\F['VSC-LK(kO()F67!eU3mA]IgLj7lZ01qU^>6^9''4L<L*EQ,)cGii,c+NV3:@.?HuCp3)#atJBXeV9m<mc=\XXnd?qlhjWkt+E>hY;HTQkl:2OM=DYF$+fr2536b?9FD-@Y$RO3PVgI54?hX+//mLKhZXip~> +endstream +endobj +21 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 20 0 R +>> +endobj +23 0 obj +<< + /Title (\376\377\0\61\0\40\0\117\0\142\0\163\0\145\0\162\0\166\0\145\0\162\0\163\0\72\0\40\0\123\0\143\0\141\0\154\0\151\0\156\0\147\0\40\0\132\0\157\0\157\0\113\0\145\0\145\0\160\0\145\0\162\0\40\0\127\0\151\0\164\0\150\0\157\0\165\0\164\0\40\0\110\0\165\0\162\0\164\0\151\0\156\0\147\0\40\0\127\0\162\0\151\0\164\0\145\0\40\0\120\0\145\0\162\0\146\0\157\0\162\0\155\0\141\0\156\0\143\0\145) + /Parent 22 0 R + /Next 24 0 R + /A 9 0 R +>> endobj +24 0 obj +<< + /Title (\376\377\0\62\0\40\0\110\0\157\0\167\0\40\0\164\0\157\0\40\0\165\0\163\0\145\0\40\0\117\0\142\0\163\0\145\0\162\0\166\0\145\0\162\0\163) + /Parent 22 0 R + /Prev 23 0 R + /Next 25 0 R + /A 11 0 R +>> endobj +25 0 obj +<< + /Title (\376\377\0\63\0\40\0\105\0\170\0\141\0\155\0\160\0\154\0\145\0\40\0\165\0\163\0\145\0\40\0\143\0\141\0\163\0\145\0\163) + /Parent 22 0 R + /Prev 24 0 R + /A 13 0 R +>> endobj +26 0 obj +<< /Type /Font +/Subtype /Type1 +/Name 
/F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +27 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +28 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F6 +/BaseFont /Times-Italic +/Encoding /WinAnsiEncoding >> +endobj +29 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +30 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F9 +/BaseFont /Courier +/Encoding /WinAnsiEncoding >> +endobj +31 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +32 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 4 +/Kids [6 0 R 15 0 R 19 0 R 21 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 22 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F3 26 0 R /F5 27 0 R /F1 29 0 R /F6 28 0 R /F9 30 0 R /F2 31 0 R /F7 32 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 252.666 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [19 0 R /XYZ 85.0 336.26 null] +>> +endobj +22 0 obj +<< + /First 23 0 R + /Last 25 0 R +>> endobj +xref +0 33 +0000000000 65535 f +0000008992 00000 n +0000009071 00000 n +0000009163 00000 n +0000000015 00000 n +0000000071 00000 n +0000000683 00000 n +0000000803 00000 n +0000000842 00000 n +0000009308 00000 n +0000000976 00000 n +0000009371 00000 n +0000001113 00000 n +0000009437 00000 n +0000001250 00000 n +0000003889 00000 n +0000004012 00000 n +0000004039 00000 n +0000004244 00000 n +0000006584 00000 n +0000006692 00000 n +0000007240 00000 n +0000009502 00000 n +0000007348 00000 n +0000007804 00000 n +0000008027 00000 n +0000008219 00000 n +0000008332 00000 n +0000008442 00000 n +0000008553 00000 n +0000008661 00000 
n +0000008767 00000 n +0000008883 00000 n +trailer +<< +/Size 33 +/Root 2 0 R +/Info 4 0 R +>> +startxref +9553 +%%EOF diff --git a/docs/zookeeperOver.html b/docs/zookeeperOver.html index eaec7f1b..7ceef77e 100644 --- a/docs/zookeeperOver.html +++ b/docs/zookeeperOver.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperProgrammers.html b/docs/zookeeperProgrammers.html index 8018a348..21cee903 100644 --- a/docs/zookeeperProgrammers.html +++ b/docs/zookeeperProgrammers.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperQuotas.html b/docs/zookeeperQuotas.html index 1165b799..e848fb01 100644 --- a/docs/zookeeperQuotas.html +++ b/docs/zookeeperQuotas.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperStarted.html b/docs/zookeeperStarted.html index 92c8906a..f1666401 100644 --- a/docs/zookeeperStarted.html +++ b/docs/zookeeperStarted.html @@ -161,6 +161,9 @@ document.write("Last Published: " + 
document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/docs/zookeeperTutorial.html b/docs/zookeeperTutorial.html index 0fb9172f..fceba36e 100644 --- a/docs/zookeeperTutorial.html +++ b/docs/zookeeperTutorial.html @@ -161,6 +161,9 @@ document.write("Last Published: " + document.lastModified); <div class="menuitem"> <a href="zookeeperJMX.html">JMX</a> </div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> </div> <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> <div id="menu_1.5" class="menuitemgroup"> diff --git a/src/docs/src/documentation/content/xdocs/index.xml b/src/docs/src/documentation/content/xdocs/index.xml index 76b51524..266db171 100644 --- a/src/docs/src/documentation/content/xdocs/index.xml +++ b/src/docs/src/documentation/content/xdocs/index.xml @@ -64,6 +64,7 @@ <li><a href="zookeeperQuotas.html">Quota Guide</a> - a guide for system administrators on Quotas in ZooKeeper. </li> <li><a href="zookeeperJMX.html">JMX</a> - how to enable JMX in ZooKeeper</li> <li><a href="zookeeperHierarchicalQuorums.html">Hierarchical quorums</a></li> + <li><a href="zookeeperObservers.html">Observers</a> - non-voting ensemble members that easily improve ZooKeeper's scalability</li> </ul> </li> diff --git a/src/docs/src/documentation/content/xdocs/site.xml b/src/docs/src/documentation/content/xdocs/site.xml index 4ac950d3..34ac4b3b 100644 --- a/src/docs/src/documentation/content/xdocs/site.xml +++ b/src/docs/src/documentation/content/xdocs/site.xml @@ -57,6 +57,7 @@ See http://forrest.apache.org/docs/linking.html for more info. 
<admin label="Administrator's Guide" href="zookeeperAdmin.html" /> <quota label="Quota Guide" href="zookeeperQuotas.html" /> <jmx label="JMX" href="zookeeperJMX.html" /> + <observers label="Observers Guide" href="zookeeperObservers.html" /> </docs> <docs label="Contributor"> diff --git a/src/docs/src/documentation/content/xdocs/zookeeperObservers.xml b/src/docs/src/documentation/content/xdocs/zookeeperObservers.xml new file mode 100644 index 00000000..3db2cc8a --- /dev/null +++ b/src/docs/src/documentation/content/xdocs/zookeeperObservers.xml @@ -0,0 +1,174 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Copyright 2002-2004 The Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!DOCTYPE article PUBLIC "-//OASIS//DTD Simplified DocBook XML V1.0//EN" +"http://www.oasis-open.org/docbook/xml/simple/1.0/sdocbook.dtd"> +<article id="bk_GettStartedGuide"> + <title>ZooKeeper Observers</title> + + <articleinfo> + <legalnotice> + <para>Licensed under the Apache License, Version 2.0 (the "License"); you + may not use this file except in compliance with the License. You may + obtain a copy of the License + at <ulink url="http://www.apache.org/licenses/LICENSE-2.0">http://www.apache.org/licenses/LICENSE-2.0</ulink>.</para> + + <para>Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + License for the specific language governing permissions and limitations + under the License.</para> + </legalnotice> + + <abstract> + <para>This guide contains information about using non-voting servers, or + observers in your ZooKeeper ensembles.</para> + </abstract> + </articleinfo> + + <section id="ch_Introduction"> + <title>Observers: Scaling ZooKeeper Without Hurting Write Performance + </title> + <para> + <emphasis> + Please note: the Observers feature currently only works with the basic + leader election protocol, not fast leader election or authenticated fast + leader election. This will be remedied when a bug in the leader election + protocol code is fixed in the near future. An exception will be thrown + if you try to start a cluster containing Observers without + electionAlg=0. See below for more details. + </emphasis> + </para> + <para> + Although ZooKeeper performs very well by having clients connect directly + to voting members of the ensemble, this architecture makes it hard to + scale out to huge numbers of clients. The problem is that as we add more + voting members, the write performance drops. This is due to the fact that + a write operation requires the agreement of (in general) at least half the + nodes in an ensemble and therefore the cost of a vote can increase + significantly as more voters are added. + </para> + <para> + We have introduced a new type of ZooKeeper node called + an <emphasis>Observer</emphasis> which helps address this problem and + further improves ZooKeeper's scalability. Observers are non-voting members + of an ensemble which only hear the results of votes, not the agreement + protocol that leads up to them. Other than this simple distinction, + Observers function exactly the same as Followers - clients may connect to + them and send read and write requests to them. Observers forward these + requests to the Leader like Followers do, but they then simply wait to + hear the result of the vote. 
Because of this, we can increase the number + of Observers as much as we like without harming the performance of votes. + </para> + <para> + Observers have other advantages. Because they do not vote, they are not a + critical part of the ZooKeeper ensemble. Therefore they can fail, or be + disconnected from the cluster, without harming the availability of the + ZooKeeper service. The benefit to the user is that Observers may connect + over less reliable network links than Followers. In fact, Observers may be + used to talk to a ZooKeeper server from another data center. Clients of + the Observer will see fast reads, as all reads are served locally, and + writes result in minimal network traffic as the number of messages + required in the absence of the vote protocol is smaller. + </para> + </section> + <section id="sc_UsingObservers"> + <title>How to use Observers</title> + <para> + <emphasis> + Note that + until <ulink url="https://issues.apache.org/jira/browse/ZOOKEEPER-578">ZOOKEEPER-578</ulink> + is resolved, you must set electionAlg=0 in every server configuration + file. Otherwise an exception will be thrown when you try to start your + ensemble. + </emphasis> + </para> + <para> + <emphasis> + The reason: because Observers do not participate in leader elections, + they rely on voting Followers to inform them of changes to the + Leader. Currently, only the basic leader election algorithm starts a + thread that responds to requests from Observers to identify the current + Leader. Work is in progress on other JIRAs to bring this functionality + to all leader election protocols. + </emphasis> + </para> + <para>Setting up a ZooKeeper ensemble that uses Observers is very simple, + and requires just two changes to your config files. 
Firstly, in the config + file of every node that is to be an Observer, you must place this line: + </para> + <programlisting> + peerType=observer + </programlisting> + + <para> + This line tells ZooKeeper that the server is to be an Observer. Secondly, + in every server config file, you must add :observer to the server + definition line of each Observer. For example: + </para> + + <programlisting> + server.1:localhost:2181:3181:observer + </programlisting> + + <para> + This tells every other server that server.1 is an Observer, and that they + should not expect it to vote. This is all the configuration you need to do + to add an Observer to your ZooKeeper cluster. Now you can connect to it as + though it were an ordinary Follower. Try it out, by running:</para> + <programlisting> + bin/zkCli.sh -server localhost:2181 + </programlisting> + <para> + where localhost:2181 is the hostname and port number of the Observer as + specified in every config file. You should see a command line prompt + through which you can issue commands like <emphasis>ls</emphasis> to query + the ZooKeeper service. + </para> + </section> + + <section id="ch_UseCases"> + <title>Example use cases</title> + <para> + Two example use cases for Observers are listed below. In fact, wherever + you wish to scale the number of clients of your ZooKeeper ensemble, or + where you wish to insulate the critical part of an ensemble from the load + of dealing with client requests, Observers are a good architectural + choice. + </para> + <itemizedlist> + <listitem> + <para> As a datacenter bridge: Forming a ZK ensemble between two + datacenters is a problematic endeavour as the high variance in latency + between the datacenters could lead to false positive failure detection + and partitioning. However if the ensemble runs entirely in one + datacenter, and the second datacenter runs only Observers, partitions + aren't problematic as the ensemble remains connected. 
Clients of the + Observers may still see and issue proposals.</para> + </listitem> + <listitem> + <para>As a link to a message bus: Some companies have expressed an + interest in using ZK as a component of a persistent reliable message + bus. Observers would give a natural integration point for this work: a + plug-in mechanism could be used to attach the stream of proposals an + Observer sees to a publish-subscribe system, again without loading the + core ensemble. + </para> + </listitem> + </itemizedlist> + </section> +</article> diff --git a/src/java/main/org/apache/zookeeper/server/ObserverBean.java b/src/java/main/org/apache/zookeeper/server/ObserverBean.java new file mode 100644 index 00000000..4e0e82a8 --- /dev/null +++ b/src/java/main/org/apache/zookeeper/server/ObserverBean.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zookeeper.server; + +import org.apache.zookeeper.server.quorum.Observer; +import org.apache.zookeeper.server.quorum.ObserverMXBean; + +/** + * ObserverBean + * + */ +public class ObserverBean extends ZooKeeperServerBean implements ObserverMXBean{ + + private Observer observer; + + public ObserverBean(Observer observer, ZooKeeperServer zks) { + super(zks); + this.observer = observer; + } + + public int getPendingRevalidationCount() { + return this.observer.getPendingRevalidationsCount(); + } + + public String getQuorumAddress() { + return observer.getSocket().toString(); + } + +} diff --git a/src/java/main/org/apache/zookeeper/server/quorum/AuthFastLeaderElection.java b/src/java/main/org/apache/zookeeper/server/quorum/AuthFastLeaderElection.java index 52291211..80668711 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/AuthFastLeaderElection.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/AuthFastLeaderElection.java @@ -712,7 +712,7 @@ public class AuthFastLeaderElection implements Election { t.start(); } - for (QuorumServer server : self.quorumPeers.values()) { + for (QuorumServer server : self.getVotingView().values()) { InetSocketAddress saddr = new InetSocketAddress(server.addr .getAddress(), port); addrChallengeMap.put(saddr, new HashMap<Long, Long>()); @@ -744,7 +744,7 @@ public class AuthFastLeaderElection implements Election { private void starter(QuorumPeer self) { this.self = self; - port = self.quorumPeers.get(self.getId()).electionAddr.getPort(); + port = self.getVotingView().get(self.getId()).electionAddr.getPort(); proposedLeader = -1; proposedZxid = -1; @@ -755,10 +755,10 @@ public class AuthFastLeaderElection implements Election { e1.printStackTrace(); throw new RuntimeException(); } - sendqueue = new LinkedBlockingQueue<ToSend>(2 * self.quorumPeers.size()); - recvqueue = new LinkedBlockingQueue<Notification>(2 * self.quorumPeers + sendqueue = new LinkedBlockingQueue<ToSend>(2 * 
self.getVotingView().size()); + recvqueue = new LinkedBlockingQueue<Notification>(2 * self.getVotingView() .size()); - new Messenger(self.quorumPeers.size() * 2, mySocket); + new Messenger(self.getVotingView().size() * 2, mySocket); } private void leaveInstance() { @@ -766,12 +766,12 @@ public class AuthFastLeaderElection implements Election { } private void sendNotifications() { - for (QuorumServer server : self.quorumPeers.values()) { + for (QuorumServer server : self.getView().values()) { ToSend notmsg = new ToSend(ToSend.mType.notification, AuthFastLeaderElection.sequencer++, proposedLeader, proposedZxid, logicalclock, QuorumPeer.ServerState.LOOKING, - self.quorumPeers.get(server.id).electionAddr); + self.getView().get(server.id).electionAddr); sendqueue.offer(notmsg); } @@ -801,7 +801,7 @@ public class AuthFastLeaderElection implements Election { count++; } - if (count > (self.quorumPeers.size() / 2)) + if (count > (self.getVotingView().size() / 2)) return true; else return false; @@ -875,7 +875,7 @@ public class AuthFastLeaderElection implements Election { recvset.put(n.addr, new Vote(n.leader, n.zxid)); // If have received from all nodes, then terminate - if (self.quorumPeers.size() == recvset.size()) { + if (self.getVotingView().size() == recvset.size()) { self.setPeerState((proposedLeader == self.getId()) ? 
ServerState.LEADING: ServerState.FOLLOWING); // if (self.state == ServerState.FOLLOWING) { diff --git a/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java b/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java index 8bc7b8c1..fc9ad410 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java @@ -440,7 +440,7 @@ public class FastLeaderElection implements Election { * Send notifications to all peers upon a change in our vote */ private void sendNotifications() { - for (QuorumServer server : self.quorumPeers.values()) { + for (QuorumServer server : self.getVotingView().values()) { long sid = server.id; ToSend notmsg = new ToSend(ToSend.mType.notification, diff --git a/src/java/main/org/apache/zookeeper/server/quorum/Leader.java b/src/java/main/org/apache/zookeeper/server/quorum/Leader.java index e23322ac..84ba917a 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/Leader.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/Leader.java @@ -42,6 +42,7 @@ import org.apache.log4j.Logger; import org.apache.zookeeper.server.FinalRequestProcessor; import org.apache.zookeeper.server.Request; import org.apache.zookeeper.server.RequestProcessor; +import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; /** * This class has the control logic for the Leader. 
@@ -79,6 +80,8 @@ public class Leader { // list of followers that are ready to follow (i.e synced with the leader) public HashSet<LearnerHandler> forwardingFollowers = new HashSet<LearnerHandler>(); + + protected HashSet<LearnerHandler> observingLearners = new HashSet<LearnerHandler>(); //Pending sync requests public HashMap<Long,List<LearnerSyncRequest>> pendingSyncs = new HashMap<Long,List<LearnerSyncRequest>>(); @@ -146,6 +149,11 @@ public class Leader { */ final static int SNAP = 15; + /** + * This tells the leader that the connecting peer is actually an observer + */ + final static int OBSERVERINFO = 16; + /** * This message type is sent by the leader to indicate it's zxid and if * needed, its database. @@ -202,6 +210,11 @@ public class Leader { * between the leader and the follower. */ final static int SYNC = 7; + + /** + * This message type informs observers of a committed proposal. + */ + final static int INFORM = 8; private ConcurrentMap<Long, Proposal> outstandingProposals = new ConcurrentHashMap<Long, Proposal>(); @@ -267,9 +280,10 @@ public class Leader { synchronized(this){ lastProposed = zk.getZxid(); } - + newLeaderProposal.packet = new QuorumPacket(NEWLEADER, zk.getZxid(), - null, null); + null, null); + if ((newLeaderProposal.packet.getZxid() & 0xffffffffL) != 0) { LOG.info("NEWLEADER proposal has Zxid of " @@ -346,8 +360,9 @@ public class Leader { if (!tickSkip && !self.getQuorumVerifier().containsQuorum(syncedSet)) { //if (!tickSkip && syncedCount < self.quorumPeers.size() / 2) { // Lost quorum, shutdown + // TODO: message is wrong unless majority quorums used shutdown("Only " + syncedCount + " followers, need " - + (self.quorumPeers.size() / 2)); + + (self.getVotingView().size() / 2)); // make sure the order is the same! 
// the leader goes to looking return; @@ -362,7 +377,7 @@ public class Leader { boolean isShutdown; /** - * Close down all the FollowerHandlers + * Close down all the LearnerHandlers */ void shutdown(String reason) { if (isShutdown) { @@ -465,6 +480,7 @@ public class Leader { LOG.warn("Going to commmit null: " + p); } commit(zxid); + inform(p); zk.commitProcessor.commit(p.request); if(pendingSyncs.containsKey(zxid)){ for(LearnerSyncRequest r: pendingSyncs.remove(zxid)) { @@ -545,6 +561,17 @@ public class Leader { } } + /** + * send a packet to all observers + */ + void sendObserverPacket(QuorumPacket qp) { + synchronized(observingLearners) { + for (LearnerHandler f : observingLearners) { + f.queuePacket(qp); + } + } + } + long lastCommitted = -1; /** @@ -559,6 +586,17 @@ public class Leader { QuorumPacket qp = new QuorumPacket(Leader.COMMIT, zxid, null, null); sendPacket(qp); } + + /** + * Create an inform packet and send it to all observers. + * @param proposal the committed proposal; its zxid and packet data + * are forwarded to observers in the INFORM packet + */ + public void inform(Proposal proposal) { + QuorumPacket qp = new QuorumPacket(Leader.INFORM, proposal.request.zxid, + proposal.packet.getData(), null); + sendObserverPacket(qp); + } long lastProposed; @@ -569,7 +607,6 @@ public class Leader { * @return the proposal that is queued to send to all the members */ public Proposal propose(Request request) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryOutputArchive boa = BinaryOutputArchive.getArchive(baos); try { @@ -662,10 +699,16 @@ public class Leader { handler.queuePacket(outstandingProposals.get(zxid).packet); } } - synchronized (forwardingFollowers) { - forwardingFollowers.add(handler); + if (handler.getLearnerType() == LearnerType.PARTICIPANT) { + synchronized (forwardingFollowers) { + forwardingFollowers.add(handler); + } + } else { + synchronized (observingLearners) { + observingLearners.add(handler); + } } - + return lastProposed; } diff --git
a/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java b/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java index 0f8c539d..d5eda76d 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java @@ -35,6 +35,7 @@ import org.apache.log4j.Logger; import org.apache.zookeeper.jmx.MBeanRegistry; import org.apache.zookeeper.server.quorum.Vote; +import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; @@ -142,7 +143,7 @@ public class LeaderElection implements Election { DatagramPacket responsePacket = new DatagramPacket(responseBytes, responseBytes.length); HashMap<InetSocketAddress, Vote> votes = - new HashMap<InetSocketAddress, Vote>(self.quorumPeers.size()); + new HashMap<InetSocketAddress, Vote>(self.getVotingView().size()); int xid = epochGen.nextInt(); while (self.running) { votes.clear(); @@ -150,7 +151,7 @@ public class LeaderElection implements Election { requestBuffer.putInt(xid); requestPacket.setLength(4); HashSet<Long> heardFrom = new HashSet<Long>(); - for (QuorumServer server : self.quorumPeers.values()) { + for (QuorumServer server : self.getVotingView().values()) { LOG.info("Server address: " + server.addr); try { requestPacket.setSocketAddress(server.addr); @@ -200,16 +201,38 @@ public class LeaderElection implements Election { ElectionResult result = countVotes(votes, heardFrom); if (result.winner.id >= 0) { self.setCurrentVote(result.vote); - if (result.winningCount > (self.quorumPeers.size() / 2)) { + // To do: this doesn't use a quorum verifier + if (result.winningCount > (self.getVotingView().size() / 2)) { self.setCurrentVote(result.winner); s.close(); Vote current = self.getCurrentVote(); - self.setPeerState((current.id == self.getId()) - ? 
ServerState.LEADING: ServerState.FOLLOWING); - if (self.getPeerState() == ServerState.FOLLOWING) { - Thread.sleep(100); + LOG.info("Found leader: my type is: " + self.getPeerType()); + /** + * We want to make sure we implement the state machine + * correctly. If we are a PARTICIPANT, once a leader + * is elected we can move either to LEADING or + * FOLLOWING. However if we are an OBSERVER, it is an + * error to be elected as a Leader. + */ + if (self.getPeerType() == LearnerType.OBSERVER) { + if (current.id == self.getId()) { + // This should never happen! + LOG.error("OBSERVER elected as leader!"); + Thread.sleep(100); + } + else { + self.setPeerState(ServerState.OBSERVING); + Thread.sleep(100); + return current; + } + } else { + self.setPeerState((current.id == self.getId()) + ? ServerState.LEADING: ServerState.FOLLOWING); + if (self.getPeerState() == ServerState.FOLLOWING) { + Thread.sleep(100); + } + return current; } - return current; } } Thread.sleep(1000); diff --git a/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java b/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java index 97cb2aaf..e3baf4b6 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java @@ -38,6 +38,7 @@ import org.apache.zookeeper.ZooDefs.OpCode; import org.apache.zookeeper.server.Request; import org.apache.zookeeper.server.ZooTrace; import org.apache.zookeeper.server.quorum.Leader.Proposal; +import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; import org.apache.zookeeper.server.util.SerializeUtils; import org.apache.zookeeper.txn.TxnHeader; @@ -101,7 +102,12 @@ public class LearnerHandler extends Thread { * If this packet is queued, the sender thread will exit */ final QuorumPacket proposalOfDeath = new QuorumPacket(); - + + private LearnerType learnerType = LearnerType.PARTICIPANT; + public LearnerType getLearnerType() { + return learnerType; + 
} + /** * This method will use the thread to send packets added to the * queuedPackets list @@ -217,9 +223,9 @@ public class LearnerHandler extends Thread { QuorumPacket qp = new QuorumPacket(); ia.readRecord(qp, "packet"); - if(qp.getType() != Leader.FOLLOWERINFO) { + if(qp.getType() != Leader.FOLLOWERINFO && qp.getType() != Leader.OBSERVERINFO){ LOG.error("First packet " + qp.toString() - + " is not FOLLOWERINFO!"); + + " is not FOLLOWERINFO or OBSERVERINFO!"); return; } if (qp.getData() != null) { @@ -231,9 +237,11 @@ public class LearnerHandler extends Thread { LOG.info("Follower sid: " + this.sid + " : info : " + leader.self.quorumPeers.get(this.sid)); + + if (qp.getType() == Leader.OBSERVERINFO) { + learnerType = LearnerType.OBSERVER; + } - /* this is the last zxid from the follower but the leader might have to - restart the follower from a different zxid depending on truncate and diff. */ long peerLastZxid = qp.getZxid(); /* the default to send to the follower */ int packetToSend = Leader.SNAP; @@ -356,6 +364,9 @@ public class LearnerHandler extends Thread { switch (qp.getType()) { case Leader.ACK: + if (this.learnerType == LearnerType.OBSERVER) { + LOG.error("Received ACK from Observer " + this.sid); + } leader.processAck(this.sid, qp.getZxid(), sock.getLocalSocketAddress()); break; case Leader.PING: diff --git a/src/java/main/org/apache/zookeeper/server/quorum/Observer.java b/src/java/main/org/apache/zookeeper/server/quorum/Observer.java new file mode 100644 index 00000000..48bc02a1 --- /dev/null +++ b/src/java/main/org/apache/zookeeper/server/quorum/Observer.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.server.quorum; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.jute.BinaryInputArchive; +import org.apache.jute.Record; +import org.apache.zookeeper.server.ObserverBean; +import org.apache.zookeeper.server.Request; +import org.apache.zookeeper.server.util.SerializeUtils; +import org.apache.zookeeper.txn.TxnHeader; + +/** + * Observers are peers that do not take part in the atomic broadcast protocol. + * Instead, they are informed of successful proposals by the Leader. Observers + * therefore naturally act as a relay point for publishing the proposal stream + * and can relieve Followers of some of the connection load. Observers may + * submit proposals, but do not vote in their acceptance. + * + * See ZOOKEEPER-368 for a discussion of this feature. 
+ */ +public class Observer extends Learner{ + + Observer(QuorumPeer self,ObserverZooKeeperServer observerZooKeeperServer) { + this.self = self; + this.zk=observerZooKeeperServer; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("Observer ").append(sock); + sb.append(" pendingRevalidationCount:") + .append(pendingRevalidations.size()); + return sb.toString(); + } + + /** + * the main method called by the observer to observe the leader + * + * @throws InterruptedException + */ + void observeLeader() throws InterruptedException { + zk.registerJMX(new ObserverBean(this, zk), self.jmxLocalPeerBean); + + try { + InetSocketAddress addr = findLeader(); + LOG.info("Observing " + addr); + try { + connectToLeader(addr); + long newLeaderZxid = registerWithLeader(Leader.OBSERVERINFO); + + syncWithLeader(newLeaderZxid); + QuorumPacket qp = new QuorumPacket(); + while (self.running) { + readPacket(qp); + processPacket(qp); + } + } catch (IOException e) { + LOG.warn("Exception when observing the leader", e); + try { + sock.close(); + } catch (IOException e1) { + e1.printStackTrace(); + } + + synchronized (pendingRevalidations) { + // clear pending revalidations + pendingRevalidations.clear(); + pendingRevalidations.notifyAll(); + } + } + } finally { + zk.unregisterJMX(this); + } + } + + /** + * Controls the response of an observer to the receipt of a quorumpacket + * @param qp + * @throws IOException + */ + protected void processPacket(QuorumPacket qp) throws IOException{ + switch (qp.getType()) { + case Leader.PING: + ping(qp); + break; + case Leader.PROPOSAL: + LOG.warn("Ignoring proposal"); + break; + case Leader.COMMIT: + LOG.warn("Ignoring commit"); + break; + case Leader.UPTODATE: + zk.takeSnapshot(); + self.cnxnFactory.setZooKeeperServer(zk); + break; + case Leader.REVALIDATE: + revalidate(qp); + break; + case Leader.SYNC: + ((ObserverZooKeeperServer)zk).sync(); + break; + case Leader.INFORM: + TxnHeader hdr = new 
TxnHeader(); + BinaryInputArchive ia = BinaryInputArchive + .getArchive(new ByteArrayInputStream(qp.getData())); + Record txn = SerializeUtils.deserializeTxn(ia, hdr); + Request request = new Request (null, hdr.getClientId(), + hdr.getCxid(), + hdr.getType(), null, null); + request.txn = txn; + request.hdr = hdr; + ObserverZooKeeperServer obs = (ObserverZooKeeperServer)zk; + obs.commitRequest(request); + break; + } + } + + /** + * Shutdown the Observer. + */ + public void shutdown() { + LOG.info("shutdown called", new Exception("shutdown Observer")); + super.shutdown(); + } +} + diff --git a/src/java/main/org/apache/zookeeper/server/quorum/ObserverMXBean.java b/src/java/main/org/apache/zookeeper/server/quorum/ObserverMXBean.java new file mode 100644 index 00000000..2c1799ab --- /dev/null +++ b/src/java/main/org/apache/zookeeper/server/quorum/ObserverMXBean.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zookeeper.server.quorum; + +import org.apache.zookeeper.server.ZooKeeperServerMXBean; + +/** + * Observer MX Bean interface, implemented by ObserverBean + * + */ +public interface ObserverMXBean extends ZooKeeperServerMXBean { + /** + * @return count of pending revalidations + */ + public int getPendingRevalidationCount(); + + /** + * @return socket address + */ + public String getQuorumAddress(); +} diff --git a/src/java/main/org/apache/zookeeper/server/quorum/ObserverRequestProcessor.java b/src/java/main/org/apache/zookeeper/server/quorum/ObserverRequestProcessor.java new file mode 100644 index 00000000..eac747fb --- /dev/null +++ b/src/java/main/org/apache/zookeeper/server/quorum/ObserverRequestProcessor.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zookeeper.server.quorum; + +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.log4j.Logger; + +import org.apache.zookeeper.ZooDefs.OpCode; +import org.apache.zookeeper.server.RequestProcessor; +import org.apache.zookeeper.server.Request; +import org.apache.zookeeper.server.ZooTrace; + +/** + * This RequestProcessor forwards any requests that modify the state of the + * system to the Leader. + */ +public class ObserverRequestProcessor extends Thread implements + RequestProcessor { + private static final Logger LOG = Logger.getLogger(ObserverRequestProcessor.class); + + ObserverZooKeeperServer zks; + + RequestProcessor nextProcessor; + + // We keep a queue of requests. As requests get submitted they are + // stored here. The queue is drained in the run() method. + LinkedBlockingQueue<Request> queuedRequests = new LinkedBlockingQueue<Request>(); + + boolean finished = false; + + /** + * Constructor - takes an ObserverZooKeeperServer to associate with + * and the next processor to pass requests to after we're finished. + * @param zks + * @param nextProcessor + */ + public ObserverRequestProcessor(ObserverZooKeeperServer zks, + RequestProcessor nextProcessor) { + super("ObserverRequestProcessor:" + zks.getServerId()); + this.zks = zks; + this.nextProcessor = nextProcessor; + } + + @Override + public void run() { + try { + while (!finished) { + Request request = queuedRequests.take(); + if (LOG.isTraceEnabled()) { + ZooTrace.logRequest(LOG, ZooTrace.CLIENT_REQUEST_TRACE_MASK, + 'F', request, ""); + } + if (request == Request.requestOfDeath) { + break; + } + // We want to queue the request to be processed before we submit + // the request to the leader so that we are ready to receive + // the response + nextProcessor.processRequest(request); + + // We now ship the request to the leader. 
As with all + // other quorum operations, sync also follows this code + // path, but different from others, we need to keep track + // of the sync operations this Observer has pending, so we + // add it to pendingSyncs. + switch (request.type) { + case OpCode.sync: + zks.pendingSyncs.add(request); + zks.getObserver().request(request); + break; + case OpCode.create: + case OpCode.delete: + case OpCode.setData: + case OpCode.setACL: + case OpCode.createSession: + case OpCode.closeSession: + zks.getObserver().request(request); + break; + } + } + } catch (Exception e) { + LOG.error("Unexpected exception causing exit", e); + } + LOG.info("ObserverRequestProcessor exited loop!"); + } + + /** + * Simply queue the request, which will be processed in FIFO order. + */ + public void processRequest(Request request) { + if (!finished) { + queuedRequests.add(request); + } + } + + /** + * Shutdown the processor. + */ + public void shutdown() { + finished = true; + queuedRequests.clear(); + queuedRequests.add(Request.requestOfDeath); + nextProcessor.shutdown(); + } + +} diff --git a/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java b/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java new file mode 100644 index 00000000..f336c6e3 --- /dev/null +++ b/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.zookeeper.server.quorum; + +import java.io.IOException; +import java.util.concurrent.ConcurrentLinkedQueue; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.server.FinalRequestProcessor; +import org.apache.zookeeper.server.Request; +import org.apache.zookeeper.server.RequestProcessor; +import org.apache.zookeeper.server.SyncRequestProcessor; +import org.apache.zookeeper.server.persistence.FileTxnSnapLog; + +/** + * A ZooKeeperServer for the Observer node type. Not much is different, but + * we anticipate specializing the request processors in the future. + * + */ +public class ObserverZooKeeperServer extends LearnerZooKeeperServer { + private static final Logger LOG = Logger.getLogger(ObserverZooKeeperServer.class); + + /* + * Request processors + */ + private CommitProcessor commitProcessor; + private SyncRequestProcessor syncProcessor; + + /* + * Pending sync requests + */ + ConcurrentLinkedQueue<Request> pendingSyncs = + new ConcurrentLinkedQueue<Request>(); + + ObserverZooKeeperServer(FileTxnSnapLog logFactory, QuorumPeer self, + DataTreeBuilder treeBuilder) throws IOException { + super(logFactory, self.tickTime, treeBuilder); + this.self = self; + } + + public Observer getObserver() { + return self.observer; + } + + @Override + public Learner getLearner() { + return self.observer; + } + + /** + * Unlike a Follower, which sees a full request only during the PROPOSAL + * phase, Observers get all the data required with the INFORM packet. 
+ * This method commits a request that has been unpacked from an INFORM + * received from the Leader. + * + * @param request + */ + public void commitRequest(Request request) { + commitProcessor.commit(request); + } + + /** + * Set up the request processors for an Observer: + * firstProcessor->commitProcessor->finalProcessor + */ + @Override + protected void setupRequestProcessors() { + // We might consider changing the processor behaviour of + // Observers to, for example, remove the disk sync requirements. + // Currently, they behave almost exactly the same as followers. + RequestProcessor finalProcessor = new FinalRequestProcessor(this); + commitProcessor = new CommitProcessor(finalProcessor, + Long.toString(getServerId()), true); + commitProcessor.start(); + firstProcessor = new ObserverRequestProcessor(this, commitProcessor); + ((ObserverRequestProcessor) firstProcessor).start(); + syncProcessor = new SyncRequestProcessor(this, + new SendAckRequestProcessor(getObserver())); + syncProcessor.start(); + } + + /* + * Process a sync request + */ + synchronized public void sync(){ + if(pendingSyncs.size() ==0){ + LOG.warn("Not expecting a sync."); + return; + } + + Request r = pendingSyncs.remove(); + commitProcessor.commit(r); + } + + @Override + public String getState() { + return "observer"; + }; +} diff --git a/src/java/main/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/src/java/main/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 557fd65a..10a55250 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -126,7 +126,7 @@ public class QuorumCnxManager { SocketChannel channel; LOG.debug("Opening channel to server " + sid); channel = SocketChannel - .open(self.quorumPeers.get(sid).electionAddr); + .open(self.getVotingView().get(sid).electionAddr); channel.socket().setTcpNoDelay(true); initiateConnection(channel, sid); } @@ -327,7
+327,8 @@ public class QuorumCnxManager { try { SocketChannel channel; LOG.debug("Opening channel to server " + sid); - channel = SocketChannel.open(electionAddr); + channel = SocketChannel + .open(self.getView().get(sid).electionAddr); channel.socket().setTcpNoDelay(true); initiateConnection(channel, sid); } catch (UnresolvedAddressException e) { @@ -510,7 +511,7 @@ public class QuorumCnxManager { LOG.warn("Exception while closing socket"); } //channel = null; - + this.interrupt(); if (recvWorker != null) recvWorker.finish(); diff --git a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java index 7b5213b7..e49c9027 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -25,6 +25,8 @@ import java.net.InetSocketAddress; import java.net.SocketException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -93,22 +95,54 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { this.electionAddr = null; } + public QuorumServer(long id, InetSocketAddress addr, + InetSocketAddress electionAddr, LearnerType type) { + this.id = id; + this.addr = addr; + this.electionAddr = electionAddr; + this.type = type; + } + public InetSocketAddress addr; public InetSocketAddress electionAddr; public long id; + + public LearnerType type = LearnerType.PARTICIPANT; } public enum ServerState { - LOOKING, FOLLOWING, LEADING; + LOOKING, FOLLOWING, LEADING, OBSERVING; + } + + /** + * A peer can either be participating, which implies that it is willing to + * both vote in instances of consensus and to elect or become a Leader, or + * it may be observing in which case it isn't. + * + * We need this distinction to decide which ServerState to move to when + * conditions change (e.g. 
which state to become after LOOKING). + */ + public enum LearnerType { + PARTICIPANT, OBSERVER; + } + + private LearnerType peerType = LearnerType.PARTICIPANT; + + public LearnerType getPeerType() { + return peerType; + } + + public void setPeerType(LearnerType p) { + peerType = p; } /** * The servers that make up the cluster */ - Map<Long, QuorumServer> quorumPeers; + protected Map<Long, QuorumServer> quorumPeers; public int getQuorumSize(){ - return quorumPeers.size(); + return getVotingView().size(); } /** @@ -226,6 +260,11 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { // This can happen in state transitions, // just ignore the request } + break; + case OBSERVING: + // Do nothing, Observers keep themselves to + // themselves. + break; } packet.setData(b); udpSocket.send(packet); @@ -233,7 +272,7 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { packet.setLength(b.length); } } catch (Exception e) { - LOG.warn("Unexpected exception",e); + LOG.warn("Unexpected exception in ResponderThread",e); } finally { LOG.warn("QuorumPeer responder thread exited"); } @@ -282,7 +321,8 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { long myid, int tickTime, int initLimit, int syncLimit, NIOServerCnxn.Factory cnxnFactory) throws IOException { this(quorumPeers, dataDir, dataLogDir, electionType, myid, tickTime, - initLimit, syncLimit, cnxnFactory, new QuorumMaj(quorumPeers.size())); + initLimit, syncLimit, cnxnFactory, + new QuorumMaj(countParticipants(quorumPeers))); } public QuorumPeer(Map<Long, QuorumServer> quorumPeers, File dataDir, @@ -300,7 +340,7 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { this.syncLimit = syncLimit; this.logFactory = new FileTxnSnapLog(dataLogDir, dataDir); if(quorumConfig == null) - this.quorumConfig = new QuorumMaj(quorumPeers.size()); + this.quorumConfig = new QuorumMaj(countParticipants(quorumPeers)); else this.quorumConfig = 
quorumConfig; } @@ -310,8 +350,10 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { @Override public synchronized void start() { - cnxnFactory.start(); - startLeaderElection(); + cnxnFactory.start(); + if (getPeerType() == LearnerType.PARTICIPANT) { + startLeaderElection(); + } super.start(); } @@ -323,7 +365,7 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { } synchronized public void startLeaderElection() { currentVote = new Vote(myid, getLastLoggedZxid()); - for (QuorumServer p : quorumPeers.values()) { + for (QuorumServer p : getView().values()) { if (p.id == myid) { myQuorumAddr = p.addr; break; @@ -344,6 +386,20 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { this.electionAlg = createElectionAlgorithm(electionType); } + /** + * Count the number of nodes in the map that could be followers. + * @param peers + * @return The number of followers in the map + */ + protected static int countParticipants(Map<Long,QuorumServer> peers) { + int count = 0; + for (QuorumServer q : peers.values()) { + if (q.type == LearnerType.PARTICIPANT) { + count++; + } + } + return count; + } /** * This constructor is only used by the existing unit test code. 
@@ -357,7 +413,7 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { this(quorumPeers, snapDir, logDir, electionAlg, myid,tickTime, initLimit,syncLimit, new NIOServerCnxn.Factory(clientPort), - new QuorumMaj(quorumPeers.size())); + new QuorumMaj(countParticipants(quorumPeers))); } /** @@ -380,6 +436,7 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { } public Follower follower; public Leader leader; + public Observer observer; protected Follower makeFollower(FileTxnSnapLog logFactory) throws IOException { return new Follower(this, new FollowerZooKeeperServer(logFactory, @@ -390,9 +447,15 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { return new Leader(this, new LeaderZooKeeperServer(logFactory, this,new ZooKeeperServer.BasicDataTreeBuilder())); } + + protected Observer makeObserver(FileTxnSnapLog logFactory) throws IOException { + return new Observer(this, new ObserverZooKeeperServer(logFactory, + this, new ZooKeeperServer.BasicDataTreeBuilder())); + } private Election createElectionAlgorithm(int electionAlgorithm){ Election le=null; + //TODO: use a factory rather than a switch switch (electionAlgorithm) { case 0: @@ -423,6 +486,8 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { protected Election makeLEStrategy(){ LOG.debug("Initializing leader election protocol..."); + // LeaderElection is the only implementation that correctly + // transitions between LOOKING and OBSERVER if(electionAlg==null) return new LeaderElection(this); return electionAlg; @@ -435,12 +500,18 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { synchronized protected void setFollower(Follower newFollower){ follower=newFollower; } + + synchronized protected void setObserver(Observer newObserver){ + observer=newObserver; + } synchronized public ZooKeeperServer getActiveServer(){ if(leader!=null) return leader.zk; else if(follower!=null) return follower.zk; + else if 
(observer != null) + return observer.zk; return null; } @@ -491,6 +562,19 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { setPeerState(ServerState.LOOKING); } break; + case OBSERVING: + try { + LOG.info("OBSERVING"); + setObserver(makeObserver(logFactory)); + observer.observeLeader(); + } catch (Exception e) { + LOG.warn("Unexpected exception",e ); + } finally { + observer.shutdown(); + setObserver(null); + setPeerState(ServerState.LOOKING); + } + break; case FOLLOWING: try { LOG.info("FOLLOWING"); @@ -549,11 +633,42 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { } /** - * A 'view' is a node's current opinion of the membership of the - * ensemble. + * A 'view' is a node's current opinion of the membership of the entire + * ensemble. */ public Map<Long,QuorumPeer.QuorumServer> getView() { - return this.quorumPeers; + return Collections.unmodifiableMap(this.quorumPeers); + } + + /** + * Observers are not contained in this view, only nodes with + * PeerType=PARTICIPANT. + */ + public Map<Long,QuorumPeer.QuorumServer> getVotingView() { + Map<Long,QuorumPeer.QuorumServer> ret = + new HashMap<Long, QuorumPeer.QuorumServer>(); + Map<Long,QuorumPeer.QuorumServer> view = getView(); + for (QuorumServer server : view.values()) { + if (server.type == LearnerType.PARTICIPANT) { + ret.put(server.id, server); + } + } + return ret; + } + + /** + * Returns only observers, no followers. 
+ */ + public Map<Long,QuorumPeer.QuorumServer> getObservingView() { + Map<Long,QuorumPeer.QuorumServer> ret = + new HashMap<Long, QuorumPeer.QuorumServer>(); + Map<Long,QuorumPeer.QuorumServer> view = getView(); + for (QuorumServer server : view.values()) { + if (server.type == LearnerType.OBSERVER) { + ret.put(server.id, server); + } + } + return ret; } /** @@ -565,6 +680,9 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { return this.quorumPeers.containsKey(sid); } + /** + * Only used by QuorumStats at the moment + */ public String[] getQuorumPeers() { List<String> l = new ArrayList<String>(); synchronized (this) { @@ -594,6 +712,8 @@ public class QuorumPeer extends Thread implements QuorumStats.Provider { return QuorumStats.Provider.LEADING_STATE; case FOLLOWING: return QuorumStats.Provider.FOLLOWING_STATE; + case OBSERVING: + return QuorumStats.Provider.OBSERVING_STATE; } return QuorumStats.Provider.UNKNOWN_STATE; } diff --git a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java index dbb1e159..dc6b86e8 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java @@ -33,6 +33,7 @@ import java.util.Map.Entry; import org.apache.log4j.Logger; import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; +import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier; import org.apache.zookeeper.server.quorum.flexible.QuorumMaj; import org.apache.zookeeper.server.quorum.flexible.QuorumHierarchical; @@ -52,12 +53,16 @@ public class QuorumPeerConfig { protected int maxClientCnxns = 10; protected final HashMap<Long,QuorumServer> servers = new HashMap<Long, QuorumServer>(); + protected final HashMap<Long,QuorumServer> observers = 
+ new HashMap<Long, QuorumServer>(); protected long serverId; protected HashMap<Long, Long> serverWeight = new HashMap<Long, Long>(); protected HashMap<Long, Long> serverGroup = new HashMap<Long, Long>(); protected int numGroups = 0; protected QuorumVerifier quorumVerifier; + + protected LearnerType peerType = LearnerType.PARTICIPANT; @SuppressWarnings("serial") public static class ConfigException extends Exception { @@ -128,13 +133,23 @@ public class QuorumPeerConfig { electionAlg = Integer.parseInt(value); } else if (key.equals("maxClientCnxns")) { maxClientCnxns = Integer.parseInt(value); + } else if (key.equals("peerType")) { + if (value.toLowerCase().equals("observer")) { + peerType = LearnerType.OBSERVER; + } else if (value.toLowerCase().equals("participant")) { + peerType = LearnerType.PARTICIPANT; + } else + { + throw new ConfigException("Unrecognised peertype: " + value); + } } else if (key.startsWith("server.")) { int dot = key.indexOf('.'); long sid = Long.parseLong(key.substring(dot + 1)); String parts[] = value.split(":"); - if ((parts.length != 2) && (parts.length != 3)) { + if ((parts.length != 2) && (parts.length != 3) && (parts.length !=4)) { LOG.error(value - + " does not have the form host:port or host:port:port"); + + " does not have the form host:port or host:port:port " + + " or host:port:port:type"); } InetSocketAddress addr = new InetSocketAddress(parts[0], Integer.parseInt(parts[1])); @@ -145,6 +160,21 @@ public class QuorumPeerConfig { parts[0], Integer.parseInt(parts[2])); servers.put(Long.valueOf(sid), new QuorumServer(sid, addr, electionAddr)); + } else if (parts.length == 4) { + InetSocketAddress electionAddr = new InetSocketAddress( + parts[0], Integer.parseInt(parts[2])); + LearnerType type = LearnerType.PARTICIPANT; + if (parts[3].toLowerCase().equals("observer")) { + type = LearnerType.OBSERVER; + observers.put(Long.valueOf(sid), new QuorumServer(sid, addr, + electionAddr,type)); + } else if 
(parts[3].toLowerCase().equals("participant")) { + type = LearnerType.PARTICIPANT; + servers.put(Long.valueOf(sid), new QuorumServer(sid, addr, + electionAddr,type)); + } else { + throw new ConfigException("Unrecognised peertype: " + value); + } } } else if (key.startsWith("group")) { int dot = key.indexOf('.'); @@ -169,6 +199,10 @@ public class QuorumPeerConfig { System.setProperty("zookeeper." + key, value); } } + if (observers.size() > 0 && electionAlg != 0) { + throw new IllegalArgumentException("Observers must currently be used with simple leader election" + + " (set electionAlg=0)"); + } if (dataDir == null) { throw new IllegalArgumentException("dataDir is not set"); } @@ -233,6 +267,10 @@ public class QuorumPeerConfig { quorumVerifier = new QuorumMaj(servers.size()); } + // Now add observers to servers, once the quorums have been + // figured out + servers.putAll(observers); + File myIdFile = new File(dataDir, "myid"); if (!myIdFile.exists()) { throw new IllegalArgumentException(myIdFile.toString() @@ -276,4 +314,8 @@ public class QuorumPeerConfig { public long getServerId() { return serverId; } public boolean isDistributed() { return servers.size() > 1; } + + public LearnerType getPeerType() { + return peerType; + } } diff --git a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerMain.java b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerMain.java index 7ed148ac..0dc158a3 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerMain.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeerMain.java @@ -133,6 +133,7 @@ public class QuorumPeerMain { quorumPeer.setSyncLimit(config.getSyncLimit()); quorumPeer.setQuorumVerifier(config.getQuorumVerifier()); quorumPeer.setCnxnFactory(cnxnFactory); + quorumPeer.setPeerType(config.getPeerType()); quorumPeer.start(); quorumPeer.join(); diff --git a/src/java/main/org/apache/zookeeper/server/quorum/QuorumStats.java 
b/src/java/main/org/apache/zookeeper/server/quorum/QuorumStats.java index 06ca1fb5..b6c62e44 100644 --- a/src/java/main/org/apache/zookeeper/server/quorum/QuorumStats.java +++ b/src/java/main/org/apache/zookeeper/server/quorum/QuorumStats.java @@ -26,7 +26,7 @@ public class QuorumStats { static public final String LOOKING_STATE = "leaderelection"; static public final String LEADING_STATE = "leading"; static public final String FOLLOWING_STATE = "following"; - + static public final String OBSERVING_STATE = "observing"; public String[] getQuorumPeers(); public String getServerState(); } @@ -53,7 +53,8 @@ public class QuorumStats { sb.append(" ").append(f); } sb.append("\n"); - }else if(state.equals(Provider.FOLLOWING_STATE)){ + }else if(state.equals(Provider.FOLLOWING_STATE) + || state.equals(Provider.OBSERVING_STATE)){ sb.append("Leader: "); String[] ldr=getQuorumPeers(); if(ldr.length>0) diff --git a/src/java/test/org/apache/zookeeper/server/quorum/ObserverTest.java b/src/java/test/org/apache/zookeeper/server/quorum/ObserverTest.java new file mode 100644 index 00000000..72b05c96 --- /dev/null +++ b/src/java/test/org/apache/zookeeper/server/quorum/ObserverTest.java @@ -0,0 +1,236 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.server.quorum; + +import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.concurrent.CountDownLatch; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper.States; + +import org.apache.zookeeper.AsyncCallback.VoidCallback; +import org.apache.zookeeper.KeeperException.ConnectionLossException; + +import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; +import org.apache.zookeeper.test.ClientBase; +import org.junit.Test; + +/** + * Test Observer behaviour and specific code paths. + * + */ +public class ObserverTest extends QuorumPeerTestBase implements Watcher{ + protected static final Logger LOG = + Logger.getLogger(ObserverTest.class); + + // We expect two notifications before we want to continue + CountDownLatch latch = new CountDownLatch(2); + ZooKeeper zk; + WatchedEvent lastEvent = null; + + /** + * This test ensures two things: + * 1. That Observers can successfully proxy requests to the ensemble. + * 2. That Observers don't participate in leader elections. + * The second is tested by constructing an ensemble where a leader would + * be elected if and only if an Observer voted. 
+ * @throws Exception + */ + @Test + public void testObserver() throws Exception { + ClientBase.setupTestEnv(); + final int CLIENT_PORT_QP1 = 3181; + final int CLIENT_PORT_QP2 = CLIENT_PORT_QP1 + 3; + final int CLIENT_PORT_OBS = CLIENT_PORT_QP2 + 3; + + String quorumCfgSection = + "electionAlg=0\n" + + "server.1=localhost:" + (CLIENT_PORT_QP1 + 1) + + ":" + (CLIENT_PORT_QP1 + 2) + + "\nserver.2=localhost:" + (CLIENT_PORT_QP2 + 1) + + ":" + (CLIENT_PORT_QP2 + 2) + + "\nserver.3=localhost:" + + (CLIENT_PORT_OBS+1)+ ":" + (CLIENT_PORT_OBS + 2) + ":observer"; + String obsCfgSection = quorumCfgSection + "\npeerType=observer"; + MainThread q1 = new MainThread(1, CLIENT_PORT_QP1, quorumCfgSection); + MainThread q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgSection); + MainThread q3 = new MainThread(3, CLIENT_PORT_OBS, obsCfgSection); + q1.start(); + q2.start(); + q3.start(); + assertTrue("waiting for server 1 being up", + ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP1, + CONNECTION_TIMEOUT)); + assertTrue("waiting for server 2 being up", + ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP2, + CONNECTION_TIMEOUT)); + assertTrue("waiting for server 3 being up", + ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_OBS, + CONNECTION_TIMEOUT)); + + zk = new ZooKeeper("localhost:" + CLIENT_PORT_OBS, + ClientBase.CONNECTION_TIMEOUT, this); + zk.create("/obstest", "test".getBytes(),Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + + // Assert that commands are getting forwarded correctly + assertEquals(new String(zk.getData("/obstest", null, null)), "test"); + + // Now check that other commands don't blow everything up + zk.sync("/", null, null); + zk.setData("/obstest", "test2".getBytes(), -1); + zk.getChildren("/", false); + + assertEquals(zk.getState(), States.CONNECTED); + + // Now kill one of the other real servers + q2.shutdown(); + + assertTrue("Waiting for server 2 to shut down", + ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP2, + 
ClientBase.CONNECTION_TIMEOUT)); + + // Now the resulting ensemble shouldn't be quorate + latch.await(); + assertNotSame("zk should not be connected", KeeperState.SyncConnected,lastEvent.getState()); + + try { + assertFalse("Shouldn't get a response when cluster not quorate!", + new String(zk.getData("/obstest", null, null)).equals("test")); + } + catch (ConnectionLossException c) { + LOG.info("Connection loss exception caught - ensemble not quorate (this is expected)"); + } + + latch = new CountDownLatch(1); + + // Bring it back + q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgSection); + q2.start(); + assertTrue("waiting for server 2 being up", + ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP2, + CONNECTION_TIMEOUT)); + + latch.await(); + // It's possible our session expired - but this is ok, shows we + // were able to talk to the ensemble + assertTrue("Didn't reconnect", + (KeeperState.SyncConnected==lastEvent.getState() || + KeeperState.Expired==lastEvent.getState())); + + q1.shutdown(); + q2.shutdown(); + q3.shutdown(); + + zk.close(); + assertTrue("Waiting for server 1 to shut down", + ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP1, + ClientBase.CONNECTION_TIMEOUT)); + assertTrue("Waiting for server 2 to shut down", + ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP2, + ClientBase.CONNECTION_TIMEOUT)); + assertTrue("Waiting for server 3 to shut down", + ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_OBS, + ClientBase.CONNECTION_TIMEOUT)); + + } + + public void process(WatchedEvent event) { + latch.countDown(); + lastEvent = event; + } + + /** + * This test ensures that an Observer does not elect itself as a leader, or + * indeed come up properly, if it is the lone member of an ensemble. 
+ * @throws IOException + */ + @Test + public void testSingleObserver() throws IOException{ + ClientBase.setupTestEnv(); + final int CLIENT_PORT_QP1 = 3181; + + String quorumCfgSection = + "server.1=localhost:" + (CLIENT_PORT_QP1 + 1) + + ":" + (CLIENT_PORT_QP1 + 2) + "\npeerType=observer"; + + MainThread q1 = new MainThread(1, CLIENT_PORT_QP1, quorumCfgSection); + q1.start(); + assertFalse("Observer shouldn't come up", + ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP1, + CONNECTION_TIMEOUT)); + + q1.shutdown(); + } + + @Test + public void testLeaderElectionFail() throws Exception { + ClientBase.setupTestEnv(); + final int CLIENT_PORT_QP1 = 3181; + final int CLIENT_PORT_QP2 = CLIENT_PORT_QP1 + 3; + final int CLIENT_PORT_OBS = CLIENT_PORT_QP2 + 3; + + String quorumCfgSection = + "electionAlg=1\n" + + "server.1=localhost:" + (CLIENT_PORT_QP1 + 1) + + ":" + (CLIENT_PORT_QP1 + 2) + + "\nserver.2=localhost:" + (CLIENT_PORT_QP2 + 1) + + ":" + (CLIENT_PORT_QP2 + 2) + + "\nserver.3=localhost:" + + (CLIENT_PORT_OBS+1)+ ":" + (CLIENT_PORT_OBS + 2) + ":observer"; + QuorumPeerConfig qpc = new QuorumPeerConfig(); + + File tmpDir = ClientBase.createTmpDir(); + File confFile = new File(tmpDir, "zoo.cfg"); + + FileWriter fwriter = new FileWriter(confFile); + fwriter.write("tickTime=2000\n"); + fwriter.write("initLimit=10\n"); + fwriter.write("syncLimit=5\n"); + + File dataDir = new File(tmpDir, "data"); + if (!dataDir.mkdir()) { + throw new IOException("Unable to mkdir " + dataDir); + } + fwriter.write("dataDir=" + dataDir.toString() + "\n"); + + fwriter.write("clientPort=" + CLIENT_PORT_QP1 + "\n"); + fwriter.write(quorumCfgSection + "\n"); + fwriter.flush(); + fwriter.close(); + try { + qpc.parse(confFile.toString()); + } catch (ConfigException e) { + LOG.info("Config exception caught as expected: " + e.getCause()); + return; + } + + assertTrue("Didn't get the expected config exception", false); + } +} diff --git 
a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java index 4c7723e6..9d102c92 100644 --- a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java +++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java @@ -55,63 +55,8 @@ import org.junit.Test; * Test stand-alone server. * */ -public class QuorumPeerMainTest extends TestCase implements Watcher { - protected static final Logger LOG = - Logger.getLogger(QuorumPeerMainTest.class); - - public static class MainThread extends Thread { - final File confFile; - final TestQPMain main; - - public MainThread(int myid, int clientPort, String quorumCfgSection) - throws IOException - { - super("QuorumPeer with myid:" + myid - + " and clientPort:" + clientPort); - File tmpDir = ClientBase.createTmpDir(); - confFile = new File(tmpDir, "zoo.cfg"); - - FileWriter fwriter = new FileWriter(confFile); - fwriter.write("tickTime=2000\n"); - fwriter.write("initLimit=10\n"); - fwriter.write("syncLimit=5\n"); - - File dataDir = new File(tmpDir, "data"); - if (!dataDir.mkdir()) { - throw new IOException("Unable to mkdir " + dataDir); - } - fwriter.write("dataDir=" + dataDir.toString() + "\n"); - - fwriter.write("clientPort=" + clientPort + "\n"); - fwriter.write(quorumCfgSection + "\n"); - fwriter.flush(); - fwriter.close(); - - File myidFile = new File(dataDir, "myid"); - fwriter = new FileWriter(myidFile); - fwriter.write(Integer.toString(myid)); - fwriter.flush(); - fwriter.close(); - - main = new TestQPMain(); - } - - public void run() { - String args[] = new String[1]; - args[0] = confFile.toString(); - try { - main.initializeAndRun(args); - } catch (Exception e) { - // test will still fail even though we just log/ignore - LOG.error("unexpected exception in run", e); - } - } - - public void shutdown() { - main.shutdown(); - } - } - +public class QuorumPeerMainTest extends QuorumPeerTestBase { + public 
static class TestQPMain extends QuorumPeerMain { public void shutdown() { super.shutdown(); @@ -358,8 +303,5 @@ public class QuorumPeerMainTest extends TestCase implements Watcher { } assertTrue("fastleaderelection used", found); } - - public void process(WatchedEvent event) { - // ignore for this test - } + } diff --git a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java new file mode 100644 index 00000000..05326d2f --- /dev/null +++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * + */ +package org.apache.zookeeper.server.quorum; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.server.quorum.QuorumPeerMainTest.TestQPMain; +import org.apache.zookeeper.test.ClientBase; + +import junit.framework.TestCase; + +/** + * Has some common functionality for tests that work with QuorumPeers. 
+ * Override process(WatchedEvent) to implement the Watcher interface + */ +public class QuorumPeerTestBase extends TestCase implements Watcher { + protected static final Logger LOG = + Logger.getLogger(QuorumPeerTestBase.class); + + public void process(WatchedEvent event) { + // ignore for this test + } + + public static class MainThread extends Thread { + final File confFile; + final TestQPMain main; + + public MainThread(int myid, int clientPort, String quorumCfgSection) + throws IOException + { + super("QuorumPeer with myid:" + myid + + " and clientPort:" + clientPort); + File tmpDir = ClientBase.createTmpDir(); + confFile = new File(tmpDir, "zoo.cfg"); + + FileWriter fwriter = new FileWriter(confFile); + fwriter.write("tickTime=2000\n"); + fwriter.write("initLimit=10\n"); + fwriter.write("syncLimit=5\n"); + + File dataDir = new File(tmpDir, "data"); + if (!dataDir.mkdir()) { + throw new IOException("Unable to mkdir " + dataDir); + } + fwriter.write("dataDir=" + dataDir.toString() + "\n"); + + fwriter.write("clientPort=" + clientPort + "\n"); + fwriter.write(quorumCfgSection + "\n"); + fwriter.flush(); + fwriter.close(); + + File myidFile = new File(dataDir, "myid"); + fwriter = new FileWriter(myidFile); + fwriter.write(Integer.toString(myid)); + fwriter.flush(); + fwriter.close(); + + main = new TestQPMain(); + } + + public void run() { + String args[] = new String[1]; + args[0] = confFile.toString(); + try { + main.initializeAndRun(args); + } catch (Exception e) { + // test will still fail even though we just log/ignore + LOG.error("unexpected exception in run", e); + } + } + + public void shutdown() { + main.shutdown(); + } + } +} diff --git a/src/java/test/org/apache/zookeeper/test/AsyncHammerTest.java b/src/java/test/org/apache/zookeeper/test/AsyncHammerTest.java index 5dda8628..df30303d 100644 --- a/src/java/test/org/apache/zookeeper/test/AsyncHammerTest.java +++ b/src/java/test/org/apache/zookeeper/test/AsyncHammerTest.java @@ -202,6 +202,26 @@ public 
class AsyncHammerTest extends TestCase LOG.info("Verifying hammers 2"); qb.verifyRootOfAllServersMatch(qb.hostPort); } + + @Test + public void testObserversHammer() throws Exception { + qb.tearDown(); + qb.setUp(true); + bang = true; + Thread[] hammers = new Thread[100]; + for (int i = 0; i < hammers.length; i++) { + hammers[i] = new HammerThread("HammerThread-" + i); + hammers[i].start(); + } + Thread.sleep(5000); // allow the clients to run for max 5sec + bang = false; + for (int i = 0; i < hammers.length; i++) { + hammers[i].interrupt(); + verifyThreadTerminated(hammers[i], 60000); + } + // before restart + qb.verifyRootOfAllServersMatch(qb.hostPort); + } @SuppressWarnings("unchecked") public void processResult(int rc, String path, Object ctx, String name) { diff --git a/src/java/test/org/apache/zookeeper/test/HierarchicalQuorumTest.java b/src/java/test/org/apache/zookeeper/test/HierarchicalQuorumTest.java index 63088041..e3107109 100644 --- a/src/java/test/org/apache/zookeeper/test/HierarchicalQuorumTest.java +++ b/src/java/test/org/apache/zookeeper/test/HierarchicalQuorumTest.java @@ -50,20 +50,20 @@ public class HierarchicalQuorumTest extends ClientBase { File s1dir, s2dir, s3dir, s4dir, s5dir; QuorumPeer s1, s2, s3, s4, s5; - private int port1; - private int port2; - private int port3; - private int port4; - private int port5; + protected int port1; + protected int port2; + protected int port3; + protected int port4; + protected int port5; - private int leport1; - private int leport2; - private int leport3; - private int leport4; - private int leport5; + protected int leport1; + protected int leport2; + protected int leport3; + protected int leport4; + protected int leport5; Properties qp; - private final ClientHammerTest cht = new ClientHammerTest(); + protected final ClientHammerTest cht = new ClientHammerTest(); @Override protected void setUp() throws Exception { @@ -118,15 +118,29 @@ public class HierarchicalQuorumTest extends ClientBase { 
LOG.info("Setup finished"); } - + /** + * This method is here to keep backwards compatibility with the test code + * written before observers. + * @throws Exception + */ void startServers() throws Exception { + startServers(false); + } + + /** + * Starts 5 Learners. When withObservers == false, all 5 are Followers. + * When withObservers == true, 3 are Followers and 2 Observers. + * @param withObservers + * @throws Exception + */ + void startServers(boolean withObservers) throws Exception { int tickTime = 2000; int initLimit = 3; int syncLimit = 3; HashMap<Long,QuorumServer> peers = new HashMap<Long,QuorumServer>(); peers.put(Long.valueOf(1), new QuorumServer(1, new InetSocketAddress("127.0.0.1", port1 + 1000), - new InetSocketAddress("127.0.0.1", leport1 + 1000))); + new InetSocketAddress("127.0.0.1", leport1 + 1000))); peers.put(Long.valueOf(2), new QuorumServer(2, new InetSocketAddress("127.0.0.1", port2 + 1000), new InetSocketAddress("127.0.0.1", leport2 + 1000))); @@ -135,10 +149,14 @@ public class HierarchicalQuorumTest extends ClientBase { new InetSocketAddress("127.0.0.1", leport3 + 1000))); peers.put(Long.valueOf(4), new QuorumServer(4, new InetSocketAddress("127.0.0.1", port4 + 1000), - new InetSocketAddress("127.0.0.1", leport4 + 1000))); + new InetSocketAddress("127.0.0.1", leport4 + 1000), + withObservers ? QuorumPeer.LearnerType.OBSERVER + : QuorumPeer.LearnerType.PARTICIPANT)); peers.put(Long.valueOf(5), new QuorumServer(5, new InetSocketAddress("127.0.0.1", port5 + 1000), - new InetSocketAddress("127.0.0.1", leport5 + 1000))); + new InetSocketAddress("127.0.0.1", leport5 + 1000), + withObservers ? 
QuorumPeer.LearnerType.OBSERVER + : QuorumPeer.LearnerType.PARTICIPANT)); LOG.info("creating QuorumPeer 1 port " + port1); QuorumHierarchical hq1 = new QuorumHierarchical(qp); @@ -158,21 +176,37 @@ public class HierarchicalQuorumTest extends ClientBase { LOG.info("creating QuorumPeer 4 port " + port4); QuorumHierarchical hq4 = new QuorumHierarchical(qp); s4 = new QuorumPeer(peers, s4dir, s4dir, port4, 3, 4, tickTime, initLimit, syncLimit, hq4); + if (withObservers) { + s4.setPeerType(QuorumPeer.LearnerType.OBSERVER); + } assertEquals(port4, s4.getClientPort()); - + LOG.info("creating QuorumPeer 5 port " + port5); QuorumHierarchical hq5 = new QuorumHierarchical(qp); s5 = new QuorumPeer(peers, s5dir, s5dir, port5, 3, 5, tickTime, initLimit, syncLimit, hq5); + if (withObservers) { + s5.setPeerType(QuorumPeer.LearnerType.OBSERVER); + } assertEquals(port5, s5.getClientPort()); + + // Observers are currently only compatible with LeaderElection + if (withObservers) { + s1.setElectionType(0); + s2.setElectionType(0); + s3.setElectionType(0); + s4.setElectionType(0); + s5.setElectionType(0); + } + LOG.info("start QuorumPeer 1"); s1.start(); LOG.info("start QuorumPeer 2"); s2.start(); LOG.info("start QuorumPeer 3"); s3.start(); - LOG.info("start QuorumPeer 4"); + LOG.info("start QuorumPeer 4" + (withObservers ? "(observer)" : "")); s4.start(); - LOG.info("start QuorumPeer 5"); + LOG.info("start QuorumPeer 5" + (withObservers ? "(observer)" : "")); s5.start(); LOG.info("started QuorumPeer 5"); diff --git a/src/java/test/org/apache/zookeeper/test/ObserverHierarchicalQuorumTest.java b/src/java/test/org/apache/zookeeper/test/ObserverHierarchicalQuorumTest.java new file mode 100644 index 00000000..66fecc3d --- /dev/null +++ b/src/java/test/org/apache/zookeeper/test/ObserverHierarchicalQuorumTest.java @@ -0,0 +1,59 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.test; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.server.quorum.QuorumPeer; +import org.junit.Test; + +/** + * Mimics QuorumHierarchical test, but on an ensemble that includes 2 + * observers. + */ + +public class ObserverHierarchicalQuorumTest extends HierarchicalQuorumTest { + private static final Logger LOG = Logger.getLogger(QuorumBase.class); + + /** + * startServers(true) puts two observers into a 5 peer ensemble + */ + void startServers() throws Exception { + startServers(true); + } + + protected void shutdown(QuorumPeer qp) { + try { + /* TODO: when Observers are compatible with fle, shutdown + * the leader election */ + LOG.info("Done with leader election"); + qp.shutdown(); + LOG.info("Done with quorum peer"); + qp.join(30000); + if (qp.isAlive()) { + fail("QP failed to shutdown in 30 seconds"); + } + } catch (InterruptedException e) { + LOG.debug("QP interrupted", e); + } + } + + @Test + public void testHierarchicalQuorum() throws Throwable { + cht.runHammer(5, 10); + } +} \ No newline at end of file diff --git a/src/java/test/org/apache/zookeeper/test/ObserverQuorumHammerTest.java b/src/java/test/org/apache/zookeeper/test/ObserverQuorumHammerTest.java new file mode 100644 index 00000000..94ae77fc --- /dev/null 
+++ b/src/java/test/org/apache/zookeeper/test/ObserverQuorumHammerTest.java @@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.test;
+import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Same workload as QuorumHammerTest, except that the ensemble is brought up
+ * through {@code qb.setUp(true)}, i.e. with observers enabled (2 observers in
+ * the 5 Learner ensemble).
+ */
+public class ObserverQuorumHammerTest extends QuorumHammerTest {
+    public static final long CONNECTION_TIMEOUT = ClientTest.CONNECTION_TIMEOUT;
+
+    /**
+     * Starts the quorum with observers, then points the client hammer at it.
+     *
+     * @throws Exception if the quorum or the hammer fixture fails to set up
+     */
+    @Override
+    @Before
+    protected void setUp() throws Exception {
+        // Order matters: qb.hostPort is read only after qb has been set up.
+        qb.setUp(true);
+        cht.hostPort = qb.hostPort;
+        cht.setUpAll();
+    }
+
+    /**
+     * Delegates to the basic hammer scenario of the shared ClientHammerTest.
+     *
+     * @throws Throwable whatever the hammer scenario throws
+     */
+    @Test
+    public void testHammerBasic() throws Throwable {
+        cht.testHammerBasic();
+    }
+}
\ No newline at end of file diff --git a/src/java/test/org/apache/zookeeper/test/ObserverTest.java b/src/java/test/org/apache/zookeeper/test/ObserverTest.java new file mode 100644 index 00000000..9d8ca6c1 --- /dev/null +++ b/src/java/test/org/apache/zookeeper/test/ObserverTest.java @@ -0,0 +1,242 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.test; + +import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.concurrent.CountDownLatch; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.KeeperException.ConnectionLossException; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper.States; +import org.apache.zookeeper.server.quorum.QuorumPeerConfig; +import org.apache.zookeeper.server.quorum.QuorumPeerTestBase; +import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; +import org.junit.Test; + +/** + * Test Observer behaviour and specific code paths. 
+ *
+ * The test instance is also the client {@link Watcher}: every event it
+ * receives is stored in {@code lastEvent} and counted down on {@code latch}.
+ */
+public class ObserverTest extends QuorumPeerTestBase implements Watcher {
+    protected static final Logger LOG =
+        Logger.getLogger(ObserverTest.class);
+
+    // We expect two notifications before we want to continue.
+    // volatile: re-assigned by the test thread, read by the thread that
+    // invokes process().
+    volatile CountDownLatch latch = new CountDownLatch(2);
+    ZooKeeper zk;
+    // volatile: written in process(), read by the test thread.
+    volatile WatchedEvent lastEvent = null;
+
+    /**
+     * This test ensures two things:
+     * 1. That Observers can successfully proxy requests to the ensemble.
+     * 2. That Observers don't participate in leader elections.
+     * The second is tested by constructing an ensemble where a leader would
+     * be elected if and only if an Observer voted.
+     * @throws Exception
+     */
+    @Test
+    public void testObserver() throws Exception {
+        ClientBase.setupTestEnv();
+        final int CLIENT_PORT_QP1 = 3181;
+        final int CLIENT_PORT_QP2 = CLIENT_PORT_QP1 + 3;
+        final int CLIENT_PORT_OBS = CLIENT_PORT_QP2 + 3;
+
+        String quorumCfgSection =
+            "electionAlg=0\n" +
+            "server.1=localhost:" + (CLIENT_PORT_QP1 + 1)
+            + ":" + (CLIENT_PORT_QP1 + 2)
+            + "\nserver.2=localhost:" + (CLIENT_PORT_QP2 + 1)
+            + ":" + (CLIENT_PORT_QP2 + 2)
+            + "\nserver.3=localhost:"
+            + (CLIENT_PORT_OBS+1)+ ":" + (CLIENT_PORT_OBS + 2) + ":observer";
+        String obsCfgSection = quorumCfgSection + "\npeerType=observer";
+        MainThread q1 = new MainThread(1, CLIENT_PORT_QP1, quorumCfgSection);
+        MainThread q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgSection);
+        MainThread q3 = new MainThread(3, CLIENT_PORT_OBS, obsCfgSection);
+        q1.start();
+        q2.start();
+        q3.start();
+        assertTrue("waiting for server 1 being up",
+                ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP1,
+                        CONNECTION_TIMEOUT));
+        assertTrue("waiting for server 2 being up",
+                ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP2,
+                        CONNECTION_TIMEOUT));
+        assertTrue("waiting for server 3 being up",
+                ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_OBS,
+                        CONNECTION_TIMEOUT));
+
+        // The client talks to the observer only.
+        zk = new ZooKeeper("localhost:" + CLIENT_PORT_OBS,
+                ClientBase.CONNECTION_TIMEOUT, this);
+        zk.create("/obstest", "test".getBytes(),Ids.OPEN_ACL_UNSAFE,
+                CreateMode.PERSISTENT);
+
+        // Assert that commands are getting forwarded correctly
+        // (expected value first, so a failure message reads the right way).
+        assertEquals("test", new String(zk.getData("/obstest", null, null)));
+
+        // Now check that other commands don't blow everything up
+        zk.sync("/", null, null);
+        zk.setData("/obstest", "test2".getBytes(), -1);
+        zk.getChildren("/", false);
+
+        assertEquals(States.CONNECTED, zk.getState());
+
+        // Now kill one of the other real servers
+        q2.shutdown();
+
+        assertTrue("Waiting for server 2 to shut down",
+                ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP2,
+                        ClientBase.CONNECTION_TIMEOUT));
+
+        // Now the resulting ensemble shouldn't be quorate
+        latch.await();
+        assertNotSame("zk should not be connected", KeeperState.SyncConnected,lastEvent.getState());
+
+        try {
+            // Any successful read is a failure here. The node already holds
+            // "test2", so comparing the payload against "test" would pass
+            // even if the non-quorate ensemble answered.
+            zk.getData("/obstest", null, null);
+            fail("Shouldn't get a response when cluster not quorate!");
+        }
+        catch (ConnectionLossException c) {
+            LOG.info("Connection loss exception caught - ensemble not quorate (this is expected)");
+        }
+
+        latch = new CountDownLatch(1);
+
+        // Bring it back
+        q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgSection);
+        q2.start();
+        assertTrue("waiting for server 2 being up",
+                ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP2,
+                        CONNECTION_TIMEOUT));
+
+        latch.await();
+        // It's possible our session expired - but this is ok, shows we
+        // were able to talk to the ensemble
+        assertTrue("Didn't reconnect",
+                (KeeperState.SyncConnected==lastEvent.getState() ||
+                KeeperState.Expired==lastEvent.getState()));
+
+        q1.shutdown();
+        q2.shutdown();
+        q3.shutdown();
+
+        zk.close();
+        assertTrue("Waiting for server 1 to shut down",
+                ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP1,
+                        ClientBase.CONNECTION_TIMEOUT));
+        assertTrue("Waiting for server 2 to shut down",
+                ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_QP2,
+                        ClientBase.CONNECTION_TIMEOUT));
+        assertTrue("Waiting for server 3 to shut down",
+                ClientBase.waitForServerDown("localhost:"+CLIENT_PORT_OBS,
+                        ClientBase.CONNECTION_TIMEOUT));
+
+    }
+
+    /**
+     * Implementation of watcher interface.
+     *
+     * @param event the event delivered to this watcher
+     */
+    public void process(WatchedEvent event) {
+        // Publish the event BEFORE releasing the latch: a thread returning
+        // from latch.await() must never observe the previous (or no) event.
+        lastEvent = event;
+        latch.countDown();
+    }
+
+    /**
+     * This test ensures that an Observer does not elect itself as a leader, or
+     * indeed come up properly, if it is the lone member of an ensemble.
+     * @throws IOException
+     */
+    @Test
+    public void testSingleObserver() throws IOException{
+        ClientBase.setupTestEnv();
+        final int CLIENT_PORT_QP1 = 3181;
+
+        String quorumCfgSection =
+            "server.1=localhost:" + (CLIENT_PORT_QP1 + 1)
+            + ":" + (CLIENT_PORT_QP1 + 2) + "\npeerType=observer";
+
+        MainThread q1 = new MainThread(1, CLIENT_PORT_QP1, quorumCfgSection);
+        q1.start();
+        try {
+            assertFalse("Observer shouldn't come up",
+                    ClientBase.waitForServerUp("localhost:" + CLIENT_PORT_QP1,
+                            CONNECTION_TIMEOUT));
+        } finally {
+            // Stop the peer even when the assertion fails, so it does not
+            // keep running after this test.
+            q1.shutdown();
+        }
+    }
+
+    /**
+     * Check that an attempt to instantiate an ensemble with observers and
+     * electionAlg != 0 fails (this will be removed when the restriction is).
+     * @throws Exception
+     */
+    @Test
+    public void testLeaderElectionFail() throws Exception {
+        ClientBase.setupTestEnv();
+        final int CLIENT_PORT_QP1 = 3181;
+        final int CLIENT_PORT_QP2 = CLIENT_PORT_QP1 + 3;
+        final int CLIENT_PORT_OBS = CLIENT_PORT_QP2 + 3;
+
+        String quorumCfgSection =
+            "electionAlg=1\n" +
+            "server.1=localhost:" + (CLIENT_PORT_QP1 + 1)
+            + ":" + (CLIENT_PORT_QP1 + 2)
+            + "\nserver.2=localhost:" + (CLIENT_PORT_QP2 + 1)
+            + ":" + (CLIENT_PORT_QP2 + 2)
+            + "\nserver.3=localhost:"
+            + (CLIENT_PORT_OBS+1)+ ":" + (CLIENT_PORT_OBS + 2) + ":observer";
+        QuorumPeerConfig qpc = new QuorumPeerConfig();
+
+        File tmpDir = ClientBase.createTmpDir();
+        File confFile = new File(tmpDir, "zoo.cfg");
+
+        FileWriter fwriter = new FileWriter(confFile);
+        try {
+            fwriter.write("tickTime=2000\n");
+            fwriter.write("initLimit=10\n");
+            fwriter.write("syncLimit=5\n");
+
+            File dataDir = new File(tmpDir, "data");
+            if (!dataDir.mkdir()) {
+                throw new IOException("Unable to mkdir " + dataDir);
+            }
+            fwriter.write("dataDir=" + dataDir.toString() + "\n");
+
+            fwriter.write("clientPort=" + CLIENT_PORT_QP1 + "\n");
+            fwriter.write(quorumCfgSection + "\n");
+            fwriter.flush();
+        } finally {
+            // Release the file handle on every path, including the mkdir
+            // failure above.
+            fwriter.close();
+        }
+
+        try {
+            qpc.parse(confFile.toString());
+        } catch (ConfigException e) {
+            LOG.info("Config exception caught as expected: " + e.getCause());
+            return;
+        }
+
+        fail("Didn't get the expected config exception");
+    }
+}
diff --git a/src/java/test/org/apache/zookeeper/test/QuorumBase.java b/src/java/test/org/apache/zookeeper/test/QuorumBase.java index 2b68e26a..1b670de8 100644 --- a/src/java/test/org/apache/zookeeper/test/QuorumBase.java +++ b/src/java/test/org/apache/zookeeper/test/QuorumBase.java @@ -31,6 +31,7 @@ import org.apache.log4j.Logger; import org.apache.zookeeper.PortAssignment; import org.apache.zookeeper.TestableZooKeeper; import org.apache.zookeeper.server.quorum.QuorumPeer; +import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; import
org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; import org.junit.After; @@ -49,6 +50,10 @@ public class QuorumBase extends ClientBase { @Override protected void setUp() throws Exception { + setUp(false); + } + + protected void setUp(boolean withObservers) throws Exception { LOG.info("STARTING " + getName()); setupTestEnv(); @@ -74,7 +79,7 @@ public class QuorumBase extends ClientBase { s4dir = ClientBase.createTmpDir(); s5dir = ClientBase.createTmpDir(); - startServers(); + startServers(withObservers); OperatingSystemMXBean osMbean = ManagementFactory.getOperatingSystemMXBean(); @@ -87,7 +92,12 @@ public class QuorumBase extends ClientBase { LOG.info("Setup finished"); } + void startServers() throws Exception { + startServers(false); + } + + void startServers(boolean withObservers) throws Exception { int tickTime = 2000; int initLimit = 3; int syncLimit = 3; @@ -97,6 +107,11 @@ public class QuorumBase extends ClientBase { peers.put(Long.valueOf(3), new QuorumServer(3, new InetSocketAddress("127.0.0.1", port3 + 1000))); peers.put(Long.valueOf(4), new QuorumServer(4, new InetSocketAddress("127.0.0.1", port4 + 1000))); peers.put(Long.valueOf(5), new QuorumServer(5, new InetSocketAddress("127.0.0.1", port5 + 1000))); + + if (withObservers) { + peers.get(Long.valueOf(4)).type = LearnerType.OBSERVER; + peers.get(Long.valueOf(5)).type = LearnerType.OBSERVER; + } LOG.info("creating QuorumPeer 1 port " + port1); s1 = new QuorumPeer(peers, s1dir, s1dir, port1, 0, 1, tickTime, initLimit, syncLimit); @@ -113,6 +128,12 @@ public class QuorumBase extends ClientBase { LOG.info("creating QuorumPeer 5 port " + port5); s5 = new QuorumPeer(peers, s5dir, s5dir, port5, 0, 5, tickTime, initLimit, syncLimit); assertEquals(port5, s5.getClientPort()); + + if (withObservers) { + s4.setPeerType(LearnerType.OBSERVER); + s5.setPeerType(LearnerType.OBSERVER); + } + LOG.info("start QuorumPeer 1"); s1.start(); LOG.info("start QuorumPeer 2"); diff --git 
a/src/java/test/org/apache/zookeeper/test/QuorumHammerTest.java b/src/java/test/org/apache/zookeeper/test/QuorumHammerTest.java index d81b5ad8..8bbbc2e4 100644 --- a/src/java/test/org/apache/zookeeper/test/QuorumHammerTest.java +++ b/src/java/test/org/apache/zookeeper/test/QuorumHammerTest.java @@ -22,11 +22,11 @@ import org.junit.Before; import org.junit.Test; public class QuorumHammerTest extends QuorumBase { - private static final Logger LOG = Logger.getLogger(QuorumHammerTest.class); + protected static final Logger LOG = Logger.getLogger(QuorumHammerTest.class); public static final long CONNECTION_TIMEOUT = ClientTest.CONNECTION_TIMEOUT; - private final QuorumBase qb = new QuorumBase(); - private final ClientHammerTest cht = new ClientHammerTest(); + protected final QuorumBase qb = new QuorumBase(); + protected final ClientHammerTest cht = new ClientHammerTest(); @Before @Override -- GitLab