Docs/linux基础/优化Haproxy/6.html
2022-10-18 16:59:37 +08:00

235 lines
No EOL
9.5 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="tool" content="leanote-desktop-app">
<title>6</title>
<style>
*{font-family:"lucida grande","lucida sans unicode",lucida,helvetica,"Hiragino Sans GB","Microsoft YaHei","WenQuanYi Micro Hei",sans-serif;}
body {
margin: 0;
}
/*公用文字样式*/
h1{font-size:30px}h2{font-size:24px}h3{font-size:18px}h4{font-size:14px}
.note-container{
width:850px;
margin:auto;
padding: 10px 20px;
box-shadow: 1px 1px 10px #eee;
}
#title {
margin: 0;
}
table {
margin-bottom: 16px;
border-collapse: collapse;
}
table th, table td {
padding: 6px 13px;
border: 1px solid #ddd;
}
table th {
font-weight: bold;
}
table tr {
/* "none" is not a valid background-color value; "transparent" is the keyword for no fill. */
background-color: transparent;
border-top: 1px solid #ccc;
}
table tr:nth-child(2n) {
background-color: rgb(247, 247, 249);
}
.mce-item-table, .mce-item-table td, .mce-item-table th, .mce-item-table caption {
border: 1px solid #ddd;
border-collapse: collapse;
padding: 6px 13px;
}
blockquote {
border-left-width:10px;
background-color:rgba(128,128,128,0.05);
border-top-right-radius:5px;
border-bottom-right-radius:5px;
padding:15px 20px;
border-left:5px solid rgba(128,128,128,0.075);
}
blockquote p {
margin-bottom:1.1em;
font-size:1em;
line-height:1.45
}
blockquote ul:last-child,blockquote ol:last-child {
margin-bottom:0
}
pre {
padding: 18px;
background-color: #f7f7f9;
border: 1px solid #e1e1e8;
border-radius: 3px;
display: block;
}
code {
padding: 2px 4px;
font-size: 90%;
color: #c7254e;
white-space: nowrap;
background-color: #f9f2f4;
border-radius: 4px;
}
.footnote {
vertical-align: top;
position: relative;
top: -0.5em;
font-size: .8em;
}
hr {
margin:2em 0
}
img {
max-width:100%
}
pre {
word-break:break-word
}
p,pre,pre.prettyprint,blockquote {
margin:0 0 1.1em
}
hr {
margin:2em 0
}
img {
max-width:100%
}
.sequence-diagram,.flow-chart {
text-align:center;
margin-bottom:1.1em
}
.sequence-diagram text,.flow-chart text {
font-size:15px !important;
font-family:"Source Sans Pro",sans-serif !important
}
.sequence-diagram [fill="#ffffff"],.flow-chart [fill="#ffffff"] {
fill:#f6f6f6
}
.sequence-diagram [stroke="#000000"],.flow-chart [stroke="#000000"] {
stroke:#3f3f3f
}
.sequence-diagram text[stroke="#000000"],.flow-chart text[stroke="#000000"] {
stroke:none
}
.sequence-diagram [fill="#000"],.flow-chart [fill="#000"],.sequence-diagram [fill="#000000"],.flow-chart [fill="#000000"],.sequence-diagram [fill="black"],.flow-chart [fill="black"] {
fill:#3f3f3f
}
ul,ol {
margin-bottom:1.1em
}
ul ul,ol ul,ul ol,ol ol {
margin-bottom:1.1em
}
kbd {
padding:.1em .6em;
border:1px solid rgba(63,63,63,0.25);
-webkit-box-shadow:0 1px 0 rgba(63,63,63,0.25);
box-shadow:0 1px 0 rgba(63,63,63,0.25);
font-size:.7em;
font-family:sans-serif;
background-color:#fff;
color:#333;
border-radius:3px;
display:inline-block;
margin:0 .1em;
white-space:nowrap
}
.toc ul {
list-style-type:none;
margin-bottom:15px
}
</style>
<!-- 该css供自定义样式 -->
<link href="../leanote-html.css" rel="stylesheet">
</head>
<body>
<div class="note-container">
<h1 class="title" id="leanote-title">6</h1>
<div class="content-html" id="leanote-content"><h2>里程碑 #2</h2><p>我们获得了不同机器配置下的每秒请求数上限，现在我们仅剩下前面提到的一个任务，即达到生产环境的3倍负载：</p><ul><li>每秒请求数900</li><li>TCP连接数2.1m</li><li>网络带宽30MB/s</li></ul><p>我们再次在达到220k个TCP连接数上受阻。无论怎么设置休眠时间，TCP连接数就是无法再上升。</p><p>让我们来计算下220k个TCP连接和每秒请求数900：110,000 / 900 ~= 120秒。这里使用110k，因为220k个连接同时包含了输入和输出，即双向总数。</p><p>这让我们怀疑2分钟是系统某处的一个限制。通过查看HAProxy日志可以验证，日志中大部分连接的总耗时都在120000毫秒左右。</p><pre>Mar 23 13:24:24 localhost haproxy[53750]: 172.168.0.232:48380
[23/Mar/2017:13:22:22.686] api~ api-backend/http31 39/0/2062/-1/122101
-1 0 - - SD-- 1714/1714/1678/35/0 0/0 {0,"",""}
"POST /ping HTTP/1.1"</pre><p>其中122101是总处理时长。日志中所有字段详细值参见HAProxy文档。</p><p>经过进一步研究，我们发现Node.js有2分钟的默认超时时间。</p><p>具体信息参见下面一些资料:</p><ul><li><a href="http://stackoverflow.com/questions/23925284/how-to-modify-the-nodejs-request-default-timeout-time" data-mce-href="http://stackoverflow.com/questions/23925284/how-to-modify-the-nodejs-request-default-timeout-time">如何修改Node.js请求默认超时时间</a></li><li><a href="https://nodejs.org/api/http.html#http_server_settimeout_msecs_callback" data-mce-href="https://nodejs.org/api/http.html#http_server_settimeout_msecs_callback">Node.js Http server文档</a></li></ul><p>解决了超时时间之后，事情并没有想象中的顺利。当连接数达到1.3m个时，HAProxy连接数突然下降到0，然后再次开始上升。通过<a href="http://www.linfo.org/dmesg.html" data-mce-href="http://www.linfo.org/dmesg.html">dmesg</a>命令查看内核日志之后，发现该现象是系统内存不足造成的。通过更换成16核64GB内存并设置<code>nbproc = 3</code>之后，最终达到了2.4m个连接。</p><h2>后端代码</h2><p>下面是HAProxy后端服务的源码。我们在代码中使用了statsd库，以获取服务端每秒请求数。</p><pre class=" language-nolanguage">var http = require('http');
// statsd client factory; the client below is used to count requests served.
var createStatsd = require('uber-statsd-client');
// Declared with var so the module is not leaked as an implicit global.
var qs = require('querystring');
var sdc = createStatsd({
    host: '172.168.0.134',
    port: 8125
});
// The listening port is taken from the first command-line argument.
var argv = process.argv;
var port = argv[2];
function randomIntInc (low, high)
{
    // Random integer between low and high, both ends included.
    var span = high - low + 1;
    return Math.floor(Math.random() * span + low);
}
/**
 * Writes one "pong" chunk to the response, then either ends the response
 * or schedules itself again after a random delay.
 *
 * @param res        Response object; only write() and end() are called on it.
 * @param times      Number of further chunks to send; may be a header string.
 * @param old_sleep  The random delay is drawn from 0..old_sleep+1 milliseconds.
 */
function sendResponse(res,times, old_sleep)
{
    res.write('pong');
    // Number() makes the conversion of a string header value explicit.
    if(Number(times) === 0)
    {
        res.end();
    }
    else
    {
        // Keep the delay local: an undeclared assignment would create a
        // global variable and throws a ReferenceError in strict mode.
        var sleep = randomIntInc(0, old_sleep+1);
        setTimeout(sendResponse, sleep, res,times-1, old_sleep);
    }
}
// Each request is answered with status 200 and then a series of "pong"
// chunks; the "sleep" and "times" request headers set the delay bound and
// the number of extra chunks.
var server = http.createServer(function(req, res)
{
    // Declared as locals so that requests do not write to shared globals.
    var headers = req.headers;
    // Explicit radix: the header value is always read as a decimal number.
    var old_sleep = parseInt(headers["sleep"], 10);
    var times = headers["times"] || 0;
    var sleep = randomIntInc(0, old_sleep+1);
    console.log(sleep);
    sdc.increment("ssl.server.http");
    res.writeHead(200);
    setTimeout(sendResponse, sleep, res, times, old_sleep);
});
// One hour, so that long-lived test connections are not cut by the server timeout.
server.timeout = 3600000;
server.listen(port);</pre><p>同时我们还有一个小脚本来运行多个后端服务。整个测试中我们使用了8台服务器，每台服务器上运行了10个后端服务进程，以避免后端服务成为压力测试的瓶颈。</p><pre>counter=0
# Start one backend server per port, 8282 through 8291, each as a background job.
while [ $counter -le 9 ]
do
port=$((8282+$counter))
nodejs /opt/local/share/test-tools/HikeCLI/nodeclient/httpserver.js $port &amp;
echo "Server created on port " $port
((counter++))
done
echo "Created all servers"</pre><h2>客户端代码</h2><p>对于客户端，每个IP有63k个TCP连接的限制。如果对此不了解，参见本系列的<a href="https://medium.freecodecamp.com/load-testing-haproxy-part-2-4c8677780df6" data-mce-href="https://medium.freecodecamp.com/load-testing-haproxy-part-2-4c8677780df6">前面一篇文章</a>。</p><p>因此为了达到2.4m个连接（双向连接，对于客户端来说要发起1.2m个连接），我们需要大约20台机器。在所有20台机器上同时运行Vegeta命令非常痛苦，即使使用了类似<a href="https://github.com/brockgr/csshx" data-mce-href="https://github.com/brockgr/csshx">csshx工具</a>，仍然需要从所有Vegeta合并最终测试结果。</p><p>脚本如下:</p><pre class="brush:sh">result_file=$1
# Client machines; each one runs its own Vegeta attacker.
declare -a machines=(
    "172.168.0.138" "172.168.0.141" "172.168.0.142" "172.168.0.18"
    "172.168.0.5" "172.168.0.122" "172.168.0.123" "172.168.0.124"
    "172.168.0.232" "172.168.0.244" "172.168.0.170" "172.168.0.179"
    "172.168.0.59" "172.168.0.68" "172.168.0.137" "172.168.0.155"
    "172.168.0.154" "172.168.0.45" "172.168.0.136" "172.168.0.143"
)
bins=""
commas=""
# Build two comma-separated lists: the per-machine result files and the host names.
for i in "${machines[@]}"; do bins=$bins","$i".bin"; commas=$commas","$i; done;
# Drop the leading comma from each list.
bins=${bins:1}
commas=${commas:1}
# Run the attack on all machines through pdsh; each machine writes to $result_file.
pdsh -b -w "$commas" 'echo "POST http://test.haproxy.in:80/ping" | /home/sachinm/.linuxbrew/bin/vegeta -cpus=32 attack -connections=1000000 -header="sleep:20" -header="times:2" -body=post_smaller.txt -timeout=2h -rate=3000 -workers=500 &gt; ' $result_file
# Copy every machine's result file back as IP.bin for the merged report.
for i in "${machines[@]}"; do scp sachinm@$i:/home/sachinm/$result_file $i.bin ; done;
vegeta report -inputs="$bins"</pre><p>幸好这里使用了<a href="https://github.com/grondo/pdsh" data-mce-href="https://github.com/grondo/pdsh">pdsh工具</a>，使得我们能够在多台远程服务器上并行地执行命令。同时Vegeta也提供了结果合并功能，这也是我们急需的。</p><h2>HAProxy配置</h2><p>本节大概是读者最希望了解的内容，下面是我们在压力测试场景中使用的HAProxy配置。其中最重要的部分是<code>nbproc</code>和<code>maxconn</code>设置。其中<code>maxconn</code>设置允许HAProxy能够支持我们期望达到的TCP连接数。</p><p><code>maxconn</code>&nbsp;设置会影响HAProxy进程的ulimit，例如：</p><p>&nbsp;<img id="__LEANOTE_D_IMG_1505976168009" src="6_files/59c3d21bd01cce1c4d00003c.png" alt="" width="550" data-mce-src="/api/file/getImage?fileId=59c3d21bd01cce1c4d00003c"></p><p>最大文件打开数设置到4m，因为HAProxy的最大连接数设置成了2m。干净利落！</p><p><br></p></div>
</div>
<!-- 该js供其它处理 -->
<script src="../leanote-html.js"></script>
</body>
</html>