`
learnworld
  • 浏览: 168309 次
  • 性别: Icon_minigender_1
  • 来自: 南京
社区版块
存档分类
最新评论

多线程批量检测未注册域名

    博客分类:
  • java
阅读更多

最近想注册一个域名,使用万网尝试了很多域名,基本都已被注册。之前听说双拼域名很火,所以想写个脚本,看看哪些双拼域名还未被注册。

 

一、查询接口

网上搜索了一下,万网的域名查询接口比较简单易用,查询URL格式为: http://panda.www.net.cn/cgi-bin/check.cgi?area_domain=aaa.com

返回值及含义:

210 : Domain name is available
211 : Domain name is not available
212 : Domain name is invalid
214 : Unknown error

 

二、编程思路

1. DomainGenerator读取文件pinyin.txt,获取所有可用的拼音音节(每行一个)。遍历拼音音节,两两组合成双拼域名。这个拼音列表是从网上搜索来的,可能会有纰漏。

2. 创建域名检测线程DomainRunner,每个线程采用httpclient调用万网的域名查询接口。

3. 每个线程调用DomainValidator检查返回结果。

4. 线程ResultRunner将可用域名写入domain.txt文件。

 

三、核心代码

DomainGenerator.java, 启动类,读取拼音列表,组装需要检测的域名,创建检测线程和结果处理线程。

 

package com.learnworld;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;

/**
 * Entry point of the domain checker.
 *
 * <p>Reads pinyin syllables from {@code pinyin.txt}, combines every ordered pair into a
 * {@code <a><b>.com} candidate, starts the {@link DomainRunner} check threads plus one
 * {@link ResultRunner} thread, and makes sure every available domain ends up in
 * {@code domain.txt} before the resources are released.
 */
public class DomainGenerator {

	/** Input file: one pinyin syllable per line. */
	private static final String PINYIN_FILE = "pinyin.txt";

	/** Output file: one available domain per line. */
	private static final String RESULT_FILE = "domain.txt";

	/** Number of concurrent check threads. */
	private static final int DOMAIN_THREAD_NUM = 3;

	/** How long to wait for the worker threads to stop after the checks are finished. */
	private static final long STOP_TIMEOUT_SECONDS = 30;

	/**
	 * Runs the whole check. Failures are printed to stderr; all resources (threads, output file,
	 * HTTP client) are released on every exit path.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// available domains, waiting to be written to the result file
		LinkedBlockingQueue<String> resultQueue = new LinkedBlockingQueue<String>();
		// number of domains that turned out not to be available (progress statistics)
		AtomicInteger count = new AtomicInteger(0);

		// HttpClient initialization
		PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
		cm.setMaxTotal(20);
		cm.setDefaultMaxPerRoute(20);
		CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

		BufferedWriter writer = null;
		ExecutorService executor = null;
		try {
			ArrayBlockingQueue<String> taskQueue = buildTaskQueue(readPinyin(PINYIN_FILE));
			writer = new BufferedWriter(new FileWriter(RESULT_FILE));

			CountDownLatch downLatch = new CountDownLatch(DOMAIN_THREAD_NUM);
			executor = Executors.newFixedThreadPool(DOMAIN_THREAD_NUM + 1);

			// start domain check threads
			for (int i = 0; i < DOMAIN_THREAD_NUM; i++) {
				executor.execute(new DomainRunner(taskQueue, resultQueue, downLatch, count, httpClient));
			}

			// start result handle thread
			executor.execute(new ResultRunner(resultQueue, writer));

			downLatch.await();
			System.out.println("All tasks are done!");

			// The result thread blocks on the queue, so it has to be interrupted. Wait for it to
			// exit before touching the writer, otherwise it could be closed in the middle of a write.
			executor.shutdownNow();
			if (!executor.awaitTermination(STOP_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
				System.out.println("Worker threads did not stop within " + STOP_TIMEOUT_SECONDS + "s.");
			}

			// Results that were still queued when the result thread was interrupted must not be lost.
			writePendingResults(resultQueue, writer);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (executor != null) {
				// no-op when already shut down; on the error path it keeps pool threads from pinning the JVM
				executor.shutdownNow();
			}
			closeQuietly(writer);
			closeQuietly(httpClient);
		}
	}

	/**
	 * Reads the pinyin syllables, one per line, skipping blank lines.
	 *
	 * @param path file to read
	 * @return the syllables in file order, surrounding whitespace removed
	 * @throws IOException if the file cannot be read
	 */
	private static List<String> readPinyin(String path) throws IOException {
		List<String> items = new ArrayList<String>();
		BufferedReader reader = new BufferedReader(new FileReader(path));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				String syllable = line.trim();
				// a blank line would otherwise produce bogus candidates such as ".com"
				if (!syllable.isEmpty()) {
					items.add(syllable);
				}
			}
		} finally {
			reader.close();
		}
		return items;
	}

	/**
	 * Builds the queue of candidate domains: every ordered pair of syllables followed by
	 * {@code .com}. The queue is sized from the input so that no candidate is dropped.
	 *
	 * @param items pinyin syllables
	 * @return queue holding {@code items.size()}² candidates
	 */
	private static ArrayBlockingQueue<String> buildTaskQueue(List<String> items) {
		long total = (long) items.size() * items.size();
		if (total > Integer.MAX_VALUE) {
			throw new IllegalArgumentException("Too many pinyin entries: " + items.size());
		}
		// ArrayBlockingQueue rejects a capacity of 0, so an empty list still gets capacity 1
		ArrayBlockingQueue<String> taskQueue = new ArrayBlockingQueue<String>((int) Math.max(1L, total));
		for (String first : items) {
			for (String second : items) {
				// add() fails loudly instead of silently dropping a domain like offer() would
				taskQueue.add(first + second + ".com");
			}
		}
		return taskQueue;
	}

	/**
	 * Writes every result still waiting in the queue to the result file.
	 *
	 * @param resultQueue queue of available domains
	 * @param writer result file writer
	 * @throws IOException if writing fails
	 */
	private static void writePendingResults(LinkedBlockingQueue<String> resultQueue, BufferedWriter writer)
			throws IOException {
		String pending;
		while ((pending = resultQueue.poll()) != null) {
			writer.write(pending);
			writer.newLine();
		}
		writer.flush();
	}

	/**
	 * Closes a resource, reporting but not propagating a failure so that the remaining
	 * resources are still closed.
	 *
	 * @param resource resource to close, may be {@code null}
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

DomainRunner: 域名检测线程,从域名队列domainQueue中读取域名,调用接口进行检测。如果域名可用,将结果放入resultQueue中等待写入文件。

package com.learnworld;

import java.io.IOException;
import java.util.Calendar;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;

/**
 * Domain check worker: takes domains from the task queue until it is empty, asks the query
 * interface about each one, and puts available domains on the result queue.
 *
 * <p>The latch is counted down exactly once when the worker ends, even if it ends with an
 * unexpected exception, so a thread waiting on the latch is never left blocked.
 */
public class DomainRunner implements Runnable {

	/** Query URL; the domain to check is appended. */
	private static final String CHECK_URL_PREFIX = "http://panda.www.net.cn/cgi-bin/check.cgi?area_domain=";

	/** Timeout in milliseconds used for socket reads, connects and connection leases. */
	private static final int TIMEOUT_MILLIS = 5000;

	/** A progress line is printed every time this many domains were found unavailable. */
	private static final int PROGRESS_INTERVAL = 100;

	/** Request settings are identical for every request, so they are built only once. */
	private static final RequestConfig REQUEST_CONFIG = RequestConfig.custom()
			.setSocketTimeout(TIMEOUT_MILLIS)
			.setConnectTimeout(TIMEOUT_MILLIS)
			.setConnectionRequestTimeout(TIMEOUT_MILLIS)
			.build();

	private final ArrayBlockingQueue<String> domainQueue;
	private final LinkedBlockingQueue<String> resultQueue;
	private final CountDownLatch downLatch;
	private final AtomicInteger count;
	private final CloseableHttpClient httpClient;

	/**
	 * @param domainQueue domains that still need to be checked
	 * @param resultQueue receives the domains found to be available
	 * @param downLatch counted down once when this worker finishes
	 * @param count shared counter of domains that were not found available
	 * @param httpClient client used for the query requests
	 */
	public DomainRunner(ArrayBlockingQueue<String> domainQueue,
			LinkedBlockingQueue<String> resultQueue, CountDownLatch downLatch,
			AtomicInteger count, CloseableHttpClient httpClient) {
		super();
		this.domainQueue = domainQueue;
		this.resultQueue = resultQueue;
		this.downLatch = downLatch;
		this.count = count;
		this.httpClient = httpClient;
	}

	/**
	 * Processes domains until the task queue is empty, then signals completion on the latch.
	 */
	@Override
	public void run() {
		try {
			String domain;
			while ((domain = domainQueue.poll()) != null) {
				if (checkDomain(domain)) {
					resultQueue.offer(domain);
				} else {
					reportUnavailable();
				}
			}
		} finally {
			// must run on every exit path, otherwise the thread awaiting the latch hangs forever
			downLatch.countDown();
		}
	}

	/**
	 * Queries the interface for one domain.
	 *
	 * @param domain domain name to check
	 * @return {@code true} only if the interface answered successfully and reported the domain
	 *         as available; any failure (bad URL, I/O error, non-2xx status) yields {@code false}
	 */
	private boolean checkDomain(String domain) {
		HttpGet httpGet = null;
		CloseableHttpResponse response = null;
		try {
			// created inside the try: a domain that does not form a valid URI makes the
			// constructor throw, which must not kill the worker
			httpGet = new HttpGet(CHECK_URL_PREFIX + domain);
			httpGet.setConfig(REQUEST_CONFIG);
			httpGet.setHeader("Connection", "close");
			HttpContext context = new BasicHttpContext();

			response = httpClient.execute(httpGet, context);
			int status = response.getStatusLine().getStatusCode();
			if (status >= 200 && status < 300) {
				HttpEntity entity = response.getEntity();
				String resultXml = EntityUtils.toString(entity);
				return DomainValidator.isAvailableDomainForResponse(resultXml);
			}
			System.out.println(domain + " check error.");
			return false;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			if (httpGet != null) {
				httpGet.releaseConnection();
			}
			if (response != null) {
				try {
					response.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Counts one domain that was not found available and prints a progress line with the
	 * current time at every {@link #PROGRESS_INTERVAL}-th such domain.
	 */
	private void reportUnavailable() {
		int totalInvalid = count.incrementAndGet();
		if (totalInvalid % PROGRESS_INTERVAL == 0) {
			System.out.println(totalInvalid + " " + Calendar.getInstance().getTime());
		}
	}

}

 

DomainValidator: 对万网返回结果进行检查,判断域名是否可用。

package com.learnworld;

/**
 * Interprets the response body of the domain query interface.
 */
public class DomainValidator {

	/** Marker that precedes the numeric result code in the response body. */
	private static final String CODE_PREFIX = "<original>";

	/** Pause applied after a response that contains none of the known result codes. */
	private static final long BACK_OFF_MILLIS = 60000;

	/**
	 * Tells whether the response reports the domain as available.
	 *
	 * <p>Code 210 means available; 211, 212 and 214 mean not available. A non-empty response
	 * with none of these codes is reported on stdout and the calling thread is paused for
	 * {@value #BACK_OFF_MILLIS} ms before {@code false} is returned. If the thread is
	 * interrupted during that pause, the pause ends early and the interrupt flag is left set
	 * so the caller can react to it.
	 *
	 * @param responseXml response body, may be {@code null} or empty
	 * @return {@code true} only if the response contains result code 210
	 */
	public static boolean isAvailableDomainForResponse(String responseXml) {
		if (responseXml == null || responseXml.isEmpty()) {
			return false;
		}

		if (responseXml.contains(CODE_PREFIX + "210")) {
			return true;
		}

		boolean knownNegative = responseXml.contains(CODE_PREFIX + "211")
				|| responseXml.contains(CODE_PREFIX + "212")
				|| responseXml.contains(CODE_PREFIX + "214");
		if (!knownNegative) {
			System.out.println("api callback error!");
			try {
				Thread.sleep(BACK_OFF_MILLIS);
			} catch (InterruptedException e) {
				// restore the flag: swallowing it would hide the stop request from the caller
				Thread.currentThread().interrupt();
			}
		}
		return false;
	}

}

 

ResultRunner: 结果处理线程,将可用域名写入文件domain.txt中。

package com.learnworld;

import java.io.BufferedWriter;
import java.util.concurrent.LinkedBlockingQueue;

/**
 * Result handling worker: writes every domain taken from the result queue as one line to the
 * given writer.
 *
 * <p>The worker runs until its thread is interrupted. On interruption it first writes whatever
 * is still waiting in the queue, so no result is lost, and then ends with the interrupt flag set.
 * The writer is not closed here; that is left to the caller.
 */
public class ResultRunner implements Runnable {

	private final LinkedBlockingQueue<String> resultQueue;
	private final BufferedWriter writer;

	/**
	 * @param resultQueue queue of available domains to persist
	 * @param writer destination; each domain is written on its own line
	 */
	public ResultRunner(LinkedBlockingQueue<String> resultQueue,
			BufferedWriter writer) {
		super();
		this.resultQueue = resultQueue;
		this.writer = writer;
	}

	/**
	 * Writes results as they arrive until interrupted; write failures are printed and end
	 * the worker.
	 */
	@Override
	public void run() {
		try {
			try {
				// take() blocks and never returns null, so only an interrupt ends this loop
				while (true) {
					writeLine(resultQueue.take());
				}
			} catch (InterruptedException stopRequest) {
				// stop was requested: persist what is still queued instead of dropping it
				String pending;
				while ((pending = resultQueue.poll()) != null) {
					writeLine(pending);
				}
				Thread.currentThread().interrupt();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Writes one domain followed by a line separator and flushes, so the line reaches the
	 * underlying stream right away.
	 *
	 * @param domain domain to write
	 * @throws java.io.IOException if writing fails
	 */
	private void writeLine(String domain) throws java.io.IOException {
		writer.write(domain);
		writer.newLine();
		writer.flush();
	}

}

 

 

四、总结

1. 第一版程序采用单线程处理,性能很差,每100个域名大概需要90s左右,主要原因是网络IO延迟比较大。将代码修改为多线程处理,创建两个检测线程,每100个域名大概需要30s左右。

 

2. 提高检测线程数会加快处理性能,但建议不超过三个,原因有两个:

1) 万网采用了阿里云的过滤技术,如果一段时间内某个IP的请求数很高,就会将该IP加入屏蔽列表。 我开始采用了100个线程,不到1分钟就被屏蔽。

2)当请求数很高时,网络连接不能得到及时释放,很多TCP连接处于TIME_WAIT状态,进而出现BindException错误。

 

3. 我遍历了所有的双拼域名,目前约有1万个域名尚未被注册,结果见附件。我又遍历了四位及以下的纯英文字母域名,已经全部被注册。

 

需要注册双拼域名的童鞋要抓紧了~~

 

 

分享到:
评论
2 楼 Jindev 2016-11-09  
 
1 楼 aliensb 2016-04-06  
api callback error!是怎么回事,谢谢

相关推荐

Global site tag (gtag.js) - Google Analytics