提交 2312a6db authored 作者: zhengfg's avatar zhengfg

1、爬虫加入翻译

上级 277a4cf2
......@@ -6,6 +6,7 @@ import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.SpiderUtil;
import com.diaoyun.zion.master.util.TranslateHelper;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -24,7 +25,7 @@ public class AfricaShopItemSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(AfricaShopItemSpider.class);
@Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException{
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, InterruptedException, ExecutionException, TimeoutException {
JSONObject resultObj;
//获取url中的网页内容 >
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.AfriEshop.getValue());
......@@ -33,6 +34,8 @@ public class AfricaShopItemSpider implements IItemSpider {
//格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatAfricaShopProductResponse(resultObj);
resultObj = JSONObject.fromObject(productResponse);
//翻译
TranslateHelper.translateProductResponse(resultObj);
return resultObj;
}
......
......@@ -46,30 +46,8 @@ public class GapItemSpider implements IItemSpider {
//格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatGapProductResponse(resultObj.getJSONObject("data"));
resultObj=JSONObject.fromObject(productResponse);
//////////////////////翻译//////////////////////
List<Map<String, Object>> futureList= new ArrayList<>();
Map<String, JSONArray> productPropSet = resultObj.getJSONObject("productPropSet");
if(!productPropSet.isEmpty()) {
//调用腾讯ai,翻译属性值
TranslateHelper.translateProp(futureList,productPropSet);
//翻译属性名
//翻译属性名
JSONArray translateArray =productPropSet.get("translateArray");
for(int i=0;i<translateArray.size();i++) {
Map <String,Object> keyTranslateMap=translateArray.getJSONObject(i);
for(Map.Entry<String,Object> entry:keyTranslateMap.entrySet()) {
String key=entry.getKey();
//翻译属性名
if(ValidateUtils.isContainChinese(key)) {
TranslateHelper.translateText(futureList,keyTranslateMap,key);
}
}
}
}
//等待翻译结果
TranslateHelper.waitForResult(futureList);
resultObj.put("productPropSet",productPropSet);
//////////////////////翻译 END//////////////////////
//翻译
TranslateHelper.translateProductResponse(resultObj);
}
return resultObj;
......
......@@ -10,6 +10,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
* nike数据爬虫
......@@ -21,7 +23,7 @@ public class NikeItemSpider implements IItemSpider {
@Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException{
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, InterruptedException, ExecutionException, TimeoutException {
JSONObject resultObj;
//获取url中的网页内容 >
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue());
......@@ -30,6 +32,8 @@ public class NikeItemSpider implements IItemSpider {
//格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatNikeProductResponse(resultObj);
resultObj = JSONObject.fromObject(productResponse);
//翻译
TranslateHelper.translateProductResponse(resultObj);
return resultObj;
}
......
......@@ -8,6 +8,7 @@ import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
......@@ -84,6 +85,8 @@ public class TranslateHelper {
//翻译属性值
if(ValidateUtils.isContainChinese((String) productPropMap.get("propName"))) {
TranslateHelper.translateText(futureList,productPropMap, (String) productPropMap.get("propName"));
} else {
productPropMap.put("translate",(String) productPropMap.get("propName"));
}
}
......@@ -92,4 +95,44 @@ public class TranslateHelper {
}
productPropSetMap.put("translateArray",translateArray);
}
/**
 * Translates scraped product data in place.
 *
 * <p>Reads the {@code productPropSet} entry of {@code resultObj}. When that entry is non-empty it is
 * passed to {@code translateProp}, and every element of its {@code translateArray} is then inspected:
 * each key that contains Chinese is submitted through {@code translateText}; if an element has no
 * such key, its {@code "translate"} entry is set to the last key that was iterated (or to an empty
 * string when the element has no keys at all). Afterwards the method waits on the collected
 * translation tasks and stores {@code productPropSet} back into {@code resultObj}.
 *
 * @param resultObj JSONObject in ProductResponse format; modified in place
 * @throws InterruptedException declared by this method; presumably raised while waiting for results (TODO confirm)
 * @throws ExecutionException declared by this method; presumably raised by a failed translation task (TODO confirm)
 * @throws TimeoutException declared by this method; presumably raised when waiting takes too long (TODO confirm)
 */
public static void translateProductResponse(JSONObject resultObj) throws InterruptedException, ExecutionException, TimeoutException {
    // Collects the pending translation tasks that are awaited at the end.
    List<Map<String, Object>> pendingTasks = new ArrayList<>();
    Map<String, JSONArray> productPropSet = resultObj.getJSONObject("productPropSet");

    if (!productPropSet.isEmpty()) {
        // Call Tencent AI to translate the property values.
        TranslateHelper.translateProp(pendingTasks, productPropSet);

        // Translate the property names.
        JSONArray nameEntries = productPropSet.get("translateArray");
        for (int idx = 0; idx < nameEntries.size(); idx++) {
            Map<String, Object> nameEntry = nameEntries.getJSONObject(idx);
            String lastKey = "";
            boolean untranslated = true;

            for (Map.Entry<String, Object> field : nameEntry.entrySet()) {
                lastKey = field.getKey();
                if (!ValidateUtils.isContainChinese(lastKey)) {
                    continue;
                }
                // The key contains Chinese, so queue it for translation.
                untranslated = false;
                TranslateHelper.translateText(pendingTasks, nameEntry, lastKey);
            }

            if (untranslated) {
                // Nothing was queued: reuse the last iterated key as the translation.
                nameEntry.put("translate", lastKey);
            }
        }
    }

    // Wait for the translation results, then write the property set back.
    TranslateHelper.waitForResult(pendingTasks);
    resultObj.put("productPropSet", productPropSet);
}
}
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论