提交 2312a6db 作者: zhengfg

1、爬虫加入翻译

上级 277a4cf2
...@@ -6,6 +6,7 @@ import com.diaoyun.zion.chinafrica.vo.ProductResponse; ...@@ -6,6 +6,7 @@ import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.HttpClientUtil; import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil; import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.SpiderUtil; import com.diaoyun.zion.master.util.SpiderUtil;
import com.diaoyun.zion.master.util.TranslateHelper;
import net.sf.json.JSONObject; import net.sf.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -24,7 +25,7 @@ public class AfricaShopItemSpider implements IItemSpider { ...@@ -24,7 +25,7 @@ public class AfricaShopItemSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(AfricaShopItemSpider.class); private static Logger logger = LoggerFactory.getLogger(AfricaShopItemSpider.class);
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException{ public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, InterruptedException, ExecutionException, TimeoutException {
JSONObject resultObj; JSONObject resultObj;
//获取url中的网页内容 > //获取url中的网页内容 >
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.AfriEshop.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.AfriEshop.getValue());
...@@ -33,6 +34,8 @@ public class AfricaShopItemSpider implements IItemSpider { ...@@ -33,6 +34,8 @@ public class AfricaShopItemSpider implements IItemSpider {
//格式化为封装数据 //格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatAfricaShopProductResponse(resultObj); ProductResponse productResponse = SpiderUtil.formatAfricaShopProductResponse(resultObj);
resultObj = JSONObject.fromObject(productResponse); resultObj = JSONObject.fromObject(productResponse);
//翻译
TranslateHelper.translateProductResponse(resultObj);
return resultObj; return resultObj;
} }
......
...@@ -46,30 +46,8 @@ public class GapItemSpider implements IItemSpider { ...@@ -46,30 +46,8 @@ public class GapItemSpider implements IItemSpider {
//格式化为封装数据 //格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatGapProductResponse(resultObj.getJSONObject("data")); ProductResponse productResponse = SpiderUtil.formatGapProductResponse(resultObj.getJSONObject("data"));
resultObj=JSONObject.fromObject(productResponse); resultObj=JSONObject.fromObject(productResponse);
//////////////////////翻译////////////////////// //翻译
List<Map<String, Object>> futureList= new ArrayList<>(); TranslateHelper.translateProductResponse(resultObj);
Map<String, JSONArray> productPropSet = resultObj.getJSONObject("productPropSet");
if(!productPropSet.isEmpty()) {
//调用腾讯ai,翻译属性值
TranslateHelper.translateProp(futureList,productPropSet);
//翻译属性名
//翻译属性名
JSONArray translateArray =productPropSet.get("translateArray");
for(int i=0;i<translateArray.size();i++) {
Map <String,Object> keyTranslateMap=translateArray.getJSONObject(i);
for(Map.Entry<String,Object> entry:keyTranslateMap.entrySet()) {
String key=entry.getKey();
//翻译属性名
if(ValidateUtils.isContainChinese(key)) {
TranslateHelper.translateText(futureList,keyTranslateMap,key);
}
}
}
}
//等待翻译结果
TranslateHelper.waitForResult(futureList);
resultObj.put("productPropSet",productPropSet);
//////////////////////翻译 END//////////////////////
} }
return resultObj; return resultObj;
......
...@@ -10,6 +10,8 @@ import org.slf4j.LoggerFactory; ...@@ -10,6 +10,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.io.IOException; import java.io.IOException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/** /**
* nike数据爬虫 * nike数据爬虫
...@@ -21,7 +23,7 @@ public class NikeItemSpider implements IItemSpider { ...@@ -21,7 +23,7 @@ public class NikeItemSpider implements IItemSpider {
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException{ public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, InterruptedException, ExecutionException, TimeoutException {
JSONObject resultObj; JSONObject resultObj;
//获取url中的网页内容 > //获取url中的网页内容 >
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue());
...@@ -30,6 +32,8 @@ public class NikeItemSpider implements IItemSpider { ...@@ -30,6 +32,8 @@ public class NikeItemSpider implements IItemSpider {
//格式化为封装数据 //格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatNikeProductResponse(resultObj); ProductResponse productResponse = SpiderUtil.formatNikeProductResponse(resultObj);
resultObj = JSONObject.fromObject(productResponse); resultObj = JSONObject.fromObject(productResponse);
//翻译
TranslateHelper.translateProductResponse(resultObj);
return resultObj; return resultObj;
} }
......
...@@ -8,6 +8,7 @@ import net.sf.json.JSONObject; ...@@ -8,6 +8,7 @@ import net.sf.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -84,6 +85,8 @@ public class TranslateHelper { ...@@ -84,6 +85,8 @@ public class TranslateHelper {
//翻译属性值 //翻译属性值
if(ValidateUtils.isContainChinese((String) productPropMap.get("propName"))) { if(ValidateUtils.isContainChinese((String) productPropMap.get("propName"))) {
TranslateHelper.translateText(futureList,productPropMap, (String) productPropMap.get("propName")); TranslateHelper.translateText(futureList,productPropMap, (String) productPropMap.get("propName"));
} else {
productPropMap.put("translate",(String) productPropMap.get("propName"));
} }
} }
...@@ -92,4 +95,44 @@ public class TranslateHelper { ...@@ -92,4 +95,44 @@ public class TranslateHelper {
} }
productPropSetMap.put("translateArray",translateArray); productPropSetMap.put("translateArray",translateArray);
} }
/**
 * Translates the property set of a scraped product and stores it back on {@code resultObj}.
 * <p>
 * The {@code "productPropSet"} object is read from {@code resultObj}. When it is not empty it is
 * passed to {@code translateProp} (property values; per the original note this calls the
 * Tencent AI service) and every element of its {@code "translateArray"} is then processed for
 * property-name translation. Afterwards {@code waitForResult} is invoked on the collected list
 * and the property set is written back under {@code "productPropSet"}.
 *
 * @param resultObj JSONObject in ProductResponse format
 * @throws InterruptedException declared for the translation helpers; presumably raised while
 *                              waiting for results (confirm against {@code waitForResult})
 * @throws ExecutionException   declared for the translation helpers; presumably a failed
 *                              translation task (confirm against {@code waitForResult})
 * @throws TimeoutException     declared for the translation helpers; presumably a translation
 *                              that did not finish in time (confirm against {@code waitForResult})
 */
public static void translateProductResponse(JSONObject resultObj) throws InterruptedException, ExecutionException, TimeoutException {
    // Shared list that the helper calls fill in and waitForResult consumes at the end.
    List<Map<String, Object>> pendingTranslations = new ArrayList<>();
    Map<String, JSONArray> propSet = resultObj.getJSONObject("productPropSet");

    if (!propSet.isEmpty()) {
        // Property values first.
        TranslateHelper.translateProp(pendingTranslations, propSet);

        // Then the property names, one "translateArray" element at a time.
        JSONArray nameEntries = propSet.get("translateArray");
        for (int idx = 0; idx < nameEntries.size(); idx++) {
            Map<String, Object> nameEntry = nameEntries.getJSONObject(idx);
            translatePropNameEntry(pendingTranslations, nameEntry);
        }
    }

    // Wait for the translation results, then publish the property set on the response.
    TranslateHelper.waitForResult(pendingTranslations);
    resultObj.put("productPropSet", propSet);
}

/**
 * Processes a single element of {@code "translateArray"}: every key that contains Chinese is
 * handed to {@code translateText}; if no key qualified, the last key visited (or the empty
 * string when there were no keys) is stored under {@code "translate"}.
 *
 * @param futureList list passed through to {@code translateText}
 * @param nameEntry  the element whose keys are inspected and which receives the fallback value
 */
private static void translatePropNameEntry(List<Map<String, Object>> futureList, Map<String, Object> nameEntry) throws InterruptedException, ExecutionException, TimeoutException {
    String lastSeenKey = "";
    boolean submitted = false;

    for (String candidate : nameEntry.keySet()) {
        lastSeenKey = candidate;
        if (!ValidateUtils.isContainChinese(candidate)) {
            continue;
        }
        submitted = true;
        TranslateHelper.translateText(futureList, nameEntry, candidate);
    }

    if (submitted) {
        return;
    }
    // Nothing needed translating: reuse the key itself as its translation.
    nameEntry.put("translate", lastSeenKey);
}
} }
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论