提交 4ac0e175 authored 作者: zhengfg's avatar zhengfg

1、gap爬虫加入翻译

上级 ccc87b22
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.*;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.http.message.BasicHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
......@@ -34,7 +24,7 @@ public class GapItemSpider implements IItemSpider {
private static final String gapUrl="https://apicn.gap.cn/gap/store/product/list/searchProductByCondition.do";
@Override
public JSONObject captureItem(String targetUrl) throws IOException {
public JSONObject captureItem(String targetUrl) throws IOException, InterruptedException, ExecutionException, TimeoutException {
JSONObject resultObj;
//获取链接中的商品spuCode
String itemId= getItemId(targetUrl);
......@@ -56,11 +46,38 @@ public class GapItemSpider implements IItemSpider {
//格式化为封装数据
ProductResponse productResponse = SpiderUtil.formatGapProductResponse(resultObj.getJSONObject("data"));
resultObj=JSONObject.fromObject(productResponse);
//////////////////////翻译//////////////////////
List<Map<String, Object>> futureList= new ArrayList<>();
Map<String, JSONArray> productPropSet = resultObj.getJSONObject("productPropSet");
if(!productPropSet.isEmpty()) {
//调用腾讯ai,翻译属性值
TranslateHelper.translateProp(futureList,productPropSet);
//翻译属性名
//翻译属性名
JSONArray translateArray =productPropSet.get("translateArray");
for(int i=0;i<translateArray.size();i++) {
Map <String,Object> keyTranslateMap=translateArray.getJSONObject(i);
for(Map.Entry<String,Object> entry:keyTranslateMap.entrySet()) {
String key=entry.getKey();
//翻译属性名
if(ValidateUtils.isContainChinese(key)) {
TranslateHelper.translateText(futureList,keyTranslateMap,key);
}
}
}
}
//等待翻译结果
TranslateHelper.waitForResult(futureList);
resultObj.put("productPropSet",productPropSet);
//////////////////////翻译 END//////////////////////
}
return resultObj;
}
private String getItemId(String targetUrl) {
String spuCode=targetUrl.substring(targetUrl.lastIndexOf("/")+1);
int firstUnder=spuCode.indexOf("_");
......
......@@ -4,23 +4,15 @@ import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.*;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
* Gap数据爬虫
* nike数据爬虫
*/
@Component("nikeItemSpider")
public class NikeItemSpider implements IItemSpider {
......
......@@ -2,9 +2,6 @@ package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.bo.TencentTranslateParam;
import com.diaoyun.zion.master.thread.TaskLimitSemaphore;
import com.diaoyun.zion.master.thread.TranslateCallable;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.TranslateHelper;
......@@ -12,28 +9,17 @@ import com.diaoyun.zion.master.util.ValidateUtils;
import net.sf.json.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeoutException;
/**
......@@ -102,7 +88,7 @@ public class TbItemSpider implements IItemSpider {
* @throws ExecutionException
* @throws InterruptedException
*/
private void translateProp(List<Map<String, Object>> futureList,Map<String, Object> propMap) throws ExecutionException, InterruptedException {
private void translateProp(List<Map<String, Object>> futureList,Map<String, Object> propMap) {
/*腾讯翻译*/
for(Map.Entry<String,Object>entry : propMap.entrySet()) {
String key=entry.getKey();
......@@ -124,7 +110,7 @@ public class TbItemSpider implements IItemSpider {
* @throws ExecutionException
* @throws InterruptedException
*/
private void translateTitle(List<Map<String, Object>> futureList,Map<String, Object> skuMap) throws ExecutionException, InterruptedException {
private void translateTitle(List<Map<String, Object>> futureList,Map<String, Object> skuMap) {
for(Map.Entry<String,Object>entry : skuMap.entrySet()) {
String key=entry.getKey();
if(entry.getValue() instanceof Map) {
......
......@@ -3,6 +3,8 @@ package com.diaoyun.zion.master.util;
import com.diaoyun.zion.master.bo.TencentTranslateParam;
import com.diaoyun.zion.master.thread.TaskLimitSemaphore;
import com.diaoyun.zion.master.thread.TranslateCallable;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -62,4 +64,32 @@ public class TranslateHelper {
valeMap.put("translate",targetText);
}
}
/**
 * Submits the specification attribute values of a product's property set for translation
 * and records the attribute names so they can be translated separately.
 *
 * <p>Each entry of {@code productPropSetMap} is treated as one attribute: the entry key is
 * the attribute name and the entry value is the array of its option objects. Every option
 * whose {@code "propName"} contains Chinese text (per {@code ValidateUtils.isContainChinese})
 * is passed to {@code TranslateHelper.translateText} together with {@code futureList}.
 *
 * <p>After all entries are visited, an extra entry is stored in the same map under the key
 * {@code "translateArray"}. It holds one single-entry object per attribute, mapping the
 * attribute name to itself.
 *
 * @param futureList        list handed to {@code translateText}; the original documentation
 *                          describes it as the thread callbacks of the translation tasks
 * @param productPropSetMap the {@code productPropSet} map of a {@code ProductResponse};
 *                          modified in place (gains the {@code "translateArray"} entry)
 */
public static void translateProp(List<Map<String, Object>> futureList, Map<String,JSONArray> productPropSetMap) {
    JSONArray attributeNames = new JSONArray();

    /* Tencent translation */
    for (Map.Entry<String, JSONArray> attribute : productPropSetMap.entrySet()) {
        // Remember the attribute name as a {name: name} pair.
        String attributeName = attribute.getKey();
        Map<String, Object> nameHolder = new HashMap<>();
        nameHolder.put(attributeName, attributeName);
        attributeNames.add(nameHolder);

        // Submit every option value that still contains Chinese text.
        JSONArray options = attribute.getValue();
        for (int idx = 0; idx < options.size(); idx++) {
            JSONObject option = options.getJSONObject(idx);
            String optionText = (String) option.get("propName");
            if (!ValidateUtils.isContainChinese(optionText)) {
                continue;
            }
            TranslateHelper.translateText(futureList, option, optionText);
        }
    }

    // Added only after the iteration finishes, so the map is not modified while being traversed.
    productPropSetMap.put("translateArray", attributeNames);
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论