提交 9f0148fb authored 作者: 张光耀's avatar 张光耀

添加了,Zara,代做

上级 0818a63b
...@@ -2,7 +2,11 @@ package com.diaoyun.zion.chinafrica.bis.impl; ...@@ -2,7 +2,11 @@ package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider; import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum; import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.HttpClientUtil; import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.SpiderUtil;
import com.diaoyun.zion.master.util.TranslateHelper;
import net.sf.json.JSONArray; import net.sf.json.JSONArray;
import net.sf.json.JSONObject; import net.sf.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -29,14 +33,16 @@ public class ZaraSpider implements IItemSpider { ...@@ -29,14 +33,16 @@ public class ZaraSpider implements IItemSpider {
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
JSONObject resultObj;
// 获取url中的网页内容 // 获取url中的网页内容
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue());
// 截取主要的 Json 内容 // 截取主要的 Json 内容
String jsonDataStr = getDataJson(content, "dataLayer", ";window.zara.viewPayload") resultObj = JsoupUtil.getZara(content);
.replace("dataLayer = ", ""); ProductResponse productResponse = SpiderUtil.formatNikeProductResponse(resultObj);
// 转换为 Json 格式 resultObj = JSONObject.fromObject(productResponse);
JSONObject jsonObject = JSONObject.fromObject(jsonDataStr); //翻译
return jsonObject; TranslateHelper.translateProductResponse(resultObj);
return resultObj;
} }
/** /**
...@@ -49,7 +55,7 @@ public class ZaraSpider implements IItemSpider { ...@@ -49,7 +55,7 @@ public class ZaraSpider implements IItemSpider {
private static String getDataJson(String jsonStr, String startStr, String lastStr) { private static String getDataJson(String jsonStr, String startStr, String lastStr) {
int startIndex = jsonStr.indexOf(startStr); int startIndex = jsonStr.indexOf(startStr);
int lastIndex = jsonStr.lastIndexOf(lastStr); int lastIndex = jsonStr.lastIndexOf(lastStr);
return jsonStr.substring(startIndex, lastIndex); return jsonStr.substring(startIndex, lastIndex);
} }
public static void main(String[] args) throws IOException, URISyntaxException { public static void main(String[] args) throws IOException, URISyntaxException {
......
...@@ -51,6 +51,12 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -51,6 +51,12 @@ public class SpiderServiceImpl implements SpiderService {
platformEnum=PlatformEnum.GAP; platformEnum=PlatformEnum.GAP;
} else if(targetUrl.contains("www.nike.com/cn/t/")) { } else if(targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum=PlatformEnum.NIKE; platformEnum=PlatformEnum.NIKE;
} else if(targetUrl.contains("www.afri-eshop.com")&&targetUrl.contains("/products/")) {
platformEnum=PlatformEnum.AfriEshop;
} else if (targetUrl.contains("zara.cn")) {
platformEnum = platformEnum.ZARA;
} else if (targetUrl.contains("uniqlo.cn/product-detail.html")) {
platformEnum = platformEnum.UNIQLO;
} }
return platformEnum; return platformEnum;
} }
......
...@@ -224,6 +224,19 @@ public class JsoupUtil { ...@@ -224,6 +224,19 @@ public class JsoupUtil {
return dataMap; return dataMap;
} }
/**
 * Extracts the Zara product JSON embedded in a product page.
 *
 * <p>The payload is the text between the first occurrence of {@code dataLayer}
 * and the last occurrence of {@code ;window.zara.viewPayload}. The leading
 * {@code dataLayer = } assignment prefix is dropped before the remainder is
 * parsed as a JSON object.
 *
 * @param content raw text of the page to scan
 * @return the JSON object parsed from the extracted payload
 * @throws IllegalArgumentException if {@code content} is null, or if either
 *         marker is missing or the markers appear in the wrong order
 */
public static JSONObject getZara(String content) {
    final String headMarker = "dataLayer";
    final String tailMarker = ";window.zara.viewPayload";
    final String assignmentPrefix = "dataLayer = ";

    if (content == null) {
        throw new IllegalArgumentException("Zara page content must not be null");
    }

    int headIndex = content.indexOf(headMarker);
    int tailIndex = content.lastIndexOf(tailMarker);
    // Fail with a readable message instead of letting substring() throw an
    // opaque StringIndexOutOfBoundsException when the page layout changed.
    if (headIndex < 0 || tailIndex < headIndex) {
        throw new IllegalArgumentException(
                "Zara page content does not contain the expected markers: headIndex="
                        + headIndex + ", tailIndex=" + tailIndex);
    }

    String payload = content.substring(headIndex, tailIndex);
    // Strip only the leading assignment; a global replace could also delete
    // the same text from inside JSON string values and corrupt the data.
    if (payload.startsWith(assignmentPrefix)) {
        payload = payload.substring(assignmentPrefix.length());
    }
    return JSONObject.fromObject(payload);
}
/** /**
* 根据script id获取内容 * 根据script id获取内容
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论