提交 ccc87b22 authored 作者: zhengfg's avatar zhengfg

1、增加 afri-eshop 自营商城爬虫

上级 fa12c353
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.SpiderUtil;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Item spider for afri-eshop product pages.
 *
 * <p>Downloads the product page, reads the product JSON embedded in the page's
 * {@code <script type="application/json" id="ProductJson-product-template">} element and
 * converts it into the common {@link ProductResponse} structure.
 */
@Component("africaShopItemSpider")
public class AfricaShopItemSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(AfricaShopItemSpider.class);

    /** Id of the script element that carries the product details as JSON. */
    private static final String PRODUCT_JSON_ELEMENT_ID = "ProductJson-product-template";

    /**
     * Captures one afri-eshop product and returns it as a JSON object.
     *
     * @param targetUrl URL of the product page to crawl
     * @return the {@link ProductResponse} built from the page, serialized with
     *         {@link JSONObject#fromObject(Object)}
     * @throws URISyntaxException propagated from the HTTP helper
     * @throws IOException propagated from the HTTP helper, or thrown here when the page body
     *         comes back null or empty
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException {
        // Fetch the raw page content for the target URL
        String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.AfriEshop.getValue());
        // NOTE(review): whether the HTTP helper can actually return null/empty is not visible
        // here; fail fast with a descriptive error instead of an opaque failure downstream.
        if (content == null || content.isEmpty()) {
            logger.warn("Empty page content for afri-eshop item: {}", targetUrl);
            throw new IOException("Empty page content for afri-eshop item: " + targetUrl);
        }
        // The product details live inside the JSON script element
        JSONObject productJson = JsoupUtil.getScriptContentById(content, PRODUCT_JSON_ELEMENT_ID);
        // Map the raw product JSON onto the shared response structure
        ProductResponse productResponse = SpiderUtil.formatAfricaShopProductResponse(productJson);
        return JSONObject.fromObject(productResponse);
    }
}
...@@ -34,7 +34,7 @@ public class GapItemSpider implements IItemSpider { ...@@ -34,7 +34,7 @@ public class GapItemSpider implements IItemSpider {
private static final String gapUrl="https://apicn.gap.cn/gap/store/product/list/searchProductByCondition.do"; private static final String gapUrl="https://apicn.gap.cn/gap/store/product/list/searchProductByCondition.do";
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws IOException {
JSONObject resultObj; JSONObject resultObj;
//获取链接中的商品spuCode //获取链接中的商品spuCode
String itemId= getItemId(targetUrl); String itemId= getItemId(targetUrl);
......
...@@ -27,7 +27,7 @@ public class NikeItemSpider implements IItemSpider { ...@@ -27,7 +27,7 @@ public class NikeItemSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(NikeItemSpider.class); private static Logger logger = LoggerFactory.getLogger(NikeItemSpider.class);
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException{
JSONObject resultObj; JSONObject resultObj;
//获取url中的网页内容 > //获取url中的网页内容 >
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue());
......
...@@ -14,7 +14,8 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> { ...@@ -14,7 +14,8 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> {
TM("天猫", "tm"), TM("天猫", "tm"),
GAP("GAP", "gap"), GAP("GAP", "gap"),
NIKE("NIKE", "nike"), NIKE("NIKE", "nike"),
UN("未知", "un"); UN("未知", "un"),
AfriEshop("afri-eshop","afri-eshop" );
private String label; private String label;
private String value; private String value;
......
...@@ -28,6 +28,10 @@ public class ItemSpiderFactory { ...@@ -28,6 +28,10 @@ public class ItemSpiderFactory {
iItemSpider= (IItemSpider) SpringContextUtil.getBean("nikeItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("nikeItemSpider");
break; break;
} }
case "afri-eshop":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("africaShopItemSpider");
break;
}
default:{ default:{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("emptyItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("emptyItemSpider");
break; break;
......
...@@ -51,6 +51,8 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -51,6 +51,8 @@ public class SpiderServiceImpl implements SpiderService {
platformEnum=PlatformEnum.GAP; platformEnum=PlatformEnum.GAP;
} else if(targetUrl.contains("www.nike.com/cn/t/")) { } else if(targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum=PlatformEnum.NIKE; platformEnum=PlatformEnum.NIKE;
} else if(targetUrl.contains("www.afri-eshop.com")&&targetUrl.contains("/products/")) {
platformEnum=PlatformEnum.AfriEshop;
} }
return platformEnum; return platformEnum;
} }
......
...@@ -125,7 +125,7 @@ public class HttpClientUtil { ...@@ -125,7 +125,7 @@ public class HttpClientUtil {
headerList.add(new BasicHeader(HttpHeaders.CACHE_CONTROL, "max-age=0")); headerList.add(new BasicHeader(HttpHeaders.CACHE_CONTROL, "max-age=0"));
headerList.add(new BasicHeader(HttpHeaders.CONNECTION, "keep-alive")); headerList.add(new BasicHeader(HttpHeaders.CONNECTION, "keep-alive"));
headerList.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-CN,zh;q=0.9")); headerList.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-CN,zh;q=0.9"));
headerList.add(new BasicHeader(HttpHeaders.CONTENT_TYPE, "charset=utf-8")); headerList.add(new BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json;charset=utf-8"));
//cookie //cookie
CookieStore cookieStore = new BasicCookieStore(); CookieStore cookieStore = new BasicCookieStore();
return HttpClients.custom().setDefaultHeaders(headerList).setDefaultCookieStore(cookieStore).build(); return HttpClients.custom().setDefaultHeaders(headerList).setDefaultCookieStore(cookieStore).build();
......
...@@ -225,5 +225,18 @@ public class JsoupUtil { ...@@ -225,5 +225,18 @@ public class JsoupUtil {
} }
/**
 * Parses the given HTML and returns the data content of the element with the given id
 * as a JSON object.
 *
 * @param content HTML text to parse
 * @param id id attribute of the element (typically a script tag) whose data holds the JSON
 * @return the element's data parsed with {@link JSONObject#fromObject(Object)}
 * @throws IllegalArgumentException if the parsed document has no element with that id
 */
public static JSONObject getScriptContentById(String content, String id) {
    Document document = Jsoup.parse(content);
    Element element = document.getElementById(id);
    // getElementById yields null for an unknown id; report that explicitly instead of
    // letting element.data() fail with a bare NullPointerException.
    if (element == null) {
        throw new IllegalArgumentException("No element with id '" + id + "' found in page content");
    }
    String dataStr = element.data();
    return JSONObject.fromObject(dataStr);
}
} }
...@@ -263,4 +263,98 @@ public class SpiderUtil { ...@@ -263,4 +263,98 @@ public class SpiderUtil {
productResponse.setDynStock(dynStock); productResponse.setDynStock(dynStock);
return productResponse; return productResponse;
} }
/**
 * Formats the product JSON taken from an afri-eshop product page into a
 * {@link ProductResponse}.
 *
 * <p>Reads the {@code variants}, {@code options}, {@code id}, {@code featured_image},
 * {@code title} and {@code price} entries of {@code resultObj}. Every variant contributes one
 * original-price entry and one SKU stock entry, both keyed by a SKU string assembled from the
 * variant's option values. Each option value is also collected under its option name in the
 * property map. Processing of variants stops at the first variant that has no real options
 * (an empty option list or the "Default Title" placeholder).
 *
 * @param resultObj product JSON object extracted from the page
 * @return the populated product response
 */
public static ProductResponse formatAfricaShopProductResponse(JSONObject resultObj) {
    ProductResponse productResponse = new ProductResponse();
    // Original prices, one entry per variant
    List<OriginalPrice> originalPriceList = new ArrayList<>();
    // Promotional prices are not extracted yet (TODO)
    List<ProductPromotion> promotionList = new ArrayList<>();
    // Stock information
    DynStock dynStock = new DynStock();
    // The data does not contain an exact stock count, so a generous default is used
    dynStock.setSellableQuantity(9999);
    // Option name -> property values collected from the variants
    Map<String, Set<ProductProp>> productPropSet = new HashMap<>();
    // Basic item information
    ItemInfo itemInfo = new ItemInfo();
    JSONArray variantsArray = resultObj.getJSONArray("variants");
    // Product-level option names
    JSONArray optionsArray = resultObj.getJSONArray("options");
    for (int i = 0; i < variantsArray.size(); i++) {
        JSONObject variant = variantsArray.getJSONObject(i);
        // Option values of this variant
        JSONArray itemOptionsArray = variant.getJSONArray("options");
        // A product without options reports "Default Title"; an empty list is treated the
        // same way so that reading index 0 can never fail.
        if (itemOptionsArray.size() == 0
                || "Default Title".equalsIgnoreCase(itemOptionsArray.getString(0))) {
            break;
        }
        // SKU key format: ";<prefix><value>;<prefix><value>;"
        StringBuilder skuBuilder = new StringBuilder(";");
        for (int m = 0; m < itemOptionsArray.size(); m++) {
            skuBuilder.append(KeyConstant.CUSTOMIZE_ID)
                    .append(itemOptionsArray.getString(m))
                    .append(";");
        }
        String skuStr = skuBuilder.toString();

        // ---- original price ----
        OriginalPrice originalPrice = new OriginalPrice();
        originalPrice.setPrice(variant.getString("price"));
        originalPrice.setSkuStr(skuStr);
        originalPriceList.add(originalPrice);

        // ---- stock ----
        productResponse.setStockFlag(true);
        List<ProductSkuStock> productSkuStockList = dynStock.getProductSkuStockList();
        if (productSkuStockList == null) {
            productSkuStockList = new ArrayList<>();
        }
        ProductSkuStock productSkuStock = new ProductSkuStock();
        // No per-SKU stock figure is read from the data; a fixed default is used
        productSkuStock.setSellableQuantity(999);
        productSkuStock.setSkuStr(skuStr);
        productSkuStockList.add(productSkuStock);
        dynStock.setProductSkuStockList(productSkuStockList);

        // ---- properties ----
        // Pair each option name with this variant's value at the same position. The bound is
        // the shorter of the two arrays so a variant with fewer values cannot overrun.
        int propCount = Math.min(optionsArray.size(), itemOptionsArray.size());
        for (int j = 0; j < propCount; j++) {
            String optionName = optionsArray.getString(j);
            String optionValue = itemOptionsArray.getString(j);
            ProductProp productProp = new ProductProp();
            productProp.setPropId(KeyConstant.CUSTOMIZE_ID + optionValue);
            productProp.setPropName(optionValue);
            // Append to the existing set instead of copying it for every variant
            Set<ProductProp> propSet = productPropSet.get(optionName);
            if (propSet == null) {
                propSet = new HashSet<>();
                productPropSet.put(optionName, propSet);
            }
            propSet.add(productProp);
        }
    }
    itemInfo.setItemId(resultObj.getString("id"));
    // Use the product's featured image as the item picture
    itemInfo.setPic(resultObj.getString("featured_image"));
    itemInfo.setShopName(PlatformEnum.AfriEshop.getValue());
    itemInfo.setShopUrl("https://www.afri-eshop.com/");
    itemInfo.setTitle(resultObj.getString("title"));
    productResponse.setPropFlag(true);
    productResponse.setProductPropSet(productPropSet);
    productResponse.setPlatform(PlatformEnum.AfriEshop.getValue());
    productResponse.setPromotionList(promotionList);
    productResponse.setOriginalPriceList(originalPriceList);
    productResponse.setItemInfo(itemInfo);
    productResponse.setDynStock(dynStock);
    productResponse.setPrice(resultObj.getString("price"));
    return productResponse;
}
} }
{
"id": 4046724890721,
"title": "Cotton Core Pillow Core A set of three pieces",
"handle": "cotton-core-pillow-core-a-set-of-three-pieces",
"description": "\u003cp\u003eProduct category: cushion core \/ pillow core \/ seat core\u003c\/p\u003e\n\u003cp\u003eProduct material: chemical fiber, pp cotton\u003c\/p\u003e\n\u003cp\u003eFiller: fiber\u003c\/p\u003e\n\u003cp\u003eStyle: modern simplicity\u003c\/p\u003e",
"published_at": "2019-09-12T11:27:55+08:00",
"created_at": "2019-09-12T11:33:50+08:00",
"vendor": "MollyKitty",
"type": "Afri Home",
"tags": ["Bedroom", "Livingroom"],
"price": 199,
"price_min": 199,
"price_max": 599,
"available": true,
"price_varies": true,
"compare_at_price": null,
"compare_at_price_min": 0,
"compare_at_price_max": 0,
"compare_at_price_varies": false,
"variants": [{
"id": 29948484649057,
"title": "60*60cm \/ 3",
"option1": "60*60cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 60*60cm \/ 3",
"public_title": "60*60cm \/ 3",
"options": ["60*60cm", "3"],
"price": 399,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484681825,
"title": "70*70cm \/ 3",
"option1": "70*70cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 70*70cm \/ 3",
"public_title": "70*70cm \/ 3",
"options": ["70*70cm", "3"],
"price": 599,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484714593,
"title": "50*50cm \/ 3",
"option1": "50*50cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 50*50cm \/ 3",
"public_title": "50*50cm \/ 3",
"options": ["50*50cm", "3"],
"price": 299,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484747361,
"title": "45*45cm \/ 3",
"option1": "45*45cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 45*45cm \/ 3",
"public_title": "45*45cm \/ 3",
"options": ["45*45cm", "3"],
"price": 199,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484780129,
"title": "65*65cm \/ 3",
"option1": "65*65cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 65*65cm \/ 3",
"public_title": "65*65cm \/ 3",
"options": ["65*65cm", "3"],
"price": 499,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484812897,
"title": "55*55cm \/ 3",
"option1": "55*55cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 55*55cm \/ 3",
"public_title": "55*55cm \/ 3",
"options": ["55*55cm", "3"],
"price": 299,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484845665,
"title": "30*45cm \/ 3",
"option1": "30*45cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 30*45cm \/ 3",
"public_title": "30*45cm \/ 3",
"options": ["30*45cm", "3"],
"price": 299,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484878433,
"title": "35*60cm \/ 3",
"option1": "35*60cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 35*60cm \/ 3",
"public_title": "35*60cm \/ 3",
"options": ["35*60cm", "3"],
"price": 299,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}, {
"id": 29948484911201,
"title": "30*50cm \/ 3",
"option1": "30*50cm",
"option2": "3",
"option3": null,
"sku": "",
"requires_shipping": true,
"taxable": true,
"featured_image": null,
"available": true,
"name": "Cotton Core Pillow Core A set of three pieces - 30*50cm \/ 3",
"public_title": "30*50cm \/ 3",
"options": ["30*50cm", "3"],
"price": 199,
"weight": 300,
"compare_at_price": null,
"inventory_management": "shopify",
"barcode": ""
}],
"images": ["\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q1.jpg?v=1568259234", "\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q2.jpg?v=1568259235", "\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q3.jpg?v=1568259236", "\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q4.jpg?v=1568259238", "\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q5.jpg?v=1568259239"],
"featured_image": "\/\/cdn.shopify.com\/s\/files\/1\/0079\/8330\/0705\/products\/q1.jpg?v=1568259234",
"options": ["Size", "Quantity"],
"content": "\u003cp\u003eProduct category: cushion core \/ pillow core \/ seat core\u003c\/p\u003e\n\u003cp\u003eProduct material: chemical fiber, pp cotton\u003c\/p\u003e\n\u003cp\u003eFiller: fiber\u003c\/p\u003e\n\u003cp\u003eStyle: modern simplicity\u003c\/p\u003e"
}
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论