提交 fda39094 authored 作者: 梁业锦's avatar 梁业锦 💬

已完善Zara与优衣库的爬虫,增加了Massimodutti的爬虫

上级 e768cea5
...@@ -30,11 +30,11 @@ ...@@ -30,11 +30,11 @@
### Gap ### Gap
- 主页:https://www.gap.cn/ - 主页:https://www.gap.cn/
- 命名:gap - 命名:gap
- 爬虫进度:已完成 - 爬虫进度:**已完成**
### Zara ### Zara
- 主页:https://www.zara.cn/cn - 主页:https://www.zara.cn/cn
- 命名:zara - 命名:zara
- 爬虫进度:已完成 - 爬虫进度:**已完成**
### Uniqlo ### Uniqlo
- 主页:https://www.uniqlo.cn/UNIQLO_U19FW_MEN.html - 主页:https://www.uniqlo.cn/UNIQLO_U19FW_MEN.html
- 命名:uniqlo - 命名:uniqlo
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
### Nike ### Nike
- 主页:https://www.nike.com/cn - 主页:https://www.nike.com/cn
- 命名:nike - 命名:nike
- 爬虫进度:已完成 - 爬虫进度:**已完成**
### Adidas ### Adidas
- 主页:https://www.adidas.com.cn/ - 主页:https://www.adidas.com.cn/
- 命名:adidas - 命名:adidas
...@@ -52,7 +52,9 @@ ...@@ -52,7 +52,9 @@
### H&M ### H&M
- 主页:https://www2.hm.com/zh_cn/ - 主页:https://www2.hm.com/zh_cn/
- 命名:hm - 命名:hm
- 爬虫进度: - 爬虫进度:已能获取到数据
- 图片路径处理难度太大
- 商品颜色通过商品详情页的url来区分,暂未找到规律
### LiLy ### LiLy
- 主页:http://www.lily.sh.cn/webapp/wcs/stores/servlet/lilystore - 主页:http://www.lily.sh.cn/webapp/wcs/stores/servlet/lilystore
- 命名:lily - 命名:lily
...@@ -65,16 +67,43 @@ ...@@ -65,16 +67,43 @@
- 主页:http://www.ur.cn/index.html - 主页:http://www.ur.cn/index.html
- 命名:ur - 命名:ur
- 爬虫进度: - 爬虫进度:
### Abercrombie & Fitch
- 主页:https://www.abercrombie.cn/zh_CN/home
- 命名:abercrombie
- 爬虫进度:
### Under Armour
- 主页:https://www.underarmour.cn/
- 命名:underarmour
- 爬虫进度:
### Converse
- 主页:https://www.converse.com.cn/
- 命名:converse
- 爬虫进度:
### Ochirly
- 主页:http://www.ochirly.com.cn/SALE/list.shtml
- 命名:ochirly
- 爬虫进度:
### Esprit
- 主页:https://www.esprit.cn/
- 命名:esprit
- 爬虫进度:
### Levi's
- 主页:https://www.levi.com.cn/sale#page=3
- 命名:levi
- 爬虫进度:
### Moco
- 主页:https://www.moco.com/moco/zh/c/BS_DISCOUNT
- 命名:moco
- 爬虫进度:
### Massimo Dutti
- 主页:https://www.massimodutti.cn/cn/男装/季末折扣/休闲西装-c1745921.html
- 命名:massimodutti
- 爬虫进度:**已完成**
- 数据来源
- 商品详情:https://www.massimodutti.cn/cn/%E5%A5%B3%E8%A3%85/%E7%B3%BB%E5%88%97/%E8%A1%AC%E8%A1%AB%E5%92%8C%E7%BD%A9%E8%A1%AB/%E8%A1%AC%E8%A1%AB/%E6%BB%91%E9%9B%AA%E9%A3%8E%E7%B3%BB%E5%88%97%E9%A5%B0%E5%8F%A3%E8%A2%8B%E8%A1%AC%E8%A1%AB-c1718602p8730105.html?colorId=420&categoryId=1718602
- 数据接口:https://www.massimodutti.cn/itxrest/2/catalog/store/35009478/30359500/category/0/product/8730105/detail?languageId=-7&appId=1
### 待选爬虫网站: ### 待选爬虫网站:
- 10, ( UR, the most famous fashion brand in china)
- 11,https://www.abercrombie.cn/zh_CN/home (A&F, America brand)
- 12,https://www.underarmour.cn/ (America famous sport brand)
- 13,https://www.converse.com.cn/ (Famous American Canvas Shoes Brand)
- 14,http://www.ochirly.com.cn/SALE/list.shtml (one of the best fashion brand in china)
- 15,https://www.esprit.cn/ (America casual fashion brand)
- 16,https://www.levi.com.cn/sale#page=3 (levis)
- 17,https://www.moco.com/moco/zh/c/BS_DISCOUNT (one of the best fashion brand in china)
- 18,https://www.massimodutti.cn/cn/男装/季末折扣/休闲西装-c1745921.html ( 西班牙品牌) - 18,https://www.massimodutti.cn/cn/男装/季末折扣/休闲西装-c1745921.html ( 西班牙品牌)
- 19,https://china.coach.com/women.html - 19,https://china.coach.com/women.html
- 20,https://www.revolve.com/wrangler/br/57f1a1/?utm_source=baidu&utm_medium=cpc&utm_campaign=intl_P_cn-d-Wrangler (美国轻奢品牌集合网站) - 20,https://www.revolve.com/wrangler/br/57f1a1/?utm_source=baidu&utm_medium=cpc&utm_campaign=intl_P_cn-d-Wrangler (美国轻奢品牌集合网站)
......
package com.diaoyun.zion.chinafrica.bis.impl;
import com.alibaba.druid.support.json.JSONUtils;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * H&amp;M product spider.
 *
 * <p>Downloads an H&amp;M product page, pulls the {@code productArticleDetails}
 * script variable out of it and returns the embedded object as JSON with the
 * image section removed.
 *
 * @author 爱酱油不爱醋
 */
@Component("hmSpider")
public class HmSpider implements IItemSpider {

    private static final Logger logger = LoggerFactory.getLogger(HmSpider.class);

    /** Name of the page script variable that carries the product data. */
    private static final String DETAILS_VARIABLE = "productArticleDetails";

    /** Marks the start of the image section that is cut out of the data. */
    private static final String IMAGES_MARKER = "'images':[";

    /** Marks the end (exclusive) of the section that is cut out of the data. */
    private static final String VIDEO_MARKER = "'video':";

    /**
     * Fetches the product page and converts its embedded product data to JSON.
     *
     * @param targetUrl URL of the H&amp;M product detail page
     * @return the product data with the image section stripped
     * @throws IOException if the page cannot be read or does not contain the
     *                     expected product data
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.HM.getValue());
        String detailStr = JsoupUtil.getScriptContent(content, DETAILS_VARIABLE);
        return JSONObject.fromObject(extractProductJson(detailStr, targetUrl));
    }

    /**
     * Cuts the outermost {@code {...}} object out of the script text and removes
     * the span from the first images marker up to the last video marker.
     *
     * @param detailStr script text returned for the details variable; a
     *                  {@code null} value is treated as "not found"
     * @param targetUrl page URL, used only for diagnostics
     * @return the object text ready to be parsed as JSON
     * @throws IOException if no object literal is present in the script text
     */
    private static String extractProductJson(String detailStr, String targetUrl) throws IOException {
        if (detailStr == null) {
            throw new IOException("No " + DETAILS_VARIABLE + " script found in page: " + targetUrl);
        }
        int firstBrace = detailStr.indexOf('{');
        int lastBrace = detailStr.lastIndexOf('}');
        if (firstBrace < 0 || lastBrace < firstBrace) {
            throw new IOException("No product object found in " + DETAILS_VARIABLE + " of page: " + targetUrl);
        }
        String resultStr = detailStr.substring(firstBrace, lastBrace + 1);

        int firstImage = detailStr.indexOf(IMAGES_MARKER);
        int lastVideo = detailStr.lastIndexOf(VIDEO_MARKER);
        if (firstImage < 0 || lastVideo < firstImage) {
            // Nothing to strip: keep the data instead of failing on a bad substring range.
            logger.warn("Image section markers not found for {}, returning data unstripped", targetUrl);
            return resultStr;
        }
        // Image paths are not processed yet, so the whole span is dropped before parsing.
        String imageSection = detailStr.substring(firstImage, lastVideo);
        return resultStr.replace(imageSection, "");
    }
}
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.chinafrica.vo.ProductResponse;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.SpiderUtil;
import com.diaoyun.zion.master.util.TranslateHelper;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Massimo Dutti product spider.
 *
 * <p>Extracts the product id from a product detail page URL, queries the
 * site's product detail endpoint for that id, then formats and translates
 * the result.
 *
 * @author 爱酱油不爱醋
 */
@Component("massimoduttiSpider")
public class MassimoduttiSpider implements IItemSpider {

    private static final Logger logger = LoggerFactory.getLogger(MassimoduttiSpider.class);

    /** Product detail endpoint, up to (not including) the product id. */
    private static final String PRODUCT_API_PREFIX =
            "https://www.massimodutti.cn/itxrest/2/catalog/store/35009478/30359500/category/0/product/";

    /** Remainder of the product detail endpoint that follows the product id. */
    private static final String PRODUCT_API_SUFFIX = "/detail?languageId=-7&appId=1";

    /** Extension that terminates the page path in a product detail URL. */
    private static final String PAGE_SUFFIX = ".html";

    /**
     * Fetches, formats and translates the data of one Massimo Dutti product.
     *
     * @see com.diaoyun.zion.chinafrica.service.impl.SpiderServiceImpl#judgeUrlType URL routing for product detail pages
     * @see SpiderUtil#formatMassimoDuttiProductResponse formatting of the raw data
     * @param targetUrl product detail page URL, e.g. one ending in {@code -c1718602p8730105.html}
     * @return the formatted and translated product data as JSON
     * @throws IllegalArgumentException if no numeric product id can be found in {@code targetUrl}
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        String pId = extractProductId(targetUrl);
        String dataUrl = PRODUCT_API_PREFIX + pId + PRODUCT_API_SUFFIX;
        String content = HttpClientUtil.getContentByUrl(dataUrl, PlatformEnum.MASSIMODUTTI.getValue());
        JSONObject resultObj = JSONObject.fromObject(content);
        ProductResponse productResponse = SpiderUtil.formatMassimoDuttiProductResponse(resultObj, pId);
        resultObj = JSONObject.fromObject(productResponse);
        TranslateHelper.translateProductResponse(resultObj);
        return resultObj;
    }

    /**
     * Returns the digits that follow the last {@code 'p'} before {@code ".html"}.
     *
     * @param targetUrl product detail page URL
     * @return the numeric product id
     * @throws IllegalArgumentException if the URL is {@code null} or the text after
     *                                  the last {@code 'p'} is not a non-empty digit run
     */
    private static String extractProductId(String targetUrl) {
        if (targetUrl == null) {
            throw new IllegalArgumentException("targetUrl must not be null");
        }
        // Literal search: String.split(".html") would treat '.' as a regex wildcard.
        int suffixIdx = targetUrl.indexOf(PAGE_SUFFIX);
        String pagePath = suffixIdx >= 0 ? targetUrl.substring(0, suffixIdx) : targetUrl;
        String pId = pagePath.substring(pagePath.lastIndexOf('p') + 1);
        // Without this check a URL lacking a product id (e.g. a category page) would
        // yield the text after the 'p' of "https" and produce a bogus request.
        if (!pId.matches("[0-9]+")) {
            throw new IllegalArgumentException("No product id found in Massimo Dutti url: " + targetUrl);
        }
        return pId;
    }
}
...@@ -23,44 +23,24 @@ public class PullandbearSpider implements IItemSpider { ...@@ -23,44 +23,24 @@ public class PullandbearSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(PullandbearSpider.class); private static Logger logger = LoggerFactory.getLogger(PullandbearSpider.class);
//PullandBear商品详情 /**
private static final String pullandbearUrl="https://www.pullandbear.cn/itxrest/2/catalog/store/24009528/20309423/category/0/product/"; * Pull and Bear 商品详情链接
*/
private static final String PULL_AND_BEAR_URL="https://www.pullandbear.cn/itxrest/2/catalog/store/24009528/20309423/category/0/product/";
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
// 提取链接中的商品 id 信息 // 提取链接中的商品 id 信息
String pId = targetUrl.substring(targetUrl.lastIndexOf("p")+1, targetUrl.lastIndexOf(".html")); String pId = targetUrl.substring(targetUrl.lastIndexOf("p")+1, targetUrl.lastIndexOf(".html"));
// 组成链接并获取商品详情信息的 Json // 组成链接并获取商品详情信息的 Json
targetUrl = pullandbearUrl + pId + "/detail?languageId=-7&appId=1"; targetUrl = PULL_AND_BEAR_URL + pId + "/detail?languageId=-7&appId=1";
// 获取网页内容 // 获取网页内容
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.PULLANDBEAR.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.PULLANDBEAR.getValue());
// 转换为Json格式 // 转换为Json格式
JSONObject json = JSONObject.fromObject(content); JSONObject resultJson = JSONObject.fromObject(content);
// 简化Json格式 return resultJson;
json = getMainData(json, content);
return json;
} }
/**
* 简化Json格式
* @param json
* @param content
* @return
*/
private JSONObject getMainData(JSONObject json, String content) {
json = json.getJSONArray("bundleProductSummaries").getJSONObject(0);
// 商品id
String productId = json.getString("id");
// 商品名称
String productName = json.getString("name");
JSONArray colors = json.getJSONObject("detail").getJSONArray("colors");
// 返回数据
JSONObject returnJson = new JSONObject();
returnJson.put("id", productId);
returnJson.put("name", productName);
returnJson.put("data", colors);
return returnJson;
}
} }
......
...@@ -20,34 +20,51 @@ import java.util.concurrent.TimeoutException; ...@@ -20,34 +20,51 @@ import java.util.concurrent.TimeoutException;
/** /**
* 优衣库数据爬虫 * 优衣库数据爬虫
* *
* 图片路径为:"https://www.uniqlo.cn/hmall/test/" + 商品id + "/sku/561/" + 商品图片id + ".jpg"
*
* @author 爱酱油不爱醋 * @author 爱酱油不爱醋
*/ */
@Component("uniqloSpider") @Component("uniqloSpider")
public class UniqloSpider implements IItemSpider { public class UniqloSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(UniqloSpider.class); private static Logger logger = LoggerFactory.getLogger(UniqloSpider.class);
// 优衣库数据爬虫 /**
private static final String uniqloUrl = "https://www.uniqlo.cn/data/products/spu/"; * Uniqlo 商品详情链接
*/
private static final String UNIQLO_URL = "https://www.uniqlo.cn/data/products/spu/";
/**
* Uniqlo 数据爬虫
* @see com.diaoyun.zion.chinafrica.service.impl.SpiderServiceImpl#judgeUrlType 修改商品详情页路径
* @see SpiderUtil#formatUniqloProductResponse 格式化数据方法
* @param targetUrl 接收的商品详情路径
* @return 格式化与翻译后的 Json 数据
*/
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
// 获取商品 id // 截取链接中的商品 id
String[] split = targetUrl.split("productCode="); String[] split = targetUrl.split("productCode=");
String pId = split[1]; String pId = split[1];
// 获取商品详情的json链接 targetUrl = UNIQLO_URL + "zh_CN/" + pId + ".json";
targetUrl = uniqloUrl + "zh_CN/" + pId + ".json";
// 获取网页内容
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.UNIQLO.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.UNIQLO.getValue());
// 转换为 json // 获取折扣价格的接口
String priceUrl = "https://d.uniqlo.cn/p/product/i/product/spu/pc/query/" + pId + "/zh_CN";
String priceContent = HttpClientUtil.getContentByUrl(priceUrl, PlatformEnum.UNIQLO.getValue());
JSONObject resultJson = JSONObject.fromObject(content); JSONObject resultJson = JSONObject.fromObject(content);
// 按照封装规范封装商品数据 JSONObject priceJson = JSONObject.fromObject(priceContent);
ProductResponse productResponse = SpiderUtil.formatUniqloProductResponse(resultJson, pId); ProductResponse productResponse = SpiderUtil.formatUniqloProductResponse(resultJson, priceJson, pId);
resultJson = JSONObject.fromObject(productResponse); resultJson = JSONObject.fromObject(productResponse);
// 翻译 // 翻译
TranslateHelper.translateProductResponse(resultJson); TranslateHelper.translateProductResponse(resultJson);
return resultJson; return resultJson;
} }
/**
 * Manual check: downloads the product JSON for one fixed Uniqlo product page
 * and prints the raw response to standard output.
 *
 * @param args unused
 * @throws Exception if the download fails
 */
public static void main(String[] args) throws Exception {
    final String pageUrl = "https://www.uniqlo.cn/product-detail.html?productCode=u0000000001970";
    // Everything after "productCode=" is taken as the product id.
    final String productCode = pageUrl.split("productCode=")[1];
    final String jsonUrl = UNIQLO_URL + "zh_CN/" + productCode + ".json";
    System.out.println(HttpClientUtil.getContentByUrl(jsonUrl, PlatformEnum.UNIQLO.getValue()));
}
} }
...@@ -29,21 +29,26 @@ import java.util.concurrent.TimeoutException; ...@@ -29,21 +29,26 @@ import java.util.concurrent.TimeoutException;
public class ZaraSpider implements IItemSpider { public class ZaraSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(ZaraSpider.class); private static Logger logger = LoggerFactory.getLogger(ZaraSpider.class);
//西班牙时尚品牌数据爬虫 /**
private static final String zaraUrl = "https://www.zara.cn/cn/zh/"; * Zara 商品详情页Url
*/
private static final String ZARA_URL = "https://www.zara.cn/cn/zh/";
/**
* Zara 数据爬虫
* @see com.diaoyun.zion.chinafrica.service.impl.SpiderServiceImpl#judgeUrlType 修改商品详情页路径
* @see JsoupUtil#getZaraJsonData 返回截取到的主要商品数据
* @see SpiderUtil#formatZaraProductResponse 格式化数据方法
* @param targetUrl 接收的商品详情路径
* @return 格式化与翻译后的 Json 数据
*/
@Override @Override
public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException { public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
JSONObject resultObj; JSONObject resultObj;
// 获取url中的网页内容
String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue()); String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue());
// 截取主要的 Json 内容
resultObj = JsoupUtil.getZaraJsonData(content); resultObj = JsoupUtil.getZaraJsonData(content);
// 按照封装规范封装商品数据
ProductResponse productResponse = SpiderUtil.formatZaraProductResponse(resultObj); ProductResponse productResponse = SpiderUtil.formatZaraProductResponse(resultObj);
// 将封装数据转换为 json 数据
resultObj = JSONObject.fromObject(productResponse); resultObj = JSONObject.fromObject(productResponse);
// 翻译
TranslateHelper.translateProductResponse(resultObj); TranslateHelper.translateProductResponse(resultObj);
return resultObj; return resultObj;
} }
......
...@@ -17,13 +17,14 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> { ...@@ -17,13 +17,14 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> {
ZARA("Zara", "zara"), ZARA("Zara", "zara"),
UNIQLO("优衣库", "uniqlo"), UNIQLO("优衣库", "uniqlo"),
NIKE("NIKE", "nike"), NIKE("NIKE", "nike"),
HM("H&M", "hm"),
MASSIMODUTTI("MassimoDutti", "massimodutti"),
UN("未知", "un"), UN("未知", "un"),
AfriEshop("afri-eshop","afri-eshop" ); AfriEshop("afri-eshop","afri-eshop" );
private String label; private String label;
private String value; private String value;
PlatformEnum(String label, String value) { PlatformEnum(String label, String value) {
this.label = label; this.label = label;
this.value = value; this.value = value;
......
...@@ -40,6 +40,14 @@ public class ItemSpiderFactory { ...@@ -40,6 +40,14 @@ public class ItemSpiderFactory {
iItemSpider= (IItemSpider) SpringContextUtil.getBean("nikeItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("nikeItemSpider");
break; break;
} }
case "hm":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("hmSpider");
break;
}
case "massimodutti":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("massimoduttiSpider");
break;
}
case "afri-eshop":{ case "afri-eshop":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("africaShopItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("africaShopItemSpider");
break; break;
......
...@@ -43,10 +43,12 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -43,10 +43,12 @@ public class SpiderServiceImpl implements SpiderService {
private PlatformEnum judgeUrlType(String targetUrl) { private PlatformEnum judgeUrlType(String targetUrl) {
PlatformEnum platformEnum = PlatformEnum.UN; PlatformEnum platformEnum = PlatformEnum.UN;
if(targetUrl.contains("taobao.com")&&(targetUrl.contains("item.htm")||targetUrl.contains("detail.htm"))) { if (targetUrl.contains("taobao.com") && (targetUrl.contains("item.htm") || targetUrl.contains("detail.htm"))) {
platformEnum=PlatformEnum.TB; platformEnum = PlatformEnum.TB;
} else if(targetUrl.contains("tmall.com/item.htm")) { } else if (targetUrl.contains("tmall.com/item.htm")) {
platformEnum=PlatformEnum.TM; platformEnum = PlatformEnum.TM;
} else if (targetUrl.contains("pullandbear.cn/cn/")) {
platformEnum = PlatformEnum.PULLANDBEAR;
} else if(targetUrl.contains("www.gap.cn/pdp/")) { } else if(targetUrl.contains("www.gap.cn/pdp/")) {
platformEnum=PlatformEnum.GAP; platformEnum=PlatformEnum.GAP;
} else if(targetUrl.contains("www.nike.com/cn/t/")) { } else if(targetUrl.contains("www.nike.com/cn/t/")) {
...@@ -54,9 +56,13 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -54,9 +56,13 @@ public class SpiderServiceImpl implements SpiderService {
} else if(targetUrl.contains("www.afri-eshop.com")&&targetUrl.contains("/products/")) { } else if(targetUrl.contains("www.afri-eshop.com")&&targetUrl.contains("/products/")) {
platformEnum=PlatformEnum.AfriEshop; platformEnum=PlatformEnum.AfriEshop;
} else if (targetUrl.contains("zara.cn")) { } else if (targetUrl.contains("zara.cn")) {
platformEnum = platformEnum.ZARA; platformEnum = PlatformEnum.ZARA;
} else if (targetUrl.contains("uniqlo.cn/product-detail.html")) { } else if (targetUrl.contains("uniqlo.cn/product-detail.html")) {
platformEnum = platformEnum.UNIQLO; platformEnum = PlatformEnum.UNIQLO;
} else if (targetUrl.contains("hm.com/zh_cn/productpage")) {
platformEnum = PlatformEnum.HM;
} else if (targetUrl.contains("massimodutti.cn/")) {
platformEnum = PlatformEnum.MASSIMODUTTI;
} }
return platformEnum; return platformEnum;
} }
......
...@@ -2,17 +2,29 @@ package com.diaoyun.zion.chinafrica.vo; ...@@ -2,17 +2,29 @@ package com.diaoyun.zion.chinafrica.vo;
/** /**
* 商品信息 * 商品信息
*
* @author G
*/ */
public class ItemInfo { public class ItemInfo {
// /**
* 商品 id
*/
private String itemId; private String itemId;
//商品标题 /**
* 商品标题
*/
private String title; private String title;
//商品主图 /**
* 商品主图
*/
private String pic; private String pic;
//所属店铺 /**
* 所属店铺
*/
private String shopName; private String shopName;
//店铺链接 /**
* 店铺链接
*/
private String shopUrl; private String shopUrl;
public String getItemId() { public String getItemId() {
......
...@@ -56,15 +56,6 @@ public class ProductResponse { ...@@ -56,15 +56,6 @@ public class ProductResponse {
*/ */
private String platform; private String platform;
public boolean isStockFlag() {
return stockFlag;
}
public void setStockFlag(boolean stockFlag) {
this.stockFlag = stockFlag;
}
public List<OriginalPrice> getOriginalPriceList() { public List<OriginalPrice> getOriginalPriceList() {
return originalPriceList; return originalPriceList;
} }
...@@ -73,16 +64,16 @@ public class ProductResponse { ...@@ -73,16 +64,16 @@ public class ProductResponse {
this.originalPriceList = originalPriceList; this.originalPriceList = originalPriceList;
} }
public List<ProductPromotion> getPromotionList() { public boolean isPromotionFlag() {
return promotionList; return promotionFlag;
} }
public String getSalePrice() { public void setPromotionFlag(boolean promotionFlag) {
return salePrice; this.promotionFlag = promotionFlag;
} }
public void setSalePrice(String salePrice) { public List<ProductPromotion> getPromotionList() {
this.salePrice = salePrice; return promotionList;
} }
public void setPromotionList(List<ProductPromotion> promotionList) { public void setPromotionList(List<ProductPromotion> promotionList) {
...@@ -97,6 +88,22 @@ public class ProductResponse { ...@@ -97,6 +88,22 @@ public class ProductResponse {
this.price = price; this.price = price;
} }
/**
 * Returns the value of the {@code salePrice} field.
 *
 * @return the sale price as stored, in string form
 */
public String getSalePrice() {
return salePrice;
}
/**
 * Stores the given value in the {@code salePrice} field.
 *
 * @param salePrice the sale price in string form
 */
public void setSalePrice(String salePrice) {
this.salePrice = salePrice;
}
/**
 * Returns the value of the {@code stockFlag} field.
 *
 * @return the stored flag (presumably whether stock data is present; confirm against the field's declaration)
 */
public boolean isStockFlag() {
return stockFlag;
}
/**
 * Stores the given value in the {@code stockFlag} field.
 *
 * @param stockFlag the flag value to store
 */
public void setStockFlag(boolean stockFlag) {
this.stockFlag = stockFlag;
}
public DynStock getDynStock() { public DynStock getDynStock() {
return dynStock; return dynStock;
} }
...@@ -136,12 +143,4 @@ public class ProductResponse { ...@@ -136,12 +143,4 @@ public class ProductResponse {
public void setPlatform(String platform) { public void setPlatform(String platform) {
this.platform = platform; this.platform = platform;
} }
public boolean isPromotionFlag() {
return promotionFlag;
}
public void setPromotionFlag(boolean promotionFlag) {
this.promotionFlag = promotionFlag;
}
} }
...@@ -84,7 +84,7 @@ public class JsoupUtil { ...@@ -84,7 +84,7 @@ public class JsoupUtil {
* @param variableName * @param variableName
* @return * @return
*/ */
private static String getScriptContent(String content, String variableName) { public static String getScriptContent(String content, String variableName) {
//logger.info(content); //logger.info(content);
Document document = Jsoup.parse(content); Document document = Jsoup.parse(content);
Elements elementList = document.getElementsByTag("script"); Elements elementList = document.getElementsByTag("script");
...@@ -150,6 +150,22 @@ public class JsoupUtil { ...@@ -150,6 +150,22 @@ public class JsoupUtil {
} }
} }
/**
 * Manual check: downloads one fixed H&amp;M product page, extracts the
 * {@code productArticleDetails} script text, strips the span between the
 * first images marker and the last video marker, and prints the parsed JSON.
 *
 * @param args unused
 * @throws Exception if the download or parsing fails
 */
public static void main(String[] args) throws Exception {
    String pageHtml = HttpClientUtil.getContentByUrl(
            "https://www2.hm.com/zh_cn/productpage.0809313001.html", PlatformEnum.HM.getValue());
    String script = getScriptContent(pageHtml, "productArticleDetails");

    // Outermost {...} of the script text.
    String jsonText = script.substring(script.indexOf("{"), script.lastIndexOf("}") + 1);

    // Span from the first images marker up to (not including) the last video marker.
    String imageSection = script.substring(script.indexOf("'images':["), script.lastIndexOf("'video':"));

    JSONObject parsed = JSONObject.fromObject(jsonText.replace(imageSection, ""));
    System.out.println(parsed);
}
/** /**
* 解析出商品详情 * 解析出商品详情
* *
...@@ -232,7 +248,7 @@ public class JsoupUtil { ...@@ -232,7 +248,7 @@ public class JsoupUtil {
} }
/** /**
* 获取Zara爬虫的主要数据 * 获取 Zara 爬虫的主要数据
* @param content * @param content
* @return * @return
*/ */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论