提交 402cfbdf authored 作者: 张光耀's avatar 张光耀

pullandbear、zara、uniqlo的爬虫代码,待完善

上级 ad7bdcf5
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Product spider for Pull&amp;Bear, a Spanish youth fashion brand.
 *
 * <p>Extracts the product id from a product page URL, requests the product detail JSON and
 * reduces it to the product id, name and colour list.
 *
 * <p>TODO: image paths are not processed yet (the image URL pattern is still undocumented).
 */
@Component("pullandbearSpider")
public class PullandbearSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(PullandbearSpider.class);

    /** Prefix of the Pull&amp;Bear product detail endpoint; the product id is appended to it. */
    private static final String pullandbearUrl = "https://www.pullandbear.cn/itxrest/2/catalog/store/24009528/20309423/category/0/product/";

    /** Suffix that terminates the product id inside a product page URL. */
    private static final String PAGE_SUFFIX = ".html";

    /**
     * Fetches the product detail JSON for a product page URL and returns the simplified data.
     *
     * @param targetUrl product page URL; the product id is the text between the last {@code p}
     *     preceding {@code .html} and {@code .html} itself
     * @return JSON object with the keys {@code id}, {@code name} and {@code data} (colour list)
     * @throws IllegalArgumentException if no product id can be extracted from {@code targetUrl}
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Extract the product id from the page link
        String pId = extractProductId(targetUrl);
        // Build the detail link that serves the product JSON
        String detailUrl = pullandbearUrl + pId + "/detail?languageId=-7&appId=1";
        // Fetch the response body
        String content = HttpClientUtil.getContentByUrl(detailUrl, PlatformEnum.PULLANDBEAR.getValue());
        // Parse it and keep only the fields we need
        JSONObject json = JSONObject.fromObject(content);
        return getMainData(json, content);
    }

    /**
     * Extracts the product id located between the last {@code p} before {@code .html} and
     * {@code .html}.
     *
     * <p>The search for {@code p} is bounded by the position of {@code .html}, so a query string
     * or fragment containing the letter {@code p} no longer produces an invalid substring range.
     *
     * @param targetUrl product page URL
     * @return the non-empty product id
     * @throws IllegalArgumentException if the URL lacks {@code .html}, lacks the {@code p}
     *     marker, or the id between them is empty
     */
    private static String extractProductId(String targetUrl) {
        int suffixIdx = targetUrl.lastIndexOf(PAGE_SUFFIX);
        int markerIdx = suffixIdx < 0 ? -1 : targetUrl.lastIndexOf('p', suffixIdx);
        if (markerIdx < 0 || markerIdx + 1 >= suffixIdx) {
            throw new IllegalArgumentException("Cannot extract Pull&Bear product id from url: " + targetUrl);
        }
        return targetUrl.substring(markerIdx + 1, suffixIdx);
    }

    /**
     * Reduces the detail response to the fields that are returned to the caller.
     *
     * @param json parsed detail response; its first {@code bundleProductSummaries} entry is used
     * @param content raw response body (currently unused)
     * @return JSON object with the keys {@code id}, {@code name} and {@code data}
     */
    private JSONObject getMainData(JSONObject json, String content) {
        JSONObject product = json.getJSONArray("bundleProductSummaries").getJSONObject(0);
        // Product id
        String productId = product.getString("id");
        // Product name
        String productName = product.getString("name");
        JSONArray colors = product.getJSONObject("detail").getJSONArray("colors");
        // Assemble the result
        JSONObject returnJson = new JSONObject();
        returnJson.put("id", productId);
        returnJson.put("name", productName);
        returnJson.put("data", colors);
        return returnJson;
    }
}
......@@ -5,35 +5,50 @@ import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.http.Consts;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Product spider for Uniqlo.
 *
 * <p>TODO: image paths are not processed yet. The image path is:
 * "https://www.uniqlo.cn/hmall/test/" + product id + "/sku/40/" + product image id + ".jpg"
 */
@Component("uniqloSpider")
public class UniqloSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(UniqloSpider.class);

    /** Prefix of the Uniqlo product JSON endpoint. */
    private static final String uniqloUrl = "https://www.uniqlo.cn/data/products/spu/";

    /** Query parameter marker that precedes the product code in a product page URL. */
    private static final String PRODUCT_CODE_KEY = "productCode=";

    /**
     * Fetches the product JSON for a product page URL and returns the simplified data.
     *
     * @param targetUrl product page URL containing a {@code productCode=} query parameter
     * @return JSON object with the keys {@code name}, {@code price} and {@code data} (the
     *     {@code rows} array of the response)
     * @throws IllegalArgumentException if {@code targetUrl} carries no product code
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Extract the product code
        String pId = extractProductCode(targetUrl);
        // Build the link that serves the product detail JSON
        String detailUrl = uniqloUrl + "zh_CN/" + pId + ".json";
        // Fetch the response body
        String content = HttpClientUtil.getContentByUrl(detailUrl, PlatformEnum.UNIQLO.getValue());
        // Parse as JSON
        JSONObject json = JSONObject.fromObject(content);
        JSONObject summary = json.getJSONObject("summary");
        // Product name
        String pName = summary.getString("name");
        // Product price
        String pPrice = summary.getString("originPrice");
        // Assemble the result
        JSONArray rowsJson = json.getJSONArray("rows");
        JSONObject returnJson = new JSONObject();
        returnJson.put("name", pName);
        returnJson.put("price", pPrice);
        returnJson.elementOpt("data", rowsJson);
        return returnJson;
    }

    /**
     * Extracts the value of the {@code productCode} parameter.
     *
     * <p>Anything following the value (further {@code &}-separated parameters or a {@code #}
     * fragment) is dropped so that it does not leak into the JSON endpoint path.
     *
     * @param targetUrl product page URL
     * @return the non-empty product code
     * @throws IllegalArgumentException if the parameter is missing or empty
     */
    private static String extractProductCode(String targetUrl) {
        String[] split = targetUrl.split(PRODUCT_CODE_KEY);
        if (split.length < 2) {
            throw new IllegalArgumentException("Cannot extract Uniqlo product code from url: " + targetUrl);
        }
        String code = split[1];
        int end = code.length();
        int ampIdx = code.indexOf('&');
        if (ampIdx >= 0) {
            end = Math.min(end, ampIdx);
        }
        int hashIdx = code.indexOf('#');
        if (hashIdx >= 0) {
            end = Math.min(end, hashIdx);
        }
        code = code.substring(0, end);
        if (code.isEmpty()) {
            throw new IllegalArgumentException("Cannot extract Uniqlo product code from url: " + targetUrl);
        }
        return code;
    }
}
......@@ -18,10 +18,11 @@ import java.util.concurrent.TimeoutException;
/**
* Zara西班牙时尚品牌数据爬虫
* TODO 数据未处理完全
*/
@Component("zaraSpider")
public class ZaraSpider implements IItemSpider {
private static Logger logger = LoggerFactory.getLogger(TmItemSpider.class);
private static Logger logger = LoggerFactory.getLogger(ZaraSpider.class);
//西班牙时尚品牌数据爬虫
private static final String zaraUrl = "https://www.zara.cn/cn/zh/";
......
package com.diaoyun.zion.chinafrica.controller;
import com.diaoyun.zion.chinafrica.service.TbCfFinanceService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import java.util.List;
import java.util.Map;
/**
* Finance detail controller.
*
* <p>REST controller mapped under the {@code tbcffinance} path. No request handler methods are
* defined in this class yet; it only holds the injected finance service.
*
* @author G
* @date 2019-08-14 09:11:48
*/
@RestController
@RequestMapping("tbcffinance")
public class TbCfFinanceController {
// Finance service, field-injected by Spring
@Autowired
private TbCfFinanceService tbCfFinanceService;
}
......@@ -12,9 +12,12 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> {
TB("淘宝", "tb"),
TM("天猫", "tm"),
ZARA("西班牙时尚品牌", "zara"),
PULLANDBEAR("Pullandbear","pullandbear"),
GAP("Gap", "gap"),
ZARA("Zara", "zara"),
UNIQLO("优衣库", "uniqlo"),
GAP("Gap美国休闲品牌", "gap"),
NIKE("Nike", "nike"),
UN("未知", "un");
private String label;
......
......@@ -14,29 +14,32 @@ public class ItemSpiderFactory {
public static IItemSpider getSpider(PlatformEnum platformEnum) {
IItemSpider iItemSpider;
switch (platformEnum.getValue()) {
// 淘宝
case "tb":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("tbItemSpider");
break;
}
// 天猫
case "tm":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("tmItemSpider");
break;
}
// zara 西班牙快时尚品牌
case "pullandbear":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("pullandbearSpider");
break;
}
case "zara":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("zaraSpider");
break;
}
// 优衣库
case "uniqlo":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("uniqloSpider");
break;
}
// Gap 美国休闲品牌
case "gap":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("gapSpider");
iItemSpider= (IItemSpider) SpringContextUtil.getBean("gapItemSpider");
break;
}
case "nike":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("nikeItemSpider");
break;
}
default:{
......
......@@ -26,7 +26,7 @@ public class SpiderServiceImpl implements SpiderService {
/**
 * Captures the product detail for a product page link.
 *
 * <p>The platform is derived from the link, the matching spider is obtained from
 * {@link ItemSpiderFactory} and the capture is delegated to it.
 *
 * @param targetUrl product page link
 * @return the JSON produced by the platform's spider
 */
public JSONObject getItemDetail(String targetUrl) throws InterruptedException, IOException, ExecutionException, URISyntaxException, TimeoutException {
    // Determine which platform the link belongs to
    PlatformEnum platformEnum = judgeUrlType(targetUrl);
    IItemSpider iItemSpider = ItemSpiderFactory.getSpider(platformEnum);
    return iItemSpider.captureItem(targetUrl);
}
......@@ -43,14 +43,20 @@ public class SpiderServiceImpl implements SpiderService {
private PlatformEnum judgeUrlType(String targetUrl) {
PlatformEnum platformEnum = PlatformEnum.UN;
if(targetUrl.contains("taobao.com")&&(targetUrl.contains("item.htm")||targetUrl.contains("detail.htm"))) {
platformEnum=PlatformEnum.TB;
} else if(targetUrl.contains("tmall.com/item.htm")) {
if (targetUrl.contains("taobao.com")&&(targetUrl.contains("item.htm")||targetUrl.contains("detail.htm"))) {
platformEnum = PlatformEnum.TB;
} else if (targetUrl.contains("tmall.com/item.htm")) {
platformEnum=PlatformEnum.TM;
} else if(targetUrl.contains("www.gap.cn/pdp/")) {
platformEnum=PlatformEnum.GAP;
} else if(targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum=PlatformEnum.NIKE;
} else if (targetUrl.contains("https://www.pullandbear.cn")) {
platformEnum = PlatformEnum.PULLANDBEAR;
} else if (targetUrl.contains("www.gap.cn/pdp/")) {
platformEnum = PlatformEnum.GAP;
} else if (targetUrl.contains("zara.cn")) {
platformEnum = platformEnum.ZARA;
} else if (targetUrl.contains("uniqlo.cn/product-detail.html")) {
platformEnum = platformEnum.UNIQLO;
} else if (targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum = platformEnum.NIKE;
}
return platformEnum;
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论