提交 fa12c353 authored 作者: zhengfg's avatar zhengfg

1、还原版本

上级 76d9d962
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import net.sf.json.JSONObject;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Item spider for the American casual-wear brand Gap.
 *
 * <p>TODO: not implemented yet. Analysis note kept from the original author: the product data
 * is found in the "spu.do" entry of the browser's F12 Network panel; that entry returns the
 * product data as JSON.
 */
@Component("gapSpider")
public class GapSpider implements IItemSpider {

    /**
     * Placeholder implementation: no request is made and no data is produced.
     *
     * @param targetUrl product page URL (currently ignored)
     * @return always {@code null} until the spider is implemented
     */
    @Override
    public JSONObject captureItem(String targetUrl)
            throws URISyntaxException, IOException, ExecutionException,
                    InterruptedException, TimeoutException {
        // Nothing is scraped yet; callers receive null.
        return null;
    }
}
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Item spider for the Spanish youth fashion brand pullandbear.
 *
 * <p>TODO: image paths are not handled yet (the original note left the image path format blank).
 */
@Component("pullandbearSpider")
public class PullandbearSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(PullandbearSpider.class);

    /** Base URL of the product detail REST endpoint; the product id is appended to it. */
    private static final String pullandbearUrl = "https://www.pullandbear.cn/itxrest/2/catalog/store/24009528/20309423/category/0/product/";

    /** Suffix that terminates the product id inside a product page URL. */
    private static final String HTML_SUFFIX = ".html";

    /**
     * Fetches the product detail JSON for a product page URL and reduces it to id, name and
     * colour data.
     *
     * @param targetUrl product page URL; the product id is the text between the last
     *     {@code 'p'} that precedes {@code ".html"} and {@code ".html"} itself
     * @return JSON object with the keys {@code id}, {@code name} and {@code data}
     * @throws IllegalArgumentException if no product id can be extracted from {@code targetUrl}
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Extract the product id from the page URL.
        String pId = extractProductId(targetUrl);
        // Build the detail endpoint URL for that product.
        String detailUrl = pullandbearUrl + pId + "/detail?languageId=-7&appId=1";
        // Download the response body.
        String content = HttpClientUtil.getContentByUrl(detailUrl, PlatformEnum.PULLANDBEAR.getValue());
        // Parse it and keep only the fields we need.
        return getMainData(JSONObject.fromObject(content));
    }

    /**
     * Extracts the product id from a product page URL.
     *
     * <p>The search for the {@code 'p'} marker is anchored before {@code ".html"}: searching the
     * whole URL would pick up a {@code 'p'} in the query string and produce an invalid range.
     *
     * @param targetUrl product page URL
     * @return the non-empty text between the marker and {@code ".html"}
     * @throws IllegalArgumentException if the URL is null, has no {@code ".html"}, has no
     *     {@code 'p'} before it, or the id would be empty
     */
    private static String extractProductId(String targetUrl) {
        if (targetUrl == null) {
            throw new IllegalArgumentException("targetUrl must not be null");
        }
        int suffixIndex = targetUrl.lastIndexOf(HTML_SUFFIX);
        int markerIndex = suffixIndex < 0 ? -1 : targetUrl.lastIndexOf('p', suffixIndex - 1);
        if (markerIndex < 0 || markerIndex + 1 == suffixIndex) {
            throw new IllegalArgumentException(
                    "Cannot extract pullandbear product id from URL: " + targetUrl);
        }
        return targetUrl.substring(markerIndex + 1, suffixIndex);
    }

    /**
     * Reduces the detail response to the fields returned to callers.
     *
     * @param json parsed detail response; its first {@code bundleProductSummaries} element is used
     * @return JSON object holding {@code id}, {@code name} and {@code data} (the colours array)
     */
    private JSONObject getMainData(JSONObject json) {
        JSONObject summary = json.getJSONArray("bundleProductSummaries").getJSONObject(0);
        // Product id
        String productId = summary.getString("id");
        // Product name
        String productName = summary.getString("name");
        JSONArray colors = summary.getJSONObject("detail").getJSONArray("colors");
        // Assemble the result
        JSONObject returnJson = new JSONObject();
        returnJson.put("id", productId);
        returnJson.put("name", productName);
        returnJson.put("data", colors);
        return returnJson;
    }
}
...@@ -94,6 +94,8 @@ public class TbItemSpider implements IItemSpider { ...@@ -94,6 +94,8 @@ public class TbItemSpider implements IItemSpider {
return returnJson; return returnJson;
} }
/** /**
* 翻译规格属性 * 翻译规格属性
* @param propMap 规格属性MAP * @param propMap 规格属性MAP
......
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Item spider for Uniqlo.
 *
 * <p>TODO: image paths are not handled yet. Per the original note the image path is:
 * "https://www.uniqlo.cn/hmall/test/" + product id + "/sku/40/" + product image id + ".jpg"
 */
@Component("uniqloSpider")
public class UniqloSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(UniqloSpider.class);

    /** Base URL of the Uniqlo product JSON endpoint. */
    private static final String uniqloUrl = "https://www.uniqlo.cn/data/products/spu/";

    /** Query parameter (including '=') that carries the product code in a product page URL. */
    private static final String PRODUCT_CODE_PARAM = "productCode=";

    /**
     * Fetches the product JSON for a product page URL and reduces it to name, price and rows.
     *
     * @param targetUrl product page URL containing a {@code productCode=} parameter
     * @return JSON object with {@code name}, {@code price} and, when present, {@code data}
     * @throws IllegalArgumentException if {@code targetUrl} carries no product code
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Extract the product code from the page URL.
        String pId = extractProductCode(targetUrl);
        // Build the URL of the product detail JSON.
        String detailUrl = uniqloUrl + "zh_CN/" + pId + ".json";
        // Download the response body.
        String content = HttpClientUtil.getContentByUrl(detailUrl, PlatformEnum.UNIQLO.getValue());
        // Parse as JSON.
        JSONObject json = JSONObject.fromObject(content);
        JSONObject summary = json.getJSONObject("summary");
        // Product name
        String pName = summary.getString("name");
        // Product price (the "originPrice" field)
        String pPrice = summary.getString("originPrice");
        // Assemble the result
        JSONArray rowsJson = json.getJSONArray("rows");
        JSONObject returnJson = new JSONObject();
        returnJson.put("name", pName);
        returnJson.put("price", pPrice);
        returnJson.elementOpt("data", rowsJson);
        return returnJson;
    }

    /**
     * Extracts the value of the {@code productCode} parameter from a product page URL.
     *
     * <p>The value ends at the next {@code '&'} or {@code '#'} so that trailing query parameters
     * or a fragment do not leak into the product code.
     *
     * @param targetUrl product page URL
     * @return the non-empty product code
     * @throws IllegalArgumentException if the URL is null or has no non-empty product code
     */
    private static String extractProductCode(String targetUrl) {
        if (targetUrl == null) {
            throw new IllegalArgumentException("targetUrl must not be null");
        }
        int paramIndex = targetUrl.indexOf(PRODUCT_CODE_PARAM);
        if (paramIndex < 0) {
            throw new IllegalArgumentException(
                    "URL has no productCode parameter: " + targetUrl);
        }
        int valueStart = paramIndex + PRODUCT_CODE_PARAM.length();
        int valueEnd = targetUrl.length();
        int ampersandIndex = targetUrl.indexOf('&', valueStart);
        if (ampersandIndex >= 0) {
            valueEnd = ampersandIndex;
        }
        int fragmentIndex = targetUrl.indexOf('#', valueStart);
        if (fragmentIndex >= 0 && fragmentIndex < valueEnd) {
            valueEnd = fragmentIndex;
        }
        if (valueStart == valueEnd) {
            throw new IllegalArgumentException(
                    "URL has an empty productCode parameter: " + targetUrl);
        }
        return targetUrl.substring(valueStart, valueEnd);
    }
}
package com.diaoyun.zion.chinafrica.bis.impl;
import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.HttpClientUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Item spider for the Spanish fashion brand Zara.
 *
 * <p>TODO: the scraped data is not fully processed yet.
 */
@Component("zaraSpider")
public class ZaraSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(ZaraSpider.class);

    /** Base URL of the Zara China site. */
    private static final String zaraUrl = "https://www.zara.cn/cn/zh/";

    /** Text that marks the start of the embedded product data in the page. */
    private static final String DATA_START_MARKER = "dataLayer";

    /** Text that follows the embedded product data in the page (not part of the data). */
    private static final String DATA_END_MARKER = ";window.zara.viewPayload";

    /** Assignment prefix removed from the extracted span so that only the JSON remains. */
    private static final String DATA_ASSIGNMENT_PREFIX = "dataLayer = ";

    /**
     * Downloads a product page and returns the JSON object embedded in it.
     *
     * @param targetUrl product page URL
     * @return the parsed embedded JSON, unmodified
     * @throws IllegalArgumentException if the page does not contain the expected markers
     */
    @Override
    public JSONObject captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Download the page content.
        String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue());
        // Cut out the embedded JSON and parse it.
        return JSONObject.fromObject(extractDataLayerJson(content));
    }

    /**
     * Cuts the embedded JSON text out of a product page.
     *
     * @param content full page content
     * @return the text between the start and end markers with the assignment prefix removed
     */
    private static String extractDataLayerJson(String content) {
        return getDataJson(content, DATA_START_MARKER, DATA_END_MARKER)
                .replace(DATA_ASSIGNMENT_PREFIX, "");
    }

    /**
     * Returns the part of a string delimited by a start and an end marker.
     *
     * @param jsonStr text to cut from
     * @param startStr start marker; its first occurrence begins the result (inclusive)
     * @param lastStr end marker; its last occurrence ends the result (exclusive)
     * @return the delimited substring
     * @throws IllegalArgumentException if {@code jsonStr} is null, a marker is missing, or the
     *     end marker lies before the start marker
     */
    private static String getDataJson(String jsonStr, String startStr, String lastStr) {
        if (jsonStr == null) {
            throw new IllegalArgumentException("Content to extract from must not be null");
        }
        int startIndex = jsonStr.indexOf(startStr);
        int lastIndex = jsonStr.lastIndexOf(lastStr);
        // lastIndex < startIndex also covers a missing end marker (-1).
        if (startIndex < 0 || lastIndex < startIndex) {
            throw new IllegalArgumentException(
                    "Markers not found in expected order: start='" + startStr
                            + "', end='" + lastStr + "'");
        }
        return jsonStr.substring(startIndex, lastIndex);
    }

    /**
     * Manual smoke test: downloads one hard-coded product page and prints the colour and size
     * nodes found in its embedded JSON.
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        // Product page URL
        String targetUrl = "https://www.zara.cn/cn/zh/%E5%BA%9C%E7%BB%B8%E9%95%BF%E7%89%88%E8%A1%AC%E8%A1%AB-p08053157.html?v1=31979171&v2=1319321";
        // Download the page content.
        String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.ZARA.getValue());
        // Cut out the main product data.
        String dataLayerJson = extractDataLayerJson(content);
        System.err.println(dataLayerJson);
        // Parse as JSON.
        JSONObject json = JSONObject.fromObject(dataLayerJson);
        // "product" object node
        JSONObject product = json.getJSONObject("product");
        // "detail" object node
        JSONObject detail = product.getJSONObject("detail");
        // "colors" array node
        JSONArray colorsArray = detail.getJSONArray("colors");
        Map<Integer, JSONObject> colorMap = new HashMap<>();
        for (int i = 0; i < colorsArray.size(); i++) {
            colorMap.put(i, colorsArray.getJSONObject(i));
        }
        System.out.println(colorMap);
        // TODO extract the colour attributes
        // "sizes" array nodes of every colour
        Map<Integer, JSONObject> sizesMap = new HashMap<>();
        for (JSONObject color : colorMap.values()) {
            JSONArray sizesArray = color.getJSONArray("sizes");
            for (int i = 0; i < sizesArray.size(); i++) {
                // Use a running key: keying by i would let each colour overwrite the
                // sizes collected for the previous colour.
                sizesMap.put(sizesMap.size(), sizesArray.getJSONObject(i));
            }
        }
        System.out.println(sizesMap);
        // TODO extract the price and size attributes
        // TODO extract the image attributes (image entity class unknown)
    }
}
package com.diaoyun.zion.chinafrica.controller;
import com.diaoyun.zion.chinafrica.service.TbCfFinanceService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import java.util.List;
import java.util.Map;
/**
 * REST controller for finance detail records.
 *
 * <p>Mapped under {@code tbcffinance}; no request handlers are defined in this class yet.
 *
 * @author G
 * @date 2019-08-14 09:11:48
 */
@RestController
@RequestMapping("tbcffinance")
public class TbCfFinanceController {

    /** Finance detail service, injected by Spring. */
    @Autowired
    private TbCfFinanceService tbCfFinanceService;
}
...@@ -12,12 +12,8 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> { ...@@ -12,12 +12,8 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> {
TB("淘宝", "tb"), TB("淘宝", "tb"),
TM("天猫", "tm"), TM("天猫", "tm"),
PULLANDBEAR("Pullandbear","pullandbear"), GAP("GAP", "gap"),
GAP("Gap", "gap"), NIKE("NIKE", "nike"),
ZARA("Zara", "zara"),
UNIQLO("优衣库", "uniqlo"),
NIKE("Nike", "nike"),
UN("未知", "un"); UN("未知", "un");
private String label; private String label;
......
...@@ -4,8 +4,6 @@ import com.diaoyun.zion.chinafrica.bis.IItemSpider; ...@@ -4,8 +4,6 @@ import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.chinafrica.enums.PlatformEnum; import com.diaoyun.zion.chinafrica.enums.PlatformEnum;
import com.diaoyun.zion.master.util.SpringContextUtil; import com.diaoyun.zion.master.util.SpringContextUtil;
import javax.swing.*;
/** /**
* 商品爬虫 * 商品爬虫
*/ */
...@@ -22,18 +20,6 @@ public class ItemSpiderFactory { ...@@ -22,18 +20,6 @@ public class ItemSpiderFactory {
iItemSpider= (IItemSpider) SpringContextUtil.getBean("tmItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("tmItemSpider");
break; break;
} }
case "pullandbear":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("pullandbearSpider");
break;
}
case "zara":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("zaraSpider");
break;
}
case "uniqlo":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("uniqloSpider");
break;
}
case "gap":{ case "gap":{
iItemSpider= (IItemSpider) SpringContextUtil.getBean("gapItemSpider"); iItemSpider= (IItemSpider) SpringContextUtil.getBean("gapItemSpider");
break; break;
......
...@@ -26,7 +26,7 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -26,7 +26,7 @@ public class SpiderServiceImpl implements SpiderService {
public JSONObject getItemDetail(String targetUrl) throws InterruptedException, IOException, ExecutionException, URISyntaxException, TimeoutException { public JSONObject getItemDetail(String targetUrl) throws InterruptedException, IOException, ExecutionException, URISyntaxException, TimeoutException {
//判断链接属于哪个平台 //判断链接属于哪个平台
PlatformEnum platformEnum = judgeUrlType(targetUrl); PlatformEnum platformEnum=judgeUrlType(targetUrl);
IItemSpider iItemSpider=ItemSpiderFactory.getSpider(platformEnum); IItemSpider iItemSpider=ItemSpiderFactory.getSpider(platformEnum);
return iItemSpider.captureItem(targetUrl); return iItemSpider.captureItem(targetUrl);
} }
...@@ -43,20 +43,14 @@ public class SpiderServiceImpl implements SpiderService { ...@@ -43,20 +43,14 @@ public class SpiderServiceImpl implements SpiderService {
private PlatformEnum judgeUrlType(String targetUrl) { private PlatformEnum judgeUrlType(String targetUrl) {
PlatformEnum platformEnum = PlatformEnum.UN; PlatformEnum platformEnum = PlatformEnum.UN;
if (targetUrl.contains("taobao.com")&&(targetUrl.contains("item.htm")||targetUrl.contains("detail.htm"))) { if(targetUrl.contains("taobao.com")&&(targetUrl.contains("item.htm")||targetUrl.contains("detail.htm"))) {
platformEnum = PlatformEnum.TB; platformEnum=PlatformEnum.TB;
} else if (targetUrl.contains("tmall.com/item.htm")) { } else if(targetUrl.contains("tmall.com/item.htm")) {
platformEnum=PlatformEnum.TM; platformEnum=PlatformEnum.TM;
} else if (targetUrl.contains("https://www.pullandbear.cn")) { } else if(targetUrl.contains("www.gap.cn/pdp/")) {
platformEnum = PlatformEnum.PULLANDBEAR; platformEnum=PlatformEnum.GAP;
} else if (targetUrl.contains("www.gap.cn/pdp/")) { } else if(targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum = PlatformEnum.GAP; platformEnum=PlatformEnum.NIKE;
} else if (targetUrl.contains("zara.cn")) {
platformEnum = platformEnum.ZARA;
} else if (targetUrl.contains("uniqlo.cn/product-detail.html")) {
platformEnum = platformEnum.UNIQLO;
} else if (targetUrl.contains("www.nike.com/cn/t/")) {
platformEnum = platformEnum.NIKE;
} }
return platformEnum; return platformEnum;
} }
......
...@@ -20,7 +20,6 @@ import java.util.regex.Pattern; ...@@ -20,7 +20,6 @@ import java.util.regex.Pattern;
public class JsoupUtil { public class JsoupUtil {
public static String unknow = "未知"; public static String unknow = "未知";
private static Logger logger = LoggerFactory.getLogger(JsoupUtil.class); private static Logger logger = LoggerFactory.getLogger(JsoupUtil.class);
/** /**
* 获取淘宝商品详情的信息 店铺id 名字 主图 sibUrl 等 * 获取淘宝商品详情的信息 店铺id 名字 主图 sibUrl 等
* *
...@@ -98,7 +97,6 @@ public class JsoupUtil { ...@@ -98,7 +97,6 @@ public class JsoupUtil {
} }
return configGroup; return configGroup;
} }
/** /**
* 根据script中的变量名获取script中变量相关的内容,特指天猫的返回信息 没什么用,没有商品的规格信息 * 根据script中的变量名获取script中变量相关的内容,特指天猫的返回信息 没什么用,没有商品的规格信息
* *
......
...@@ -100,6 +100,6 @@ security: ...@@ -100,6 +100,6 @@ security:
jwt: jwt:
token: token:
secret-key: secret-key secret-key: secret-key
expire-length: 604800000 # one week or 300000 5 minutes duration by default: 5 minutes * 60 seconds * 1000 miliseconds expire-length: 60480000000 # one week 604800000 or 300000 5 minutes duration by default: 5 minutes * 60 seconds * 1000 miliseconds
...@@ -22,8 +22,8 @@ email.subject=Verification Code from Chinafrica ...@@ -22,8 +22,8 @@ email.subject=Verification Code from Chinafrica
###################################邮箱 END############################################### ###################################邮箱 END###############################################
################redis有效时间################### ################redis有效时间###################
#登录token second 默认30分 一个星期 #登录token second 默认30分 一个星期 604800
redis.token.expiredTime=604800 redis.token.expiredTime=60480000
#订单 30分 second #订单 30分 second
redis.order.expiredTime=1800 redis.order.expiredTime=1800
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论