Skip to content
项目
群组
代码片段
帮助
正在加载...
帮助
为 GitLab 提交贡献
登录/注册
切换导航
Z
zion
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分枝图
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
计划
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
分枝图
统计图
创建新议题
作业
提交
议题看板
打开侧边栏
zhengfg
zion
Commits
edd3e9e1
提交
edd3e9e1
authored
10月 03, 2019
作者:
zhengfg
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
1、增加nike、gap 爬虫
2、对爬虫返回的数据结构进行调整
上级
591f8b9e
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
21 个修改的文件
包含
6325 行增加
和
56 行删除
+6325
-56
GapItemSpider.java
...a/com/diaoyun/zion/chinafrica/bis/impl/GapItemSpider.java
+73
-0
NikeItemSpider.java
.../com/diaoyun/zion/chinafrica/bis/impl/NikeItemSpider.java
+43
-0
TmItemSpider.java
...va/com/diaoyun/zion/chinafrica/bis/impl/TmItemSpider.java
+2
-2
KeyConstant.java
...ava/com/diaoyun/zion/chinafrica/constant/KeyConstant.java
+3
-0
PlatformEnum.java
.../java/com/diaoyun/zion/chinafrica/enums/PlatformEnum.java
+2
-0
ItemSpiderFactory.java
...om/diaoyun/zion/chinafrica/factory/ItemSpiderFactory.java
+8
-0
SpiderServiceImpl.java
...aoyun/zion/chinafrica/service/impl/SpiderServiceImpl.java
+4
-0
DynStock.java
src/main/java/com/diaoyun/zion/chinafrica/vo/DynStock.java
+17
-1
OriginalPrice.java
...in/java/com/diaoyun/zion/chinafrica/vo/OriginalPrice.java
+1
-1
ProductPromotion.java
...java/com/diaoyun/zion/chinafrica/vo/ProductPromotion.java
+1
-1
ProductProp.java
...main/java/com/diaoyun/zion/chinafrica/vo/ProductProp.java
+20
-0
ProductProps.java
...ain/java/com/diaoyun/zion/chinafrica/vo/ProductProps.java
+7
-6
ProductResponse.java
.../java/com/diaoyun/zion/chinafrica/vo/ProductResponse.java
+50
-17
ProductSkuStock.java
.../java/com/diaoyun/zion/chinafrica/vo/ProductSkuStock.java
+5
-5
HttpClientUtil.java
...ain/java/com/diaoyun/zion/master/util/HttpClientUtil.java
+26
-4
JsoupUtil.java
src/main/java/com/diaoyun/zion/master/util/JsoupUtil.java
+6
-19
SpiderUtil.java
src/main/java/com/diaoyun/zion/master/util/SpiderUtil.java
+263
-0
gap_data.js
src/main/resources/static/js/angularjs/js/gap_data.js
+1861
-0
gap_part.js
src/main/resources/static/js/angularjs/js/gap_part.js
+679
-0
nike_data.js
src/main/resources/static/js/angularjs/js/nike_data.js
+2692
-0
nike_part.js
src/main/resources/static/js/angularjs/js/nike_part.js
+562
-0
没有找到文件。
src/main/java/com/diaoyun/zion/chinafrica/bis/impl/GapItemSpider.java
0 → 100644
浏览文件 @
edd3e9e1
package
com
.
diaoyun
.
zion
.
chinafrica
.
bis
.
impl
;
import
com.diaoyun.zion.chinafrica.bis.IItemSpider
;
import
com.diaoyun.zion.chinafrica.enums.PlatformEnum
;
import
com.diaoyun.zion.chinafrica.vo.ProductResponse
;
import
com.diaoyun.zion.master.util.*
;
import
net.sf.json.JSONArray
;
import
net.sf.json.JSONObject
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.text.StringEscapeUtils
;
import
org.apache.http.message.BasicHeader
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.stereotype.Component
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.URISyntaxException
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.ExecutionException
;
import
java.util.concurrent.TimeoutException
;
/**
 * Item spider for Gap product pages.
 *
 * <p>Extracts the style code from the product link, posts it to the Gap
 * product-search endpoint and, when the endpoint reports success, converts the
 * returned data into the JSON form of a {@link ProductResponse}.
 */
@Component("gapItemSpider")
public class GapItemSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(GapItemSpider.class);

    // Gap product detail (search-by-condition) endpoint
    private static final String gapUrl = "https://apicn.gap.cn/gap/store/product/list/searchProductByCondition.do";

    /**
     * Fetches the product behind {@code targetUrl} from the Gap endpoint.
     *
     * @param targetUrl Gap product link whose last path segment contains the style
     *                  code between its first and last underscore
     * @return the JSON form of the formatted {@link ProductResponse} when the
     *         endpoint answers with {@code success == true}; otherwise the raw
     *         endpoint answer, unchanged
     * @throws IllegalArgumentException if no style code can be extracted from
     *                                  {@code targetUrl}
     */
    @Override
    public JSONObject captureItem(String targetUrl)
            throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Style code embedded in the product link
        String itemId = getItemId(targetUrl);

        // Search condition: a single "style" condition carrying the style code
        JSONObject valueObj = new JSONObject();
        valueObj.put("key", "style");
        valueObj.put("valueType", "basic");
        valueObj.put("value", new String[]{itemId});

        JSONArray conditionList = new JSONArray();
        conditionList.add(valueObj);

        JSONObject condition = new JSONObject();
        condition.put("conditionList", conditionList);

        Map<String, Object> paramMap = new HashMap<>();
        paramMap.put("data", condition);

        // Query the endpoint and parse its answer
        String content = HttpClientUtil.sendPostWithBodyParameter(gapUrl, paramMap);
        JSONObject resultObj = JSONObject.fromObject(content);
        if (resultObj.getBoolean("success")) {
            // Convert the raw payload into the common response structure
            ProductResponse productResponse = SpiderUtil.formatGapProductResponse(resultObj.getJSONObject("data"));
            resultObj = JSONObject.fromObject(productResponse);
        }
        return resultObj;
    }

    /**
     * Extracts the style code from a Gap product link: the text between the first
     * and the last underscore of the last path segment.
     *
     * <p>The query string and fragment are removed first, so underscores or slashes
     * in tracking parameters cannot corrupt the result.
     *
     * @param targetUrl Gap product link
     * @return the style code (may be empty when the two underscores are adjacent)
     * @throws IllegalArgumentException if {@code targetUrl} is null or its last path
     *                                  segment does not contain two underscores
     */
    private String getItemId(String targetUrl) {
        if (targetUrl == null) {
            throw new IllegalArgumentException("Gap product URL must not be null");
        }
        // Cut the URL at the first '?' or '#', whichever comes first
        int end = targetUrl.length();
        int queryStart = targetUrl.indexOf('?');
        if (queryStart >= 0) {
            end = queryStart;
        }
        int fragmentStart = targetUrl.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < end) {
            end = fragmentStart;
        }
        String path = targetUrl.substring(0, end);

        String spuCode = path.substring(path.lastIndexOf('/') + 1);
        int firstUnder = spuCode.indexOf('_');
        int lastUnder = spuCode.lastIndexOf('_');
        if (firstUnder < 0 || lastUnder <= firstUnder) {
            // Fail with a clear message instead of a StringIndexOutOfBoundsException
            throw new IllegalArgumentException("Cannot extract Gap style code from URL: " + targetUrl);
        }
        return spuCode.substring(firstUnder + 1, lastUnder);
    }
}
src/main/java/com/diaoyun/zion/chinafrica/bis/impl/NikeItemSpider.java
0 → 100644
浏览文件 @
edd3e9e1
package
com
.
diaoyun
.
zion
.
chinafrica
.
bis
.
impl
;
import
com.diaoyun.zion.chinafrica.bis.IItemSpider
;
import
com.diaoyun.zion.chinafrica.enums.PlatformEnum
;
import
com.diaoyun.zion.chinafrica.vo.ProductResponse
;
import
com.diaoyun.zion.master.util.*
;
import
net.sf.json.JSONArray
;
import
net.sf.json.JSONObject
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.stereotype.Component
;
import
java.io.IOException
;
import
java.net.URISyntaxException
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.ExecutionException
;
import
java.util.concurrent.TimeoutException
;
/**
 * Item spider for Nike product pages.
 *
 * <p>Downloads the product page, reads the product data stored in the page's
 * {@code window.INITIAL_REDUX_STATE} script variable and converts it into the
 * JSON form of a {@link ProductResponse}.
 */
@Component("nikeItemSpider")
public class NikeItemSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(NikeItemSpider.class);

    /**
     * Fetches and formats the product behind {@code targetUrl}.
     *
     * @param targetUrl Nike product page link
     * @return the JSON form of the formatted {@link ProductResponse}
     */
    @Override
    public JSONObject captureItem(String targetUrl)
            throws URISyntaxException, IOException, ExecutionException, InterruptedException, TimeoutException {
        // Download the page content for the given link
        String content = HttpClientUtil.getContentByUrl(targetUrl, PlatformEnum.NIKE.getValue());
        // Product details live in a <script> tag, in the window.INITIAL_REDUX_STATE variable
        JSONObject reduxState = JsoupUtil.getItemDetailByName(content, "window.INITIAL_REDUX_STATE");
        // Convert the raw state into the common response structure
        ProductResponse productResponse = SpiderUtil.formatNikeProductResponse(reduxState);
        return JSONObject.fromObject(productResponse);
    }
}
src/main/java/com/diaoyun/zion/chinafrica/bis/impl/TmItemSpider.java
浏览文件 @
edd3e9e1
...
...
@@ -38,8 +38,8 @@ public class TmItemSpider implements IItemSpider {
List
<
Map
<
String
,
Object
>>
futureList
=
new
ArrayList
<>();
//获取url中的网页内容
String
content
=
HttpClientUtil
.
getContentByUrl
(
targetUrl
,
PlatformEnum
.
TM
.
getValue
());
//获取商品详情
JSONObject
infoMap
=
JsoupUtil
.
get
TmItemDetail
(
content
);
//获取商品详情
观察数据可发现商品数据在 _DATA_Detail 变量中
JSONObject
infoMap
=
JsoupUtil
.
get
ItemDetailByName
(
content
,
"_DATA_Detail"
);
JSONObject
skuBaseMap
=
(
JSONObject
)
infoMap
.
get
(
"skuBase"
);
if
(!(
skuBaseMap
.
get
(
"props"
)
instanceof
JSONNull
))
{
JSONArray
propsArray
=
(
JSONArray
)
skuBaseMap
.
get
(
"props"
);
...
...
src/main/java/com/diaoyun/zion/chinafrica/constant/KeyConstant.java
浏览文件 @
edd3e9e1
...
...
@@ -24,4 +24,7 @@ public class KeyConstant {
/////////////////订单 END////////////////
//验证码前缀
public
final
static
String
CAPTCHA
=
"captcha_"
;
//自定义id头部
public
final
static
String
CUSTOMIZE_ID
=
"customizeId_"
;
}
src/main/java/com/diaoyun/zion/chinafrica/enums/PlatformEnum.java
浏览文件 @
edd3e9e1
...
...
@@ -12,6 +12,8 @@ public enum PlatformEnum implements EnumItemable<PlatformEnum> {
TB
(
"淘宝"
,
"tb"
),
TM
(
"天猫"
,
"tm"
),
GAP
(
"GAP"
,
"gap"
),
NIKE
(
"NIKE"
,
"nike"
),
UN
(
"未知"
,
"un"
);
private
String
label
;
...
...
src/main/java/com/diaoyun/zion/chinafrica/factory/ItemSpiderFactory.java
浏览文件 @
edd3e9e1
...
...
@@ -20,6 +20,14 @@ public class ItemSpiderFactory {
iItemSpider
=
(
IItemSpider
)
SpringContextUtil
.
getBean
(
"tmItemSpider"
);
break
;
}
case
"gap"
:{
iItemSpider
=
(
IItemSpider
)
SpringContextUtil
.
getBean
(
"gapItemSpider"
);
break
;
}
case
"nike"
:{
iItemSpider
=
(
IItemSpider
)
SpringContextUtil
.
getBean
(
"nikeItemSpider"
);
break
;
}
default
:{
iItemSpider
=
(
IItemSpider
)
SpringContextUtil
.
getBean
(
"emptyItemSpider"
);
break
;
...
...
src/main/java/com/diaoyun/zion/chinafrica/service/impl/SpiderServiceImpl.java
浏览文件 @
edd3e9e1
...
...
@@ -47,6 +47,10 @@ public class SpiderServiceImpl implements SpiderService {
platformEnum
=
PlatformEnum
.
TB
;
}
else
if
(
targetUrl
.
contains
(
"tmall.com/item.htm"
))
{
platformEnum
=
PlatformEnum
.
TM
;
}
else
if
(
targetUrl
.
contains
(
"www.gap.cn/pdp/"
))
{
platformEnum
=
PlatformEnum
.
GAP
;
}
else
if
(
targetUrl
.
contains
(
"www.nike.com/cn/t/"
))
{
platformEnum
=
PlatformEnum
.
NIKE
;
}
return
platformEnum
;
}
...
...
src/main/java/com/diaoyun/zion/chinafrica/vo/DynStock.java
浏览文件 @
edd3e9e1
...
...
@@ -9,5 +9,21 @@ public class DynStock {
//可用总的库存数
private
int
sellableQuantity
;
//sku对应的库存数
private
List
<
ProductSku
>
sku
;
private
List
<
ProductSkuStock
>
productSkuStockList
;
public
int
getSellableQuantity
()
{
return
sellableQuantity
;
}
public
void
setSellableQuantity
(
int
sellableQuantity
)
{
this
.
sellableQuantity
=
sellableQuantity
;
}
public
List
<
ProductSkuStock
>
getProductSkuStockList
()
{
return
productSkuStockList
;
}
public
void
setProductSkuStockList
(
List
<
ProductSkuStock
>
productSkuStockList
)
{
this
.
productSkuStockList
=
productSkuStockList
;
}
}
src/main/java/com/diaoyun/zion/chinafrica/vo/OriginalPrice.java
浏览文件 @
edd3e9e1
...
...
@@ -4,7 +4,7 @@ package com.diaoyun.zion.chinafrica.vo;
* 原始价格
*/
public
class
OriginalPrice
{
//sku
字符串
;1627207:425613015;
//sku
id标识
;1627207:425613015;
private
String
skuStr
;
//sku对应价格
private
String
price
;
...
...
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductPromotion.java
浏览文件 @
edd3e9e1
...
...
@@ -4,7 +4,7 @@ package com.diaoyun.zion.chinafrica.vo;
* 商品促销价格
*/
public
class
ProductPromotion
{
//sku
字符串
;1627207:425613015;
//sku
id标识
;1627207:425613015;
private
String
skuStr
;
//sku对应价格
private
String
price
;
...
...
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductProp.java
浏览文件 @
edd3e9e1
...
...
@@ -44,4 +44,24 @@ public class ProductProp {
public
void
setTranslate
(
String
translate
)
{
this
.
translate
=
translate
;
}
/**
 * Compares two props by their {@code propId} only.
 *
 * <p>Null-safe: a prop whose {@code propId} is null equals only another prop
 * whose {@code propId} is also null, instead of throwing a
 * {@link NullPointerException}.
 *
 * @param obj the object to compare with
 * @return {@code true} if {@code obj} is a {@code ProductProp} with an equal
 *         {@code propId}
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    // instanceof is false for null, so this also covers obj == null
    if (!(obj instanceof ProductProp)) {
        return false;
    }
    ProductProp other = (ProductProp) obj;
    return java.util.Objects.equals(other.propId, this.propId);
}

/**
 * Hash code derived from {@code propId} only, consistent with
 * {@link #equals(Object)}; a null {@code propId} hashes to 0.
 *
 * @return the hash code of {@code propId}, or 0 when it is null
 */
@Override
public int hashCode() {
    return java.util.Objects.hashCode(propId);
}
}
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductProps.java
浏览文件 @
edd3e9e1
package
com
.
diaoyun
.
zion
.
chinafrica
.
vo
;
import
java.util.
Lis
t
;
import
java.util.
Se
t
;
/**
* 商品属性list
*/
@Deprecated
public
class
ProductProps
{
//属性名 比如颜色
private
String
name
;
//翻译
private
String
translate
;
//商品属性
private
List
<
ProductProp
>
prop
;
private
Set
<
ProductProp
>
propSet
;
public
String
getName
()
{
return
name
;
...
...
@@ -29,11 +30,11 @@ public class ProductProps {
this
.
translate
=
translate
;
}
public
List
<
ProductProp
>
getProp
()
{
return
prop
;
public
Set
<
ProductProp
>
getPropSet
()
{
return
prop
Set
;
}
public
void
setProp
(
List
<
ProductProp
>
prop
)
{
this
.
prop
=
prop
;
public
void
setProp
Set
(
Set
<
ProductProp
>
propSet
)
{
this
.
prop
Set
=
propSet
;
}
}
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductResponse.java
浏览文件 @
edd3e9e1
package
com
.
diaoyun
.
zion
.
chinafrica
.
vo
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Set
;
/**
* 爬取数据后,返回页面的商品详情数据
...
...
@@ -8,36 +10,59 @@ import java.util.List;
public
class
ProductResponse
{
//原始价格 有优惠的话还有优惠价
private
List
<
OriginalPrice
>
originalPrice
;
private
List
<
OriginalPrice
>
originalPriceList
;
//是否包促销价格 true 有促销价格,false\null没有促销价格
private
boolean
promotionFlag
;
//促销价格
private
List
<
ProductPromotion
>
promotion
;
//
一口价,就是商品一开始展示的价格,比如多sku
的情况下展示 18.80-49.90
private
List
<
ProductPromotion
>
promotion
List
;
//
原价一口价,就是商品一开始展示的价格,比如多sku多价格
的情况下展示 18.80-49.90
private
String
price
;
//促销一口价
private
String
salePrice
;
//是否包含库存信息 有些商品没有库存信息,可以当作是有货 true 有库存信息,false没有
private
boolean
stockFlag
;
//库存
private
DynStock
dynStock
;
//是否包含商品属性,有些商品没有属性
private
boolean
propFlag
;
//商品属性
private
List
<
ProductProps
>
propLis
t
;
//商品属性
颜色:红色,蓝色;尺码:S,l,M
private
Map
<
String
,
Set
<
ProductProp
>>
productPropSe
t
;
//商品信息
private
ItemInfo
itemInfo
;
//商品来源平台 PlatformEnum
private
String
platform
;
public
List
<
OriginalPrice
>
getOriginalPrice
()
{
return
originalPrice
;
public
boolean
isStockFlag
()
{
return
stockFlag
;
}
public
void
setStockFlag
(
boolean
stockFlag
)
{
this
.
stockFlag
=
stockFlag
;
}
public
List
<
OriginalPrice
>
getOriginalPriceList
()
{
return
originalPriceList
;
}
public
void
setOriginalPriceList
(
List
<
OriginalPrice
>
originalPriceList
)
{
this
.
originalPriceList
=
originalPriceList
;
}
public
void
setOriginalPrice
(
List
<
OriginalPrice
>
originalPrice
)
{
this
.
originalPrice
=
originalPrice
;
public
List
<
ProductPromotion
>
getPromotionList
(
)
{
return
promotionList
;
}
public
List
<
ProductPromotion
>
getPromotion
()
{
return
promotion
;
public
String
getSalePrice
()
{
return
salePrice
;
}
public
void
setPromotion
(
List
<
ProductPromotion
>
promotion
)
{
this
.
promotion
=
promotion
;
public
void
setSalePrice
(
String
salePrice
)
{
this
.
salePrice
=
salePrice
;
}
public
void
setPromotionList
(
List
<
ProductPromotion
>
promotionList
)
{
this
.
promotionList
=
promotionList
;
}
public
String
getPrice
()
{
...
...
@@ -64,12 +89,12 @@ public class ProductResponse {
this
.
propFlag
=
propFlag
;
}
public
List
<
ProductProps
>
getPropLis
t
()
{
return
pro
pLis
t
;
public
Map
<
String
,
Set
<
ProductProp
>>
getProductPropSe
t
()
{
return
pro
ductPropSe
t
;
}
public
void
setPro
pList
(
List
<
ProductProps
>
propLis
t
)
{
this
.
pro
pList
=
propLis
t
;
public
void
setPro
ductPropSet
(
Map
<
String
,
Set
<
ProductProp
>>
productPropSe
t
)
{
this
.
pro
ductPropSet
=
productPropSe
t
;
}
public
ItemInfo
getItemInfo
()
{
...
...
@@ -87,4 +112,12 @@ public class ProductResponse {
public
void
setPlatform
(
String
platform
)
{
this
.
platform
=
platform
;
}
public
boolean
isPromotionFlag
()
{
return
promotionFlag
;
}
public
void
setPromotionFlag
(
boolean
promotionFlag
)
{
this
.
promotionFlag
=
promotionFlag
;
}
}
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductSku.java
→
src/main/java/com/diaoyun/zion/chinafrica/vo/ProductSku
Stock
.java
浏览文件 @
edd3e9e1
...
...
@@ -3,11 +3,11 @@ package com.diaoyun.zion.chinafrica.vo;
/**
* sku 库存
*/
public
class
ProductSku
{
//sku
拼接的字符串
;1627207:425613015;
public
class
ProductSku
Stock
{
//sku
id标识
;1627207:425613015;
private
String
skuStr
;
//可销售库存数量
private
String
sellableQuantity
;
private
int
sellableQuantity
;
public
String
getSkuStr
()
{
return
skuStr
;
...
...
@@ -17,11 +17,11 @@ public class ProductSku {
this
.
skuStr
=
skuStr
;
}
public
String
getSellableQuantity
()
{
public
int
getSellableQuantity
()
{
return
sellableQuantity
;
}
public
void
setSellableQuantity
(
String
sellableQuantity
)
{
public
void
setSellableQuantity
(
int
sellableQuantity
)
{
this
.
sellableQuantity
=
sellableQuantity
;
}
}
src/main/java/com/diaoyun/zion/master/util/HttpClientUtil.java
浏览文件 @
edd3e9e1
...
...
@@ -22,12 +22,12 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.URI
;
import
java.net.URISyntaxException
;
import
java.net.URL
;
import
java.io.UnsupportedEncodingException
;
import
java.net.*
;
import
java.nio.charset.Charset
;
import
java.util.*
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
public
class
HttpClientUtil
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClientUtil
.
class
);
...
...
@@ -40,6 +40,7 @@ public class HttpClientUtil {
* @throws IOException
*/
public
static
String
getContentByUrl
(
String
sourceUrl
,
String
sourceType
)
throws
URISyntaxException
,
IOException
{
sourceUrl
=
urlEncode
(
sourceUrl
,
Consts
.
UTF_8
.
name
());
URL
url
=
new
URL
(
sourceUrl
);
//构建URI
URI
uri
=
new
URI
(
url
.
getProtocol
(),
url
.
getHost
(),
url
.
getPath
(),
url
.
getQuery
(),
null
);
...
...
@@ -235,4 +236,25 @@ public class HttpClientUtil {
sibClient
.
close
();
return
sibContent
;
}
/**
 * Percent-encodes every character of a link that lies outside the
 * {@code \x00-\xff} range (double-byte characters such as Chinese text and
 * Chinese punctuation); all other characters are left untouched.
 *
 * <p>The link is rewritten in a single pass. A null or empty link is returned
 * as-is.
 *
 * @param url      the link to encode; may be null
 * @param chartSet name of the charset used for the percent-encoding
 * @return the link with its double-byte characters encoded, or the original
 *         link when {@code chartSet} is not supported (the failure is logged)
 */
public static String urlEncode(String url, String chartSet) {
    if (url == null || url.isEmpty()) {
        return url;
    }
    try {
        // Double-byte characters, including Chinese text and Chinese punctuation
        Matcher matcher = Pattern.compile("[^\\x00-\\xff]").matcher(url);
        StringBuffer encoded = new StringBuffer(url.length());
        while (matcher.find()) {
            String replacement = java.net.URLEncoder.encode(matcher.group(), chartSet);
            // quoteReplacement keeps the encoded text literal in appendReplacement
            matcher.appendReplacement(encoded, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(encoded);
        return encoded.toString();
    } catch (UnsupportedEncodingException e) {
        logger.error("双字节编码异常:", e);
        return url;
    }
}
}
src/main/java/com/diaoyun/zion/master/util/JsoupUtil.java
浏览文件 @
edd3e9e1
...
...
@@ -33,7 +33,7 @@ public class JsoupUtil {
String
varArr
[]
=
configGroup
.
split
(
";"
);
for
(
String
variable
:
varArr
)
{
//获取g_config 变量
Pattern
variablePattern
=
Pattern
.
compile
(
"(
var){1,1}\\s+(
g_config){1,1}\\s+={1,1}[\\s\\S]*"
);
// Regex for the value of the key
Pattern
variablePattern
=
Pattern
.
compile
(
"(g_config){1,1}\\s+={1,1}[\\s\\S]*"
);
// Regex for the value of the key
Matcher
varMatcher
=
variablePattern
.
matcher
(
variable
);
while
(
varMatcher
.
find
())
{
String
configStr
=
varMatcher
.
group
();
...
...
@@ -86,7 +86,7 @@ public class JsoupUtil {
for
(
DataNode
dataNode
:
element
.
dataNodes
())
{
String
dataStr
=
dataNode
.
getWholeData
();
//获取带有 g_config 变量的 script 标签
Pattern
p
=
Pattern
.
compile
(
"(
var){1,1}\\s+("
+
variableName
+
"){1,1}\\s+
={1,1}[\\s\\S]*(;){1,1}"
);
// Regex for the value of the key
Pattern
p
=
Pattern
.
compile
(
"(
"
+
variableName
+
"){1,1}\\s*
={1,1}[\\s\\S]*(;){1,1}"
);
// Regex for the value of the key
Matcher
m
=
p
.
matcher
(
dataStr
);
// you have to use html here and NOT text! Text will drop the 'key' part
while
((
m
.
find
()))
{
//System.out.println(m.group());
...
...
@@ -210,33 +210,20 @@ public class JsoupUtil {
}
/**
* 获取
天猫商品详情 手机端的,手机端在香港会返回与大陆不一样的页面信息
* 获取
变量的值
*
* @param content
* @return
*/
public
static
JSONObject
getTmItemDetail
(
String
content
)
{
String
variableName
=
"_DATA_Detail"
;
public
static
JSONObject
getItemDetailByName
(
String
content
,
String
variableName
)
{
String
detailStr
=
getScriptContent
(
content
,
variableName
);
//Map<String, String> returnMap = new HashMap<>();
int
firstBrackets
=
detailStr
.
indexOf
(
"{"
);
int
lastbrackets
=
detailStr
.
lastIndexOf
(
"}"
);
detailStr
=
detailStr
.
substring
(
firstBrackets
,
lastbrackets
+
1
);
JSONObject
dataMap
=
JSONObject
.
fromObject
(
detailStr
);
return
dataMap
;
}
/**
* 获取天猫商品详情
*
* @param content
* @return
*/
/* public static JSONObject getTmItemDetail(String content) {
//String variableName = "TShop.Setup";
String detailStr = getTmScriptContent(content);
JSONObject dataMap= JSONObject.fromObject(detailStr);
return dataMap;
}*/
}
src/main/java/com/diaoyun/zion/master/util/SpiderUtil.java
0 → 100644
浏览文件 @
edd3e9e1
差异被折叠。
点击展开。
src/main/resources/static/js/angularjs/js/gap_data.js
0 → 100644
浏览文件 @
edd3e9e1
差异被折叠。
点击展开。
src/main/resources/static/js/angularjs/js/gap_part.js
0 → 100644
浏览文件 @
edd3e9e1
差异被折叠。
点击展开。
src/main/resources/static/js/angularjs/js/nike_data.js
0 → 100644
浏览文件 @
edd3e9e1
差异被折叠。
点击展开。
src/main/resources/static/js/angularjs/js/nike_part.js
0 → 100644
浏览文件 @
edd3e9e1
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论