package com.diaoyun.zion.chinafrica.bis.impl;

import com.diaoyun.zion.chinafrica.bis.IItemSpider;
import com.diaoyun.zion.master.bo.TencentTranslateParam;
import com.diaoyun.zion.master.thread.TaskLimitSemaphore;
import com.diaoyun.zion.master.thread.TranslateCallable;
import com.diaoyun.zion.master.util.HttpClientUtil;
import com.diaoyun.zion.master.util.JsoupUtil;
import com.diaoyun.zion.master.util.ValidateUtils;
import net.sf.json.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

/**
 * Spider that captures item data from Taobao.
 *
 * <p>{@link #captureItem(String)} downloads the item page, extracts the SKU
 * properties and the detail ("sib") URL from it, submits the Chinese property
 * names and SKU titles for translation, fetches the detail data, and finally
 * merges everything into a single map.
 */
@Component("tbItemSpider")
public class TbItemSpider implements IItemSpider {
    private static final Logger logger = LoggerFactory.getLogger(TbItemSpider.class);

    /**
     * Modules stripped from the detail URL, in the order they are removed.
     * They either require a login or are not needed in the response; the two
     * delivery modules are removed for now because the freight format is
     * problematic (see the TODO in {@link #deleteLoginModule(String)}).
     */
    private static final String[] EXCLUDED_MODULES = {
            "couponActivity,",
            "soldQuantity,",
            "tradeContract,",
            "upp,",
            "deliveryFee,",
            "delivery,"
    };

    // Translation tasks are submitted through this limiter (20 threads by default).
    // It is shared across calls and must not be shut down, otherwise the next
    // call could no longer submit tasks.
    private final TaskLimitSemaphore taskLimitSemaphore = new TaskLimitSemaphore(20);

    /**
     * Captures the item behind {@code targetUrl}.
     *
     * @param targetUrl URL of the item page; also sent as the {@code referer}
     *                  header of the detail request
     * @return the parsed detail response, with the SKU properties added under
     *         {@code "Jprop"} and the extracted page info under {@code "itemProp"}
     * @throws IOException          if the page yields no detail URL, or propagated
     *                              from the HTTP helper
     * @throws ExecutionException   if a translation task failed
     * @throws InterruptedException if interrupted while waiting for a translation
     * @throws URISyntaxException   declared for the HTTP helper
     */
    @Override
    @SuppressWarnings("unchecked") // JSONObject is a raw Map keyed by String
    public Map<String, Object> captureItem(String targetUrl) throws URISyntaxException, IOException, ExecutionException, InterruptedException {
        List<Map<String, Object>> futureList = new ArrayList<>();
        // Fetch the page content behind the URL.
        String content = HttpClientUtil.getContentByUrl(targetUrl);
        // Extract item related info, e.g. the detail URL.
        Map<String, String> infoMap = JsoupUtil.getUsefulInfo(content);
        String sibUrl = infoMap == null ? null : infoMap.get("sibUrl");
        if (StringUtils.isBlank(sibUrl)) {
            // Fail explicitly instead of requesting the bogus URL "https:null".
            throw new IOException("sibUrl not found in page content of " + targetUrl);
        }
        // Parse the SKU properties of the item.
        Map<String, Object> propMap = JsoupUtil.getPropMap(content);

        // Kick off the translations first so they run while the detail is fetched.
        translateProp(futureList, propMap);

        /* **************** fetch item detail ******************* */
        // Drop the modules that require a login.
        String usableSibUrl = deleteLoginModule("https:" + sibUrl);
        // The detail request carries the item page URL as the referer header.
        BasicHeader basicHeader = new BasicHeader("referer", targetUrl);
        String sibContent = HttpClientUtil.getContentByUrl(usableSibUrl, basicHeader);
        // Decode unicode escapes.
        sibContent = StringEscapeUtils.unescapeJava(sibContent);

        // Wait for the translation results and store them.
        applyTranslations(futureList);

        Map<String, Object> sibMap = JSONObject.fromObject(sibContent);
        sibMap.put("Jprop", propMap);
        sibMap.put("itemProp", infoMap);
        return sibMap;
    }

    /**
     * Waits for every pending translation and stores a successful result under
     * {@code "translate"} in the map the translation was requested for.
     *
     * @param futureList entries created by {@link #translateText}, each holding
     *                   the {@code "future"} and the target {@code "value"} map
     * @throws ExecutionException   if a translation task failed
     * @throws InterruptedException if interrupted while waiting
     */
    @SuppressWarnings("unchecked") // entries are only ever built by translateText
    private void applyTranslations(List<Map<String, Object>> futureList) throws ExecutionException, InterruptedException {
        for (Map<String, Object> futureMap : futureList) {
            Future<Map<String, Object>> future = (Future<Map<String, Object>>) futureMap.get("future");
            Map<String, Object> valeMap = (Map<String, Object>) futureMap.get("value");
            // get() blocks until the task is done; no need to spin on isDone().
            Map<String, Object> resultMap = future.get();
            Object ret = resultMap == null ? null : resultMap.get("ret");
            Object data = resultMap == null ? null : resultMap.get("data");
            boolean success = ret instanceof Number && ((Number) ret).intValue() == 0;
            if (success && data instanceof Map) {
                Map<String, Object> dataMap = (Map<String, Object>) data;
                valeMap.put("translate", (String) dataMap.get("target_text"));
            } else {
                logger.error("翻译出错");
            }
        }
    }

    /**
     * Submits translations for the SKU properties.
     *
     * <p>For each entry whose value is a map, the property name (key) is
     * translated when it is Chinese, and the titles of the nested SKU maps are
     * translated when the value map is not empty. Entries whose value is not a
     * map are skipped, since there would be nowhere to store the result.
     *
     * @param futureList collects the pending translations
     * @param propMap    SKU property map
     * @throws ExecutionException   declared for signature compatibility
     * @throws InterruptedException declared for signature compatibility
     */
    @SuppressWarnings("unchecked") // guarded by the instanceof check
    private void translateProp(List<Map<String, Object>> futureList, Map<String, Object> propMap) throws ExecutionException, InterruptedException {
        if (propMap == null) {
            return;
        }
        for (Map.Entry<String, Object> entry : propMap.entrySet()) {
            if (!(entry.getValue() instanceof Map)) {
                continue;
            }
            String key = entry.getKey();
            Map<String, Object> value = (Map<String, Object>) entry.getValue();
            // Translate the property name.
            if (ValidateUtils.isChinese(key)) {
                translateText(futureList, value, key);
            }
            // Translate the SKU titles.
            if (!value.isEmpty()) {
                translateTitle(futureList, value);
            }
        }
    }

    /**
     * Submits translations for the {@code "title"} of every nested SKU map that
     * has a non-blank title containing Chinese.
     *
     * @param futureList collects the pending translations
     * @param skuMap     map whose map-typed values are the SKU entries
     * @throws ExecutionException   declared for signature compatibility
     * @throws InterruptedException declared for signature compatibility
     */
    @SuppressWarnings("unchecked") // guarded by the instanceof check
    private void translateTitle(List<Map<String, Object>> futureList, Map<String, Object> skuMap) throws ExecutionException, InterruptedException {
        for (Object sku : skuMap.values()) {
            if (!(sku instanceof Map)) {
                continue;
            }
            Map<String, Object> value = (Map<String, Object>) sku;
            String title = (String) value.get("title");
            if (StringUtils.isNotBlank(title) && ValidateUtils.isContainChinese(title)) {
                translateText(futureList, value, title);
            }
        }
    }

    /**
     * Submits one translation task and records it in {@code futureList}.
     *
     * <p>Submission is best-effort: a failure is logged and the text is simply
     * left untranslated.
     *
     * @param futureList collects the pending translations
     * @param valeMap    map that will later receive the translated text
     * @param text       text to translate
     * @throws ExecutionException   declared for signature compatibility
     * @throws InterruptedException declared for signature compatibility
     */
    private void translateText(List<Map<String, Object>> futureList, Map<String, Object> valeMap, String text) throws ExecutionException, InterruptedException {
        TencentTranslateParam tencentTranslateParam = new TencentTranslateParam(text);
        try {
            Future<Map<String, Object>> future = taskLimitSemaphore.submit(new TranslateCallable(tencentTranslateParam));
            if (future == null) {
                return;
            }
            Map<String, Object> map = new HashMap<>();
            map.put("future", future);
            map.put("value", valeMap);
            futureList.add(map);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to the caller instead of swallowing it.
                Thread.currentThread().interrupt();
            }
            logger.error("Failed to submit translation task for text: {}", text, e);
        }
    }

    /**
     * Removes the modules that require a login or need not be returned from
     * the detail URL.
     *
     * @param usableSibUrl detail URL
     * @return the URL with every excluded module removed
     */
    private String deleteLoginModule(String usableSibUrl) {
        // TODO the freight format is problematic, so the delivery modules are removed for now
        String result = usableSibUrl;
        for (String module : EXCLUDED_MODULES) {
            // Literal replacement; the tokens are not regular expressions.
            result = result.replace(module, "");
        }
        return result;
    }

}
