Get content inside of script tag

做~自己de王妃 提交于 2021-02-19 03:57:22

问题


Hello everyone, I'm trying to fetch the content inside a script tag.

http://www.teknosa.com/urunler/145051447/samsung-hm1500-bluetooth-kulaklik

This is the website.

This is the script tag whose content I want to extract:

$.Teknosa.ProductDetail = {"ProductComputedIndex":145051447,"ProductName":"SAMSUNG HM1500 BLUETOOTH KULAKLIK","ProductSeoName":"samsung-hm1500-bluetooth-kulaklik","ProductBarcode":"8808993790425","ProductPriceInclTax":79.9,"ProductDiscountedPriceInclTax":null,"ProductStockQuantity":1,"ProductMinStockQuantity":null,"ProductShortDescription":null,"ProductFullDescription":null,"ProductModelName":"HM1500","ProductAdminComment":null,"ProductMetaTitle":null,"ProductMetaKeywords":null,"ProductMetaDescription":null,"ProductBrandId":299,"ProductBrandName":"SAMSUNG","ProductBrandImageName":"//img-teknosa.mncdn.com/StaticContent/images/Brand/SAMSUNG-medium.png","ProductCommentCout":29,"ProductQuestionAnswerCout":0,"ProductRatingStar":4,"ProductType":1,"ProductOriginalComputedIndex":null,"ProductIsSolo":false,"ProductIsClickCollect":true,"ProductStoreStockAmount":1,"ProductGroupDisplayName":null,"ProductOrigin":"PRC","ProductIsTss":false,"ProductIsKit":false,"AddBasketButtonType":0,"ProductViewType":0,"ProductDetailDefaultPicture":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductRatingStarText":"Çok İyi","ProductPrice":"79,9","IsThereOutletProduct":false,"ProductIsActiveProductOriginal":false,"ProductErpCatalogCode":"_TELEKOM","ProductErpCategoryCode":"_BLUETOOTH_KULAKLIKLAR1636","ProductCategory":{"CategoryName":"Bluetooth Kulaklık ve Kit","CategorySeoName":"bluetooth-kulaklik-ve-kit","CategoryDescription":null,"CategoryParentId":134,"CategoryLevel":2,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon 
Aksesuarları","CategorySeoName":"telefon-aksesuarlari","CategoryDescription":null,"CategoryParentId":108,"CategoryLevel":1,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon","CategorySeoName":"telefon","CategoryDescription":null,"CategoryParentId":null,"CategoryLevel":0,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":null,"DisplayOrder":6,"StatusId":100110,"StartDate":"\/Date(1434351061000)\/","EndDate":null,"Id":108},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351245000)\/","EndDate":null,"Id":134},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351367000)\/","EndDate":null,"Id":173},"ProductDetailPictures":[{"ProductPictureName":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":1,"ProductPictureIsDefault":true},{"ProductPictureName":"145051447-2-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":2,"ProductPictureIsDefault":false}],"ProductDetailAttributes":[{"Key":"Ağırlık","Value":"18.1","UnitItemName":"gr","ProductAttributeDisplayOrder":0,"DisplayOrder":2,"Description":null},{"Key":"Model","Value":"HM1500","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":4,"Description":null},{"Key":"Şarj Kullanım Süresi","Value":"2 Saat","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":80,"Description":null},{"Key":"Bekleme Süresi (Saat)","Value":"250 Saat (Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":116,"Description":null},{"Key":"Kullanım Mesafesi","Value":"10 m. 
(Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":145,"Description":null},{"Key":"Bluetooth Profili","Value":"HSP (Kulaklık), HFP (Ahizesiz)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":149,"Description":null}],"ProductSuggestions":[],"ProductContents":[],"ProductKitItems":[],"ProductVideos":[],"ProductGroups":[],"ProductBadges":[{"BadgeItemBadgeId":7,"BadgeItemApplicationId":1,"BadgeItemText":null,"BadgeItemImageName":"//img-teknosa.mncdn.com/StaticContent/images/Badge/ucretsiz-kargo.png","BadgeItemDescription":null,"BadgeItemPagePosition":"ImageBottom","BadgeItemImagePosition":null,"BadgeItemDisplayView":"ProductDetail","BadgeItemType":"Image","BadgeItemDynamicType":"WebStock","BadgeItemDynamicTypeText1":null,"BadgeItemDynamicTypeText2":null,"BadgeItemDynamicTypeCalculationType":null,"BadgeItemDynamicTypeDisplayType":null,"BadgeItemEvaluationExpression":null,"BadgeItemClassName":null,"DisplayOrder":0,"StatusId":100110,"StartDate":"\/Date(1474440397000)\/","EndDate":null,"Id":5}],"DisplayOrder":1000,"StatusId":100110,"StartDate":"\/Date(1429000863000)\/","EndDate":null,"Id":4715};

This is what I tried:

# Request the product page; the response is passed to self.parseProduct.
yield scrapy.Request(response.urljoin(url), callback = self.parseProduct, meta={
                                # NOTE(review): the 'splash' key is presumably consumed by the
                                # scrapy-splash middleware -- confirm it is enabled in settings.
                                'splash': {
                                 'endpoint': 'render.html',
                                 # presumably the time (in seconds) Splash waits before
                                 # returning the rendered page -- TODO confirm
                                 'args': {'wait': 0.09}},
                                # The un-joined url value is also stored in meta under 'url'.
                                'url': url
                            })
 def parseProduct(self, response):
     """Parse the product-detail script of a product page with js2xml.

     Reads the text of a fixed-position <script> element from the
     response, parses it as JavaScript and records one (empty) entry in
     ``data_bundles`` for every matching assignment found in the tree.

     :param response: the Scrapy response for the product page.
     """
     data_bundles = {}
     script = response.xpath('/html/body/div[1]/div[2]/script[2]/text()').extract_first()
     print(script)
     if script is None:
         # extract_first() yields None when the positional XPath matches
         # nothing; there is no JavaScript to parse in that case.
         return
     jstree = js2xml.parse(script)
     # NOTE(review): js2xml appears to represent a dotted path such as
     # $.Teknosa.ProductDetail as nested accessor nodes, so a single
     # identifier named "$.Teknosa.ProductDetail" may never match --
     # verify against the parsed tree before relying on this query.
     for a in jstree.xpath('//assign[left//property/identifier/@name="$.Teknosa.ProductDetail" and right/object]'):
         bundle_prop = a.xpath('./left/bracketaccessor/property/string/text()')
         print(bundle_prop)
         if not bundle_prop:
             # The XPath result is a list and is never None; an empty list
             # means this assignment has no bracket-accessor key, so skip it
             # instead of failing on bundle_prop[0].
             continue
         curr_prop = bundle_prop[0]
         data_bundles[curr_prop] = {}
     # The snippet ends here: data_bundles is filled in but is not yet
     # returned or yielded.

Thanks for your help.


回答1:


This should do it:

# Select every <script> element whose text matches 'Teknosa.ProductDetail ='
# (re:test with the 'i' flag makes the match case-insensitive) and extract
# the matching elements.
response.xpath("//script[re:test(text(),'Teknosa.ProductDetail =','i')]").extract()

This selects the script tag that contains "Teknosa.ProductDetail =" in its text.

Edit: If you want to load the JavaScript object from the script, extract the script's text and parse the object literal with Python's json module.

import json

# Select the text of the <script> whose body assigns Teknosa.ProductDetail.
xp = "//script[re:test(text(),'Teknosa.ProductDetail =','i')]/text()"
# Capture the object literal on the right-hand side of the assignment.
# The pattern is a raw string so that \{ and \} reach the regex engine
# as-is instead of being read as (invalid) string escape sequences.
data = response.xpath(xp).re(r" = (\{.+\})")[0]
# The captured literal uses JSON syntax (double-quoted keys, null,
# true/false), so the standard json module can decode it.
data = json.loads(data)
print(data['ProductBarcode'])
> '8808993790425'


来源:https://stackoverflow.com/questions/41333902/get-content-inside-of-script-tag

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!