I am working on my Naver crawler (Naver is the Korean equivalent of Google :P). I have been working on this code for a week now, and I have one last task to solve! The code below crawls data through the Naver API and receives the response into "js" on each loop iteration. All I need to do is collect the DataFrame from each iteration (dfdfdf) and combine them all at the bottom, but my result always contains only the data from the last iteration. The bottom line is that I want to accumulate the DataFrame produced by every iteration of the loop. I tried merge and join, but neither seems to work. Please let me know how to fix this, and also tell me if the code below does not make sense (or is too messy)!
import os
import sys
import urllib.error
import urllib.request
import pandas as pd
import json
import numpy as np
from datetime import datetime, timedelta
import time

# Crawl Naver DataLab search-trend ratios for the keywords listed in
# mat_hierarchy.xlsx and write one combined table to Naver123.csv.
#
# Keywords are sent in batches; every batch yields one DataFrame that is
# aligned on a shared weekly index, and all batch frames are concatenated
# column-wise at the end.

# Naver API connection
CLIENT_ID = "ID"
CLIENT_SECRET = "PW"
URL = "https://openapi.naver.com/v1/datalab/search"

START_DATE = "2016-01-01"
# Number of keyword groups sent per request. The original script never sent
# more than five -- presumably the API's per-request limit; TODO confirm.
BATCH_SIZE = 5
# Position in the keyword list at which crawling starts (kept from the
# original script, which began at 2270 rather than 0).
START_INDEX = 2270
AGES = [str(age) for age in range(1, 12)]


def unique_in_order(items):
    """Return the items with duplicates removed, keeping first-seen order."""
    return list(dict.fromkeys(items))


def build_request_body(keywords, end_date):
    """Return the JSON request body for one batch of keywords.

    Each keyword becomes its own keyword group whose name is the keyword
    itself. The body is serialized with ``json.dumps`` so that quotes or
    backslashes inside a keyword are escaped instead of corrupting the
    payload. For plain keywords the output is byte-identical to the string
    the script used to assemble by hand.
    """
    payload = {
        "startDate": START_DATE,
        "endDate": end_date,
        "timeUnit": "date",
        "keywordGroups": [
            {"groupName": keyword, "keywords": [keyword]}
            for keyword in keywords
        ],
        "ages": AGES,
    }
    # Compact separators and raw (non-escaped) Unicode reproduce the exact
    # text of the hand-written body.
    return json.dumps(payload, ensure_ascii=False, separators=(",", ":"))


def fetch_trends(keywords, end_date):
    """POST one batch of keywords to the API and return the decoded JSON.

    Raises ``urllib.error.HTTPError`` when the server answers with an error
    status (``urlopen`` raises instead of returning such responses).
    """
    body = build_request_body(keywords, end_date)
    request = urllib.request.Request(URL)
    request.add_header("X-Naver-Client-Id", CLIENT_ID)
    request.add_header("X-Naver-Client-Secret", CLIENT_SECRET)
    request.add_header("Content-Type", "application/json")
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, data=body.encode("utf-8")) as response:
        return json.loads(response.read().decode("utf-8"))


def results_to_frame(payload):
    """Convert one decoded API response into a DataFrame.

    The frame has one column per entry in ``payload['results']`` (named by
    the entry's ``title``) and is indexed by date. An entry with no data
    points contributes a single NaN at ``payload['startDate']`` so that its
    column still appears in the output.
    """
    frames = []
    for result in payload["results"]:
        title = result["title"]
        if result["data"]:
            frame = (
                pd.DataFrame(result["data"])
                .set_index("period")
                .rename(columns={"ratio": title})
            )
        else:
            frame = pd.DataFrame(
                [np.nan], columns=[title], index=[payload["startDate"]]
            )
        # Parse the date strings explicitly so the later join against a
        # DatetimeIndex does not depend on implicit string coercion.
        frame.index = pd.to_datetime(frame.index)
        frames.append(frame)
    if not frames:
        return pd.DataFrame(index=pd.DatetimeIndex([]))
    return pd.concat(frames, axis=1)


def main():
    """Run the crawl and write the combined result to Naver123.csv."""
    sheet = pd.ExcelFile('mat_hierarchy.xlsx').parse('Sheet1')
    # Fourth column first, then third column, de-duplicated in that order.
    keywords = unique_in_order(list(sheet.iloc[:, 3]) + list(sheet.iloc[:, 2]))

    # Weekly (Monday) index. The API is queried with a daily time unit, so
    # joining onto this index keeps only the rows that fall on a Monday.
    today = datetime.now().date()
    weekly_index = pd.date_range(
        start=START_DATE, end=today - timedelta(2), freq='W-MON'
    )
    end_date = (today - timedelta(1)).strftime('%Y-%m-%d')
    base = pd.DataFrame(index=weekly_index)

    batch_frames = []
    for start in range(START_INDEX, len(keywords), BATCH_SIZE):
        # Slicing handles the short final batch, so no per-size branches are
        # needed. Empty spreadsheet cells (NaN) are skipped and other
        # non-string cells are stringified so they cannot break the request.
        batch = [
            str(keyword)
            for keyword in keywords[start:start + BATCH_SIZE]
            if pd.notna(keyword)
        ]
        if not batch:
            continue
        print(len(batch))
        try:
            payload = fetch_trends(batch, end_date)
        except urllib.error.HTTPError as err:
            # Skip the failed batch rather than re-using a previous response.
            print("Error Code:" + str(err.code))
            continue
        batch_frames.append(base.join(results_to_frame(payload)))
        print("end")
        # Small pause between requests.
        time.sleep(.5)

    # Every batch frame shares the weekly index, so a column-wise concat
    # yields one wide table containing all keywords.
    final = pd.concat(batch_frames, axis=1) if batch_frames else base
    final.to_csv("Naver123.csv")


if __name__ == "__main__":
    main()