#!/usr/bin/env python
# coding: utf-8

# # **Food Safety Korea (foodsafetykorea.go.kr)**
# Selecting the fields that carry valid values and preprocessing the data.
# 1. **[API key : "<REDACTED>"](http://openapi.foodsafetykorea.go.kr/api/인증키/I0580/xml/1/20)**
#    NOTE(review): a literal API key used to be written here. Keep credentials
#    out of source control; presumably the committed key should be rotated.
# 1. **[Open API main page](https://www.foodsafetykorea.go.kr/api/userApiKey.do#)**, **[How to use the API](https://www.foodsafetykorea.go.kr/api/howToUseApi.do?menu_grp=MENU_GRP34&menu_no=687)**, **[API usage procedure](https://www.foodsafetykorea.go.kr/api/board/boardDetail.do)**
# 1. **[Electronic food encyclopedia](http://kof-agri.com/FoodKind/Details/C0121050100000#)**, **[Dietitians association nutrient intake reference table](http://www.kns.or.kr/FileRoom/FileRoom_view.asp?mode=mod&restring=%252FFileRoom%252FFileRoom.asp%253Fxsearch%253D0%253D%253Dxrow%253D10%253D%253DBoardID%253DKdr%253D%253Dpage%253D1&idx=79&page=1&BoardID=Kdr&xsearch=1&cn_search)**

# In[1]:

import time
from contextlib import contextmanager

import pandas as pd
from tqdm import tqdm

from momukji import FoodSafe
from momukji import Product


@contextmanager
def _timed(label):
    """Print the wall-clock time spent inside the ``with`` body.

    Plain-Python stand-in for the notebook's ``%%time`` cell magic, so the
    script no longer depends on ``get_ipython()`` (undefined outside IPython).

    Args:
        label: Short tag printed in front of the elapsed time.
    """
    started = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - started
        print("[{}] wall time: {:.3f} s".format(label, elapsed))


# Fetch the API table once instead of constructing a new FoodSafe() per key.
api_specs = FoodSafe().apiKey

# (service id, service name) pairs for every registered API.
# Bare expression: kept from the notebook; the value is discarded here.
[(api_id, api_specs[api_id]['name']) for api_id in api_specs.keys()]


# ## **1 Recall and sales-suspension list**
# **'I0490' :** 'recall / sales suspension'

# In[3]:

with _timed('I0490'):
    foodId = 'I0490'  # recalled-product data (roughly 500 rows or fewer)
    # One-off download that produced the CSV read below:
    # data = FoodSafe().getData(foodId, 1, 500,
    #                           FoodSafe().apiKey[foodId]['cols'], display=True)
    # data.등록일 = [str(_.date()) for _ in pd.DatetimeIndex(data.등록일)]
    # data.to_csv("data/food_recall.csv", index=None)
    df_recall = pd.read_csv("data/food_recall.csv")
    df_recall.head(2)  # notebook preview; value is discarded here


# ## **2 Food nutrition information DB**
# **'I0750' :** 'food nutrition information DB'
# 1. Provides 13,824 records
# 1. The ingredient / food DB still needs preprocessing and cleanup

# In[20]:

with _timed('I0750'):
    result, foodId = [], 'I0750'  # food nutrition information
    # One-off download, fetched in pages of 1000 rows.
    # NOTE(review): this writes data/food_nutrient.xls while the code below
    # reads data/food_nutrient.csv -- confirm how the CSV was produced.
    # _ = FoodSafe().getData(foodId, 1, 2, FoodSafe().apiKey[foodId]['cols'], display=True)
    # for _ in tqdm(range(1, 13824+1, 1000)):
    #     result.append(FoodSafe().getData(foodId, _, _+999, FoodSafe().apiKey[foodId]['cols']))
    # pd.concat(result).to_excel('data/food_nutrient.xls', index=None)
    df_db = pd.read_csv('data/food_nutrient.csv')
    # Build the set of food groups once; it is used for both the count and
    # the every-20th sample.
    food_groups = set(df_db.식품군)
    print(len(food_groups), len(set(df_db.식품코드)), list(food_groups)[::20])
    print(df_db.head(1))


# ## **3 Cooked-food recipe DB**
# **'COOKRCP01' :** 'cooked-food recipe DB'

# In[21]:

with _timed('COOKRCP01'):
    result, foodId = [], 'COOKRCP01'  # recipe data
    # One-off download, fetched in pages of 1000 rows.
    # NOTE(review): this writes data/food_recipe_info.xls while the code below
    # reads data/food_recipe.csv -- confirm how the CSV was produced.
    # _ = FoodSafe().getData(foodId, 1, 2, FoodSafe().apiKey[foodId]['cols'], display=True)
    # for _ in tqdm(range(1, 1500+1, 1000)):
    #     result.append(FoodSafe().getData(foodId, _, _+999, FoodSafe().apiKey[foodId]['cols']))
    # pd.concat(result).to_excel('data/food_recipe_info.xls', index=None)
    df_recipe = pd.read_csv('data/food_recipe.csv')
    df_recipe.head(2)  # notebook preview; value is discarded here


# ## **4 Distributed-product barcode information**
# **'C005' :** product barcode numbers, **'I2570' :** 'distribution barcode'

# In[5]:

with _timed('C005'):
    result, foodId = [], 'C005'  # product barcode numbers
    # Request rows 1-2 only, as a quick sample of the service.
    _ = FoodSafe().getData(foodId, 1, 2, api_specs[foodId]['cols'], display=True)
    # Full download (disabled), fetched in pages of 1000 rows:
    # from collections import OrderedDict
    # for _ in tqdm(range(1, 100200+1, 1000)):
    #     result.append(FoodSafe().getData(foodId, _, _+999,
    #         FoodSafe().apiKey[foodId]['cols']).loc[:,
    #             list(OrderedDict(FoodSafe().apiKey['C005']['cols']).values())])
    # pd.concat(result).to_csv('data/food_barcode.csv', index=None)
    # pd.concat(result).to_excel('data/food_barcode.xls', index=None)


# In[6]:

with _timed('I2570'):
    result, foodId = [], "I2570"  # distribution barcodes
    # Request rows 1-2 only, as a quick sample of the service.
    _ = FoodSafe().getData(foodId, 1, 2, api_specs[foodId]['cols'], display=True)
    # Full download (disabled), fetched in pages of 1000 rows:
    # for _ in tqdm(range(1, 49000+1, 1000)):
    #     result.append(FoodSafe().getData(foodId, _, _+999, FoodSafe().apiKey[foodId]['cols']))
    # pd.concat(result).to_excel('data/food_barcode_info.xls', index=None)


# In[9]:

# Reads the same CSV as df_db above into a separate frame.
df_temp = pd.read_csv("data/food_nutrient.csv")
df_temp.head(3)  # notebook preview; value is discarded here


# # **Other: manufactured-goods price information**
# **[Data.go.kr manufactured-goods price information](https://www.data.go.kr/dataset/3043385/openapi.do?lang=ko)**

# ## **1 Collecting manufactured-goods IDs**
# **[Data.go.kr manufactured-goods price information](https://www.data.go.kr/dataset/3043385/openapi.do?lang=ko)**
# 1. **[Korea Consumer Agency "True Price" portal](https://www.price.go.kr/tprice/portal/pricenewsandtpriceintro/iteminfo/getItemList.do)**
# 1. **[Distribution product knowledge bank](http://35.200.32.201/)**
# 1. TODO: pin down clear classification criteria

# In[3]:

item_list = Product().getList()
item_list.shape  # notebook preview; value is discarded here