{ "cells": [ { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "# 1 Convertir los json.gz en csv para analizarlos con Tableau" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Descomprimir el fichero tar\n", "- Descomprimir uno de los ficheros\n", "- Generar un csv (comma separated values)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sys import argv\n", "import warnings\n", "import gzip\n", "import json\n", "import tarfile\n", "\n", "\n", "# Unpack the outer archive; the context manager closes it even if extraction fails.\n", "# NOTE(review): extractall() trusts the member paths stored in the archive,\n", "# so only run this cell on an archive from a trusted source.\n", "with tarfile.open('Software.tar.gz', \"r:gz\") as tar:\n", "    tar.extractall()\n", "\n", "# The gzipped file is parsed as one JSON document per line; blank lines are skipped\n", "# so a trailing newline cannot raise a JSONDecodeError.\n", "with gzip.open('Software/Software_5.json.gz', 'r') as f:\n", "    data = [json.loads(line) for line in f if line.strip()]\n", "\n", "# Build the frame once from the list of records and export it for Tableau.\n", "df = pd.DataFrame(data)\n", "df.to_csv('Software_5.csv', index=True)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
overallverifiedreviewTimereviewerIDasinstylereviewerNamereviewTextsummaryunixReviewTimevoteimage
04.0False10 20, 2010A38NELQT98S4H80321719816{'Format:': ' DVD-ROM'}WB HalperI've been using Dreamweaver (and it's predeces...A solid overview of Dreamweaver CS51287532800NaNNaN
14.0False10 18, 2010A3QJU4FEN8PQSZ0321719816{'Format:': ' DVD-ROM'}GrimmyThe demo is done with the PC version, with ref...A good value1287360000NaNNaN
25.0False10 16, 2010ACJT8MUC0LRF00321719816{'Format:': ' DVD-ROM'}D. FowlerIf you've been wanting to learn how to create ...This is excellent software for those who want ...12871872003NaN
\n", "
" ], "text/plain": [ " overall verified reviewTime reviewerID asin \\\n", "0 4.0 False 10 20, 2010 A38NELQT98S4H8 0321719816 \n", "1 4.0 False 10 18, 2010 A3QJU4FEN8PQSZ 0321719816 \n", "2 5.0 False 10 16, 2010 ACJT8MUC0LRF0 0321719816 \n", "\n", " style reviewerName \\\n", "0 {'Format:': ' DVD-ROM'} WB Halper \n", "1 {'Format:': ' DVD-ROM'} Grimmy \n", "2 {'Format:': ' DVD-ROM'} D. Fowler \n", "\n", " reviewText \\\n", "0 I've been using Dreamweaver (and it's predeces... \n", "1 The demo is done with the PC version, with ref... \n", "2 If you've been wanting to learn how to create ... \n", "\n", " summary unixReviewTime vote \\\n", "0 A solid overview of Dreamweaver CS5 1287532800 NaN \n", "1 A good value 1287360000 NaN \n", "2 This is excellent software for those who want ... 1287187200 3 \n", "\n", " image \n", "0 NaN \n", "1 NaN \n", "2 NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(3)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# info() must be called: the bare attribute only shows the bound-method repr.\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
overallunixReviewTime
count12805.0000001.280500e+04
mean3.8778601.350001e+09
std1.3620861.017569e+08
min1.0000009.619776e+08
25%3.0000001.266365e+09
50%4.0000001.371686e+09
75%5.0000001.427328e+09
max5.0000001.535242e+09
\n", "
" ], "text/plain": [ " overall unixReviewTime\n", "count 12805.000000 1.280500e+04\n", "mean 3.877860 1.350001e+09\n", "std 1.362086 1.017569e+08\n", "min 1.000000 9.619776e+08\n", "25% 3.000000 1.266365e+09\n", "50% 4.000000 1.371686e+09\n", "75% 5.000000 1.427328e+09\n", "max 5.000000 1.535242e+09" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12805" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['overall'].count()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.8778602108551348" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['overall'].mean()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.3620857242805136" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['overall'].std()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
overallverifiedreviewTimereviewerIDasinstylereviewerNamereviewTextsummaryunixReviewTimevoteimage
25.0False10 16, 2010ACJT8MUC0LRF00321719816{'Format:': ' DVD-ROM'}D. FowlerIf you've been wanting to learn how to create ...This is excellent software for those who want ...12871872003NaN
35.0False10 12, 2010AYUF7YETYOLNX0321719816{'Format:': ' DVD-ROM'}Bryan NewmanI've been creating websites with Dreamweaver f...A Fantastic Overview of Dream Weaver and Web D...1286841600NaNNaN
45.0False10 7, 2010A31ICLWQ9CSHRS0321719816{'Format:': ' DVD-ROM'}Al SwansonI decided (after trying a number of other prod...Excellent Tutorials!1286409600NaNNaN
55.0False09 26, 2010A2BVNVJOFXGZUB0321719816{'Format:': ' DVD-ROM'}J. HowardThe video is well-paced and delivered in an un...Excellent.1285459200NaNNaN
65.0False04 7, 2011A2JMJVNTBL7K7E0321719816{'Format:': ' DVD-ROM'}Yesuaini99I spent several hours on the lesson and I love...excellent video training material1302134400NaNNaN
.......................................
127855.0True04 6, 2018A2ONJRZVX2MLVEB01617VO2S{'Platform:': ' PC Download'}Angela-Clare PollardEasy to file your taxes correctly.Five Stars1522972800NaNNaN
127875.0True04 6, 2018A2ONJRZVX2MLVEB01637RHBI{'Platform:': ' PC Download'}Angela-Clare PollardEasy to file your taxes correctly.Five Stars1522972800NaNNaN
127885.0True06 20, 2018A3RNXWG0J64Z9ZB0169RGE7U{'Platform:': ' PC Disc'}LJworks great, easy to capture videos and saveVideo Capture Software1529452800NaNNaN
127925.0True03 1, 2018A3PGN4ZXMQRSYHB01DEG0SGCNaNAmazon CustomerArrived as described. I was pleased with purc...I was pleased with1519862400NaNNaN
127995.0False07 17, 2016A5U5T6EWH90O0B01FFVDY9M{'Platform:': ' Key Card'}LauriI am a total amateur when it comes to editing,...Works great for my purposes!1468713600NaNNaN
\n", "

5972 rows × 12 columns

\n", "
" ], "text/plain": [ " overall verified reviewTime reviewerID asin \\\n", "2 5.0 False 10 16, 2010 ACJT8MUC0LRF0 0321719816 \n", "3 5.0 False 10 12, 2010 AYUF7YETYOLNX 0321719816 \n", "4 5.0 False 10 7, 2010 A31ICLWQ9CSHRS 0321719816 \n", "5 5.0 False 09 26, 2010 A2BVNVJOFXGZUB 0321719816 \n", "6 5.0 False 04 7, 2011 A2JMJVNTBL7K7E 0321719816 \n", "... ... ... ... ... ... \n", "12785 5.0 True 04 6, 2018 A2ONJRZVX2MLVE B01617VO2S \n", "12787 5.0 True 04 6, 2018 A2ONJRZVX2MLVE B01637RHBI \n", "12788 5.0 True 06 20, 2018 A3RNXWG0J64Z9Z B0169RGE7U \n", "12792 5.0 True 03 1, 2018 A3PGN4ZXMQRSYH B01DEG0SGC \n", "12799 5.0 False 07 17, 2016 A5U5T6EWH90O0 B01FFVDY9M \n", "\n", " style reviewerName \\\n", "2 {'Format:': ' DVD-ROM'} D. Fowler \n", "3 {'Format:': ' DVD-ROM'} Bryan Newman \n", "4 {'Format:': ' DVD-ROM'} Al Swanson \n", "5 {'Format:': ' DVD-ROM'} J. Howard \n", "6 {'Format:': ' DVD-ROM'} Yesuaini99 \n", "... ... ... \n", "12785 {'Platform:': ' PC Download'} Angela-Clare Pollard \n", "12787 {'Platform:': ' PC Download'} Angela-Clare Pollard \n", "12788 {'Platform:': ' PC Disc'} LJ \n", "12792 NaN Amazon Customer \n", "12799 {'Platform:': ' Key Card'} Lauri \n", "\n", " reviewText \\\n", "2 If you've been wanting to learn how to create ... \n", "3 I've been creating websites with Dreamweaver f... \n", "4 I decided (after trying a number of other prod... \n", "5 The video is well-paced and delivered in an un... \n", "6 I spent several hours on the lesson and I love... \n", "... ... \n", "12785 Easy to file your taxes correctly. \n", "12787 Easy to file your taxes correctly. \n", "12788 works great, easy to capture videos and save \n", "12792 Arrived as described. I was pleased with purc... \n", "12799 I am a total amateur when it comes to editing,... \n", "\n", " summary unixReviewTime vote \\\n", "2 This is excellent software for those who want ... 1287187200 3 \n", "3 A Fantastic Overview of Dream Weaver and Web D... 1286841600 NaN \n", "4 Excellent Tutorials! 
1286409600 NaN \n", "5 Excellent. 1285459200 NaN \n", "6 excellent video training material 1302134400 NaN \n", "... ... ... ... \n", "12785 Five Stars 1522972800 NaN \n", "12787 Five Stars 1522972800 NaN \n", "12788 Video Capture Software 1529452800 NaN \n", "12792 I was pleased with 1519862400 NaN \n", "12799 Works great for my purposes! 1468713600 NaN \n", "\n", " image \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "5 NaN \n", "6 NaN \n", "... ... \n", "12785 NaN \n", "12787 NaN \n", "12788 NaN \n", "12792 NaN \n", "12799 NaN \n", "\n", "[5972 rows x 12 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['overall'] == 5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": false, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "339px", "left": "1098px", "top": "216.141px", "width": "159px" }, "toc_section_display": false, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }