{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"# 1 Convertir los json.gz en csv para analizarlos con Tableau"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Descomprimir el fichero tar\n",
"- Descomprimir uno de los ficheros\n",
"- Generar un csv (comma-separated values)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sys import argv\n",
"import warnings\n",
"import gzip\n",
"import json\n",
"import tarfile\n",
"\n",
"\n",
"# Unpack the archive into the working directory. The context manager\n",
"# closes the tar handle even if extraction raises.\n",
"# NOTE(review): extractall() writes whatever member paths the archive\n",
"# contains -- only run this on an archive you trust.\n",
"with tarfile.open('Software.tar.gz', 'r:gz') as tar:\n",
"    tar.extractall()\n",
"\n",
"# Each non-blank line of the gzip file is parsed as one JSON document.\n",
"# The file is read in binary mode; json.loads accepts bytes directly.\n",
"with gzip.open('Software/Software_5.json.gz', 'rb') as f:\n",
"    data = [json.loads(line) for line in f if line.strip()]\n",
"\n",
"# One row per parsed record; the row index is written as the first CSV column.\n",
"df = pd.DataFrame(data)\n",
"df.to_csv('Software_5.csv', index=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" overall | \n",
" verified | \n",
" reviewTime | \n",
" reviewerID | \n",
" asin | \n",
" style | \n",
" reviewerName | \n",
" reviewText | \n",
" summary | \n",
" unixReviewTime | \n",
" vote | \n",
" image | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 4.0 | \n",
" False | \n",
" 10 20, 2010 | \n",
" A38NELQT98S4H8 | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" WB Halper | \n",
" I've been using Dreamweaver (and it's predeces... | \n",
" A solid overview of Dreamweaver CS5 | \n",
" 1287532800 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 4.0 | \n",
" False | \n",
" 10 18, 2010 | \n",
" A3QJU4FEN8PQSZ | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" Grimmy | \n",
" The demo is done with the PC version, with ref... | \n",
" A good value | \n",
" 1287360000 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 5.0 | \n",
" False | \n",
" 10 16, 2010 | \n",
" ACJT8MUC0LRF0 | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" D. Fowler | \n",
" If you've been wanting to learn how to create ... | \n",
" This is excellent software for those who want ... | \n",
" 1287187200 | \n",
" 3 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" overall verified reviewTime reviewerID asin \\\n",
"0 4.0 False 10 20, 2010 A38NELQT98S4H8 0321719816 \n",
"1 4.0 False 10 18, 2010 A3QJU4FEN8PQSZ 0321719816 \n",
"2 5.0 False 10 16, 2010 ACJT8MUC0LRF0 0321719816 \n",
"\n",
" style reviewerName \\\n",
"0 {'Format:': ' DVD-ROM'} WB Halper \n",
"1 {'Format:': ' DVD-ROM'} Grimmy \n",
"2 {'Format:': ' DVD-ROM'} D. Fowler \n",
"\n",
" reviewText \\\n",
"0 I've been using Dreamweaver (and it's predeces... \n",
"1 The demo is done with the PC version, with ref... \n",
"2 If you've been wanting to learn how to create ... \n",
"\n",
" summary unixReviewTime vote \\\n",
"0 A solid overview of Dreamweaver CS5 1287532800 NaN \n",
"1 A good value 1287360000 NaN \n",
"2 This is excellent software for those who want ... 1287187200 3 \n",
"\n",
" image \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# info must be called: a bare `df.info` only shows the bound-method repr\n",
"# instead of printing the frame summary.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" overall | \n",
" unixReviewTime | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 12805.000000 | \n",
" 1.280500e+04 | \n",
"
\n",
" \n",
" mean | \n",
" 3.877860 | \n",
" 1.350001e+09 | \n",
"
\n",
" \n",
" std | \n",
" 1.362086 | \n",
" 1.017569e+08 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 9.619776e+08 | \n",
"
\n",
" \n",
" 25% | \n",
" 3.000000 | \n",
" 1.266365e+09 | \n",
"
\n",
" \n",
" 50% | \n",
" 4.000000 | \n",
" 1.371686e+09 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.000000 | \n",
" 1.427328e+09 | \n",
"
\n",
" \n",
" max | \n",
" 5.000000 | \n",
" 1.535242e+09 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" overall unixReviewTime\n",
"count 12805.000000 1.280500e+04\n",
"mean 3.877860 1.350001e+09\n",
"std 1.362086 1.017569e+08\n",
"min 1.000000 9.619776e+08\n",
"25% 3.000000 1.266365e+09\n",
"50% 4.000000 1.371686e+09\n",
"75% 5.000000 1.427328e+09\n",
"max 5.000000 1.535242e+09"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12805"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['overall'].count()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.8778602108551348"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['overall'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.3620857242805136"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['overall'].std()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" overall | \n",
" verified | \n",
" reviewTime | \n",
" reviewerID | \n",
" asin | \n",
" style | \n",
" reviewerName | \n",
" reviewText | \n",
" summary | \n",
" unixReviewTime | \n",
" vote | \n",
" image | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" 5.0 | \n",
" False | \n",
" 10 16, 2010 | \n",
" ACJT8MUC0LRF0 | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" D. Fowler | \n",
" If you've been wanting to learn how to create ... | \n",
" This is excellent software for those who want ... | \n",
" 1287187200 | \n",
" 3 | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 5.0 | \n",
" False | \n",
" 10 12, 2010 | \n",
" AYUF7YETYOLNX | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" Bryan Newman | \n",
" I've been creating websites with Dreamweaver f... | \n",
" A Fantastic Overview of Dream Weaver and Web D... | \n",
" 1286841600 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" 5.0 | \n",
" False | \n",
" 10 7, 2010 | \n",
" A31ICLWQ9CSHRS | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" Al Swanson | \n",
" I decided (after trying a number of other prod... | \n",
" Excellent Tutorials! | \n",
" 1286409600 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 5.0 | \n",
" False | \n",
" 09 26, 2010 | \n",
" A2BVNVJOFXGZUB | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" J. Howard | \n",
" The video is well-paced and delivered in an un... | \n",
" Excellent. | \n",
" 1285459200 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 6 | \n",
" 5.0 | \n",
" False | \n",
" 04 7, 2011 | \n",
" A2JMJVNTBL7K7E | \n",
" 0321719816 | \n",
" {'Format:': ' DVD-ROM'} | \n",
" Yesuaini99 | \n",
" I spent several hours on the lesson and I love... | \n",
" excellent video training material | \n",
" 1302134400 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 12785 | \n",
" 5.0 | \n",
" True | \n",
" 04 6, 2018 | \n",
" A2ONJRZVX2MLVE | \n",
" B01617VO2S | \n",
" {'Platform:': ' PC Download'} | \n",
" Angela-Clare Pollard | \n",
" Easy to file your taxes correctly. | \n",
" Five Stars | \n",
" 1522972800 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12787 | \n",
" 5.0 | \n",
" True | \n",
" 04 6, 2018 | \n",
" A2ONJRZVX2MLVE | \n",
" B01637RHBI | \n",
" {'Platform:': ' PC Download'} | \n",
" Angela-Clare Pollard | \n",
" Easy to file your taxes correctly. | \n",
" Five Stars | \n",
" 1522972800 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12788 | \n",
" 5.0 | \n",
" True | \n",
" 06 20, 2018 | \n",
" A3RNXWG0J64Z9Z | \n",
" B0169RGE7U | \n",
" {'Platform:': ' PC Disc'} | \n",
" LJ | \n",
" works great, easy to capture videos and save | \n",
" Video Capture Software | \n",
" 1529452800 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12792 | \n",
" 5.0 | \n",
" True | \n",
" 03 1, 2018 | \n",
" A3PGN4ZXMQRSYH | \n",
" B01DEG0SGC | \n",
" NaN | \n",
" Amazon Customer | \n",
" Arrived as described. I was pleased with purc... | \n",
" I was pleased with | \n",
" 1519862400 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12799 | \n",
" 5.0 | \n",
" False | \n",
" 07 17, 2016 | \n",
" A5U5T6EWH90O0 | \n",
" B01FFVDY9M | \n",
" {'Platform:': ' Key Card'} | \n",
" Lauri | \n",
" I am a total amateur when it comes to editing,... | \n",
" Works great for my purposes! | \n",
" 1468713600 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
5972 rows × 12 columns
\n",
"
"
],
"text/plain": [
" overall verified reviewTime reviewerID asin \\\n",
"2 5.0 False 10 16, 2010 ACJT8MUC0LRF0 0321719816 \n",
"3 5.0 False 10 12, 2010 AYUF7YETYOLNX 0321719816 \n",
"4 5.0 False 10 7, 2010 A31ICLWQ9CSHRS 0321719816 \n",
"5 5.0 False 09 26, 2010 A2BVNVJOFXGZUB 0321719816 \n",
"6 5.0 False 04 7, 2011 A2JMJVNTBL7K7E 0321719816 \n",
"... ... ... ... ... ... \n",
"12785 5.0 True 04 6, 2018 A2ONJRZVX2MLVE B01617VO2S \n",
"12787 5.0 True 04 6, 2018 A2ONJRZVX2MLVE B01637RHBI \n",
"12788 5.0 True 06 20, 2018 A3RNXWG0J64Z9Z B0169RGE7U \n",
"12792 5.0 True 03 1, 2018 A3PGN4ZXMQRSYH B01DEG0SGC \n",
"12799 5.0 False 07 17, 2016 A5U5T6EWH90O0 B01FFVDY9M \n",
"\n",
" style reviewerName \\\n",
"2 {'Format:': ' DVD-ROM'} D. Fowler \n",
"3 {'Format:': ' DVD-ROM'} Bryan Newman \n",
"4 {'Format:': ' DVD-ROM'} Al Swanson \n",
"5 {'Format:': ' DVD-ROM'} J. Howard \n",
"6 {'Format:': ' DVD-ROM'} Yesuaini99 \n",
"... ... ... \n",
"12785 {'Platform:': ' PC Download'} Angela-Clare Pollard \n",
"12787 {'Platform:': ' PC Download'} Angela-Clare Pollard \n",
"12788 {'Platform:': ' PC Disc'} LJ \n",
"12792 NaN Amazon Customer \n",
"12799 {'Platform:': ' Key Card'} Lauri \n",
"\n",
" reviewText \\\n",
"2 If you've been wanting to learn how to create ... \n",
"3 I've been creating websites with Dreamweaver f... \n",
"4 I decided (after trying a number of other prod... \n",
"5 The video is well-paced and delivered in an un... \n",
"6 I spent several hours on the lesson and I love... \n",
"... ... \n",
"12785 Easy to file your taxes correctly. \n",
"12787 Easy to file your taxes correctly. \n",
"12788 works great, easy to capture videos and save \n",
"12792 Arrived as described. I was pleased with purc... \n",
"12799 I am a total amateur when it comes to editing,... \n",
"\n",
" summary unixReviewTime vote \\\n",
"2 This is excellent software for those who want ... 1287187200 3 \n",
"3 A Fantastic Overview of Dream Weaver and Web D... 1286841600 NaN \n",
"4 Excellent Tutorials! 1286409600 NaN \n",
"5 Excellent. 1285459200 NaN \n",
"6 excellent video training material 1302134400 NaN \n",
"... ... ... ... \n",
"12785 Five Stars 1522972800 NaN \n",
"12787 Five Stars 1522972800 NaN \n",
"12788 Video Capture Software 1529452800 NaN \n",
"12792 I was pleased with 1519862400 NaN \n",
"12799 Works great for my purposes! 1468713600 NaN \n",
"\n",
" image \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"5 NaN \n",
"6 NaN \n",
"... ... \n",
"12785 NaN \n",
"12787 NaN \n",
"12788 NaN \n",
"12792 NaN \n",
"12799 NaN \n",
"\n",
"[5972 rows x 12 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['overall'] == 5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Edit Metadata",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": false,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "339px",
"left": "1098px",
"top": "216.141px",
"width": "159px"
},
"toc_section_display": false,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}