Real crawling scripts and floorplan detection
1. get all listings 2. get all detail jsons 3. get all images 4. get all floorplans 5. detecting floorplans Also updating dependencies for huggingface etc.
This commit is contained in:
parent
46bb641026
commit
508aa02812
12 changed files with 1531 additions and 170 deletions
|
|
@ -24,7 +24,8 @@
|
|||
"source": [
|
||||
"from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration\n",
|
||||
"from PIL import Image\n",
|
||||
"import pandas as pd"
|
||||
"import pandas as pd\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -34,7 +35,8 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image = Image.open(\"floorplans/46001_32532509_FLP_00_0000.jpeg\")\n",
|
||||
"# image = Image.open(\"floorplans/46001_32532509_FLP_00_0000.jpeg\")\n",
|
||||
"image = Image.open(\"/Users/kadir/code/realestate/crawler/data/floorplans/15508_EnfieldRd_FLP_02_0000.jpg\")\n",
|
||||
"question = \"How many living rooms are displayed on this floor plan?\""
|
||||
]
|
||||
},
|
||||
|
|
@ -60,7 +62,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 4,
|
||||
"id": "ea990e1f-a660-4efb-be48-d095ab05b50d",
|
||||
"metadata": {
|
||||
"editable": true,
|
||||
|
|
@ -74,7 +76,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TITLE | <0x0A> Fifth Floor<0x0A>Floor Area 642 Sq Ft - 59.64 Sq M | LUX<0x0A>Floor Area 642 Sq Ft - 59.64 Sq M<0x0A>Reception Room<0x0A>1210×1210<0x0A>3.9×3.9m | 12.10×1210<0x0A>Reception Room<0x0A>3.9×3.9m | 12.10×1210<0x0A>Balcony<0x0A>1.5×4.5′ | 1.5×4.5 <0x0A> Bathroom<0x0A>131°×102°<0x0A>4.0×3.1m | 1.40×2.7m | 1.40×2.7m <0x0A> Kitchen<0x0A>102°×710°<0x0A>3.1×2.4m | 1.10×3.1m | 1.00×3.1m <0x0A> Bedroom<0x0A>131°×810<0x0A>4.0×2.7m | 1.00×3.1m | 1.00×3.1m <0x0A> Kitchen<0x0A>102°×710<0x0A>3.1×2.4m | 1.00×3.1m | 1.00×3.1m <0x0A> Reception Room<0x0A>1210×1210<0x0A>3.9×3.9m | 1.50×4.5 | 1.00×3.5m <0x0A> Balcony<0x0A>1.5×4.5 | 1.50×4.5 | 1.00×3.5m\n"
|
||||
"TITLE | Approx. Gross Internal Area *<0x0A>636 Ft * - 59.08 M *<0x0A>First Floor | Store<0x0A>Reception<0x0A>Room<0x0A>16'8\" x 129\"<0x0A>5.08 x 3.89m | 1<0x0A>Reception<0x0A>Room<0x0A>16'8\" x 129\"<0x0A>5.08 x 3.89m | 1<0x0A>Reception<0x0A>Room<0x0A>16'8\" x 129\"<0x0A>5.08 x 3.89m | 1<0x0A>Reception<0x0A>Room<0x0A>16'8\" x 129\"<0x0A>5.08 x 3.89m | 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -85,47 +87,52 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 6,
|
||||
"id": "b4a8b5fb-7cc5-441d-ad60-cff7c03a103e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TITLE | \n",
|
||||
" Fifth Floor\n",
|
||||
"Floor Area 642 Sq Ft - 59.64 Sq M | LUX\n",
|
||||
"Floor Area 642 Sq Ft - 59.64 Sq M\n",
|
||||
"Reception Room\n",
|
||||
"1210×1210\n",
|
||||
"3.9×3.9m | 12.10×1210\n",
|
||||
"Reception Room\n",
|
||||
"3.9×3.9m | 12.10×1210\n",
|
||||
"Balcony\n",
|
||||
"1.5×4.5′ | 1.5×4.5 \n",
|
||||
" Bathroom\n",
|
||||
"131°×102°\n",
|
||||
"4.0×3.1m | 1.40×2.7m | 1.40×2.7m \n",
|
||||
" Kitchen\n",
|
||||
"102°×710°\n",
|
||||
"3.1×2.4m | 1.10×3.1m | 1.00×3.1m \n",
|
||||
" Bedroom\n",
|
||||
"131°×810\n",
|
||||
"4.0×2.7m | 1.00×3.1m | 1.00×3.1m \n",
|
||||
" Kitchen\n",
|
||||
"102°×710\n",
|
||||
"3.1×2.4m | 1.00×3.1m | 1.00×3.1m \n",
|
||||
" Reception Room\n",
|
||||
"1210×1210\n",
|
||||
"3.9×3.9m | 1.50×4.5 | 1.00×3.5m \n",
|
||||
" Balcony\n",
|
||||
"1.5×4.5 | 1.50×4.5 | 1.00×3.5m\n"
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"59.08"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def extract_total_sqm(deplot_input_str):\n",
|
||||
" sqmregex = r'(\\d+\\.\\d*) ?(sqm|sq.m|sq m|m)'\n",
|
||||
" matches = re.findall(sqmregex, deplot_input_str.lower())\n",
|
||||
" sqms = [float(m[0]) for m in matches]\n",
|
||||
" # print(sqms)\n",
|
||||
" return max(sqms)\n",
|
||||
"\n",
|
||||
"extract_total_sqm(x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "06d50e06-fead-40be-bd9d-7e8207d8df4e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "'>' not supported between instances of 'int' and 'NoneType'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mmax\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[0;31mTypeError\u001b[0m: '>' not supported between instances of 'int' and 'NoneType'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x = r'\\d+.\\d*'"
|
||||
"max([None,1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
33
vqa/floorplans/Untitled.ipynb
Normal file
33
vqa/floorplans/Untitled.ipynb
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "96658076-0295-4a52-9b82-2083198fbd57",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue