From 3f4be8b7fff57bc90acfe45408b6d9e662304937 Mon Sep 17 00:00:00 2001 From: Kadir Date: Mon, 12 May 2025 01:01:19 +0100 Subject: [PATCH] decisions + logger --- crawler/data/decisions.json | 2 +- crawler/exploration.ipynb | 406 ++++++++++++++++++------------------ crawler/logger.py | 12 ++ 3 files changed, 217 insertions(+), 203 deletions(-) create mode 100644 crawler/logger.py diff --git a/crawler/data/decisions.json b/crawler/data/decisions.json index a7e1e56..9264aac 100644 --- a/crawler/data/decisions.json +++ b/crawler/data/decisions.json @@ -1 +1 @@ -{"identifier":{"351000000":123629195.0,"1327000000":130941260.0,"3990000000":137017046.0,"6149000000":139277669.0,"6518000000":139590377.0,"6533000000":139612598.0,"7943000000":140724923.0,"8614000000":141072356.0,"10086000000":141881306.0,"10885000000":142207478.0,"11527000000":142497332.0,"12239000000":142789514.0,"20339000000":144509942.0,"25591000000":145209752.0,"29287000000":145680089.0,"29314000000":145681742.0,"29631000000":145712417.0,"30500000000":145801379.0,"30617000000":145815869.0,"31099000000":145870028.0,"35066000000":146250143.0,"37953000000":146544002.0,"38329000000":146570870.0,"42871000000":146985392.0,"44592000000":147163907.0,"44909000000":147195593.0,"48902000000":147585674.0,"49946000000":147710042.0,"50039000000":147723032.0,"52512000000":148011845.0,"52796000000":148033676.0,"58046000000":148526264.0,"60308000000":148884251.0,"60309000000":148884416.0,"61481000000":149130020.0,"61859000000":149240837.0,"62028000000":149291402.0,"62654000000":149405942.0,"63680000000":149587160.0,"63982000000":149631728.0,"65029000000":149811341.0,"66004000000":149970839.0,"67597000000":150253052.0,"68064000000":150318137.0,"68466000000":150380609.0,"68568000000":150393980.0,"68710000000":150412859.0,"68751000000":150421832.0,"69137000000":150489485.0,"72498000000":150913952.0,"74158000000":151085624.0,"74214000000":151091798.0,"74314000000":151101077.0,"75258000000":151207043.0,"75952000000":151519580.0,"75983000000":151534727.0,"76066000000":151562780.0,"77032000000":151937687.0,"77123000000":151965605.0,"77225000000":151992926.0,"77230000000":151993904.0,"77442000000":152048477.0,"77646000000":152100245.0,"77706000000":152125295.0,"78179000000":152261252.0,"78407000000":152314865.0,"78595000000":152365064.0,"78715000000":152396093.0,"78935000000":152489237.0,"78947000000":152493332.0,"78977000000":152497586.0,"80185000000":152884436.0,"80440000000":152948756.0,"80514000000":152952956.0,"81811000000":153011024.0,"82408000000":153195794.0,"82484000000":153226244.0,"82621000000":153272789.0,"82641000000":153281690.0,"82829000000":153342827.0,"83494000000":153541325.0,"83607000000":153567278.0,"83709000000":153593624.0,"83934000000":153647309.0,"83980000000":153659252.0,"84044000000":153681545.0,"84397000000":153806711.0,"84718000000":153885416.0,"84885000000":153931382.0,"86274000000":154331711.0,"86335000000":154344398.0,"86598000000":154422317.0,"87192000000":154597238.0,"87219000000":154605446.0,"87250000000":154612913.0,"87446000000":154665839.0,"87577000000":154695602.0,"87711000000":154725011.0,"87734000000":154728881.0,"87743000000":154730432.0,"88732000000":155006288.0,"88797000000":155022773.0,"89262000000":155154182.0,"89318000000":155170724.0,"89319000000":155170730.0,"89732000000":155268164.0,"91569000000":155902118.0,"97470000000":86598888.0,"99359000000":86914923.0,"99793000000":87036150.0,"168.0":118752221.0,"1116.0":129764555.0,"12266.0":142793525.0,"17432.0":144018401.0,"34626.0":146214332.0,"66162.0":150019004.0,"81936.0":153020663.0,"83641.0":153554591.0,"85525.0":154068566.0,"87419.0":154631246.0,"90071.0":155351495.0,"95738.0":156977627.0,"51028.0":147842672.0,"74112.0":151074929.0,"82593.0":153234077.0,"87246.0":154582079.0,"97168.0":157316006.0,"76146.0":151576967.0,"91330.0":155814131.0,"58084.0":148528400.0,"58507.0":148564979.0,"75774.0":151413677.0,"69228.0":150497033.0,"81663.0":152988521.0,"87396.0":154626839.0,"1207.0":130397936.0,"102352.0":86868255.0,"78361.0":152290028.0,"96793.0":157248374.0,"81681.0":152988767.0,"78028.0":152207876.0,"83041.0":153385421.0,"39386.0":146660870.0,"77578.0":152069327.0,"84701.0":153856181.0,"90014.0":155334455.0,"91725.0":155908628.0,"84111.0":153677939.0,"9976.0":141826334.0,"26740.0":145386389.0,"99724.0":157936007.0,"91456.0":155851703.0,"83716.0":153572045.0,"89988.0":155324624.0,"95344.0":156885272.0,"84174.0":153706538.0,"97915.0":157504865.0,"94239.0":156578249.0,"92197.0":156072476.0,"87774.0":154718651.0,"91271.0":155802566.0,"84257.0":153733907.0,"95414.0":156896918.0,"95561.0":156925268.0,"80437.0":152940305.0,"92504.0":156170672.0,"68112.0":150321023.0,"98710.0":157690190.0,"77595.0":152073473.0,"70465.0":150652604.0,"84830.0":153890633.0,"92027.0":156021026.0,"95632.0":156953489.0,"95008.0":156808571.0,"95031.0":156814538.0,"95728.0":156976181.0,"98090.0":157540418.0,"87845.0":154730567.0,"92674.0":156195326.0,"73304.0":151006262.0,"86093.0":154229807.0,"82196.0":153095771.0,"90439.0":155510237.0,"67507.0":150237866.0,"98129.0":157549670.0,"86829.0":154455989.0,"95361.0":156889457.0,"97655.0":157453736.0,"79116.0":152516225.0,"98232.0":157576313.0,"80377.0":152929475.0,"97651.0":157452611.0,"77853.0":152146979.0,"80376.0":152929442.0,"81709.0":152989007.0,"99818.0":157952573.0,"91888.0":155968343.0,"94113.0":156538847.0,"80003.0":152793491.0,"1299.0":130849793.0,"99527.0":157882673.0,"98495.0":157633430.0,"86471.0":154356437.0,"90167.0":155387021.0,"78364.0":152290523.0,"88222.0":154849886.0,"96294.0":157133192.0,"78503.0":152328149.0,"50727.0":147806654.0,"99960.0":157988078.0,"97808.0":157483067.0,"99858.0":157959716.0,"61537.0":149137697.0,"80571.0":152952782.0,"80395.0":152930249.0,"87347.0":154612952.0,"98964.0":157741886.0,"94528.0":156666197.0,"80391.0":152930138.0,"99434.0":157866818.0,"98612.0":157647914.0,"98819.0":157711766.0,"14591.0":143408849.0,"98990.0":157746227.0,"92808.0":156202805.0,"98654.0":157663343.0,"84640.0":153841115.0,"71694.0":150816677.0,"76567.0":151755572.0,"97548.0":157430714.0,"89039.0":155061086.0,"94109.0":156538406.0,"80394.0":152930234.0,"93963.0":156478166.0,"80386.0":152929910.0,"87472.0":154650440.0,"89165.0":155103575.0,"93945.0":156464552.0,"90064.0":155349557.0,"80747.0":152961734.0,"91637.0":155877203.0,"87344.0":154612229.0,"94908.0":156788207.0},"decision":{"351000000":"n, 82 years","1327000000":"n","3990000000":"n","6149000000":"n, zu klein fuer den preis","6518000000":"y, shortlist","6533000000":"n, 52","7943000000":"maybe","8614000000":"n","10086000000":"n, SC","10885000000":"n, small","11527000000":"n, nur 91 jahre lease","12239000000":"n, lease expiring","20339000000":"n, bungalow","25591000000":"n, schmal","29287000000":"y, maybe","29314000000":"n","29631000000":"maybe","30500000000":"maybe","30617000000":"y, maybe. Ground floor though","31099000000":"n, alt","35066000000":"y, love it","37953000000":"n, schlecht erhalten","38329000000":"n, kein licht","42871000000":"melden","44592000000":"n, small, expensive, far","44909000000":"n","48902000000":"y, weit aber maybe? Freehold!","49946000000":"y, maybe","50039000000":"n, zu klein","52512000000":"n, too far from station","52796000000":"n","58046000000":"n, too narrow","60308000000":"n, zu viel laufen","60309000000":"n","61481000000":"y, maybe","61859000000":"n, am arsch der welt","62028000000":"n, removed","62654000000":"n","63680000000":"n, 43 years","63982000000":"n","65029000000":"n, ground floor","66004000000":"y","67597000000":"n, removed","68064000000":"maybe, altes listing","68466000000":"revisit","68568000000":"n, teuer + klein","68710000000":"n","68751000000":"n, boat","69137000000":"n, nicht genug licht fuer den preis","72498000000":"n, klein","74158000000":"y, aber zu teuer","74214000000":"n, ground floor","74314000000":"n, zu teuer","75258000000":"y aber billiger","75952000000":"n, zu teuer","75983000000":"n, lease fuer den preis?","76066000000":"n","77032000000":"n","77123000000":"y","77225000000":"y","77230000000":"n, zu weit zu teuer","77442000000":"n, ground floor","77646000000":"n, schoen aber wohnzimmer zu klein","77706000000":"n","78179000000":"n","78407000000":"y, maybe, dup of 152365064","78595000000":"y, dup 152314865","78715000000":"n","78935000000":"n, ground floor","78947000000":"gone","78977000000":"gone","80185000000":"j, vll","80440000000":"n","80514000000":"n, zu teuer","81811000000":"n","82408000000":"removed","82484000000":"n, klein","82621000000":"y","82641000000":"n,","82829000000":"n","83494000000":"n, klein + high SC","83607000000":"n","83709000000":"n","83934000000":"n, no lease + under offer","83980000000":"n, sueden","84044000000":"n, small","84397000000":"n, erdgeschoss","84718000000":"maybe","84885000000":"n","86274000000":"n, schrott","86335000000":"n, too much work","86598000000":"y","87192000000":"n","87219000000":"n, 36sqm","87250000000":"n","87446000000":"n, 46sqm","87577000000":"n, zu duenn","87711000000":"n, 50sqm","87734000000":"n, zu duenn","87743000000":"n, under offer","88732000000":"gone","88797000000":"revisit","89262000000":"gone","89318000000":"n, ground floor","89319000000":"y, maybe","89732000000":"n, gone","91569000000":"n","97470000000":"n","99359000000":"n, weit, schlecht erhalten","99793000000":"maybe","168.0":"n, teuer","1116.0":"n, nur land","12266.0":"n, ugly","17432.0":"n","34626.0":"maybe","66162.0":"n, refurb needed","81936.0":"n, klein","83641.0":"n, small","85525.0":"n, small","87419.0":"n","90071.0":"n, wrong qm","95738.0":"n","51028.0":"n","74112.0":"n, arbeit noetig","82593.0":"n, erdgeschoss","87246.0":"maybe","97168.0":"n, zuviel arbeit","76146.0":"n, ground floor","91330.0":"j, aber under offer","58084.0":"n, schoen aber bad design","58507.0":"n, schmal","75774.0":"n, zu teuer","69228.0":"n, far","81663.0":"maybe, anschauen","87396.0":"n, 1br","1207.0":"n, nicht genug licht","102352.0":"n","78361.0":"y, same as 146970764","96793.0":"n","81681.0":"n, station far","78028.0":"y","83041.0":"y, maybe","39386.0":"y, maybe","77578.0":"n","84701.0":"n","90014.0":"maybe? irgendwas ist kaputt hier","91725.0":"n","84111.0":"n","9976.0":"n, SC","26740.0":"later","99724.0":"n, am stadium","91456.0":"n","83716.0":"n, too old style","89988.0":"later","95344.0":"y, but sold stc","84174.0":"y","97915.0":"n, sueden","94239.0":"dup","92197.0":"y, but under offer","87774.0":"y, maybe, long walk","91271.0":"n, ground floor","84257.0":"n, ground floor","95414.0":"y","95561.0":"m, gut aber zu teuer","80437.0":"dup","92504.0":"n, 5800 SC","68112.0":"n, meh","98710.0":"n, south","77595.0":"n, schrecklich","70465.0":"n, zu teuer","84830.0":"n, ground floor","92027.0":"y, aber zu teuer eigentlich","95632.0":"m, SC anfragen","95008.0":"n, plus under offer anyway","95031.0":"n, plus under offer anyway + dup","95728.0":"n, uberzeugt mich irgendwie nicht","98090.0":"y, 8min laufweg","87845.0":"y, aber nur fuer 730, und was ist die leasehold kost?","92674.0":"n, under offer + zu weit von next transport","73304.0":"n, sc hoch","86093.0":"n, SC 8000","82196.0":"n","90439.0":"n, eher nicht","67507.0":"n, SC","98129.0":"n, zu weit","86829.0":"later","95361.0":"n, schlecht geschnitten, zu teuer","97655.0":"later","79116.0":"n, SC","98232.0":"n","80377.0":"vll, lease left zu kurz","97651.0":"y, maybe","77853.0":"n, sueden","80376.0":"n, 1b","81709.0":"y, strong","99818.0":"y, maybe, expensive, a bit far out but really nice","91888.0":"n, ground floor","94113.0":"y, maybe","80003.0":"n, nice aber wohnzimmer","1299.0":"j","99527.0":"n, zu teuer fuer suden","98495.0":"n, falsches OCR","86471.0":"n, SC vermutlich zu hoch","90167.0":"n, dup","78364.0":"y, maybe","88222.0":"n, weit weg","96294.0":"n, kleines wohnzimmer","78503.0":"maybe","50727.0":"y, aber billiger?","99960.0":"n, sueden","97808.0":"n, kein licht","99858.0":"n, gibt bessere fuer den preis","61537.0":"y, again same apartment","80571.0":"n, south + too far from subway","80395.0":"n, SC","87347.0":"n, dunkel fuer den preis","98964.0":"n, gibt bessere","94528.0":"n, irgendwie nicht so","80391.0":"n","99434.0":"n, zuviel SC und zuwenig licht fuer den preis","98612.0":"y, maybe, second look","98819.0":"n, gibt bessere","14591.0":"zu klein, der floorplan lueft","98990.0":"n, schlechtes licht","92808.0":"n, 1 br","98654.0":"n, 1 br fuer den preis zu teuer","84640.0":"n, niemals","71694.0":"n, too far south","76567.0":"y","97548.0":"n, wtf","89039.0":"y, maybe","94109.0":"n, kein licht","80394.0":"n, SC","93963.0":"n, SC 5000","80386.0":"n, SC","87472.0":"n, direkt an zug und stadium","89165.0":"n, SC 9600","93945.0":"y","90064.0":"n","80747.0":"n, small living room","91637.0":"maybe","87344.0":"n, zu teuer 770k","94908.0":"n, nicht in london"}} \ No newline at end of file +{"identifier":{"351000000":123629195,"1327000064":130941260,"3990000128":137017046,"6149000192":139277669,"6518000128":139590377,"7943000064":140724923,"8613999616":141072356,"10085999616":141881306,"11527000064":142497332,"12238999552":142789514,"20338999296":144509942,"25590999040":145209752,"29287000064":145680089,"29631000576":145712417,"30616999936":145815869,"31099000832":145870028,"35065999360":146250143,"37952999424":146544002,"38328999936":146570870,"42871001088":146985392,"44592001024":147163907,"44908998656":147195593,"48902000640":147585674,"49946001408":147710042,"50039001088":147723032,"52511997952":148011845,"52795998208":148033676,"58046001152":148526264,"60308000768":148884251,"60309000192":148884416,"61481000960":149130020,"61859000320":149240837,"62028001280":149291402,"62654001152":149405942,"63680000000":149587160,"63981998080":149631728,"65029001216":149811341,"66004000768":149970839,"67597000704":150253052,"68064002048":150318137,"68465999872":150380609,"68709998592":150412859,"69136998400":150489485,"72498003968":150913952,"74157998080":151085624,"74213998592":151091798,"74313998336":151101077,"75951996928":151519580,"75983003648":151534727,"76065996800":151562780,"77031997440":151937687,"77123002368":151965605,"77225000960":151992926,"77229998080":151993904,"77441998848":152048477,"77645996032":152100245,"77706002432":152125295,"78179000320":152261252,"78407000064":152314865,"78594998272":152365064,"78715002880":152396093,"78934999040":152489237,"78947000320":152493332,"78976999424":152497586,"80184999936":152884436,"80440000512":152948756,"80513998848":152952956,"81810997248":153011024,"82407997440":153195794,"82484002816":153226244,"82620997632":153272789,"82641002496":153281690,"82829000704":153342827,"83494002688":153541325,"83607003136":153567278,"83709001728":153593624,"83934003200":153647309,"83980001280":153659252,"84043997184":153681545,"84396998656":153806711,"84718002176":153885416,"84884996096":153931382,"86273998848":154331711,"86334996480":154344398,"86598000640":154422317,"87192002560":154597238,"87219003392":154605446,"87250001920":154612913,"87446003712":154665839,"87577001984":154695602,"87710998528":154725011,"87734001664":154728881,"87742996480":154730432,"88732000256":155006288,"88797003776":155022773,"89261998080":155154182,"89731997696":155268164,"91568996352":155902118,"99358998528":86914923,"99793002496":87036150,"90071":155351495,"95738":156977627,"51028":147842672,"74112":151074929,"82593":153234077,"97168":157316006,"76146":151576967,"91330":155814131,"75774":151413677,"69228":150497033,"87396":154626839,"102352":86868255,"78028":152207876,"39386":146660870,"84701":153856181,"90014":155334455,"84111":153677939,"9976":141826334,"83716":153572045,"95344":156885272,"97915":157504865,"92197":156072476,"87774":154718651,"68112":150321023,"98710":157690190,"77595":152073473,"70465":150652604,"84830":153890633,"95632":156953489,"95008":156808571,"95031":156814538,"95728":156976181,"98090":157540418,"87845":154730567,"92674":156195326,"73304":151006262,"82196":153095771,"90439":155510237,"67507":150237866,"86829":154455989,"95361":156889457,"97655":157453736,"79116":152516225,"97651":157452611,"77853":152146979,"80376":152929442,"81709":152989007,"99818":157952573,"80003":152793491,"99527":157882673,"98495":157633430,"78364":152290523,"96294":157133192,"99960":157988078,"97808":157483067,"61537":149137697,"80571":152952782,"87347":154612952,"98964":157741886,"80391":152930138,"99434":157866818,"14591":143408849,"98990":157746227,"71694":150816677,"76567":151755572,"97548":157430714,"89039":155061086,"87472":154650440,"93945":156464552,"80747":152961734,"94908":156788207,"2959":150421832,"850":142793525,"1446":146214332,"5491":153020663,"35":118752221,"7585":154631246,"103":129764555,"107":130397936,"115":130849793,"321":139612598,"425":142207478,"516":144018401,"619":145159991,"644":145386389,"663":145681742,"681":145801379,"721":146205650,"888":147806654,"978":148528400,"988":148564979,"1097":149239397,"1237":150019004,"1316":150393980,"1490":151207043,"1706":152059181,"1718":152069327,"1794":152290028,"1804":152328149,"1875":152600453,"1963":152929475,"1967":152929910,"1969":152930234,"1970":152930249,"1986":152940305,"2193":152988521,"2195":152988767,"2358":153385421,"2378":153437861,"2441":153554591,"2495":153706538,"2509":153733907,"2548":153841115,"2653":154068566,"2733":154229807,"2770":154356437,"2876":154582079,"2889":154612229,"2991":154849886,"3117":155103575,"3157":155170724,"3158":155170730,"3173":155208854,"3223":155324624,"3233":155337818,"3243":155349557,"3265":155387021,"3379":155604431,"3432":155724947,"3473":155802566,"3517":155851703,"3553":155877203,"3573":155908628,"3617":155968343,"3646":156021026,"3751":156170672,"3825":156202805,"3935":156382646,"3941":156388613,"3969":156420899,"4007":156478166,"4047":156538406,"4048":156538847,"4077":156578249,"4149":156666197,"4201":156712517,"4360":156896918,"4387":156925268,"4496":157040054,"4674":157248374,"4976":157549667,"4977":157549670,"5000":157576313,"5018":157587788,"5099":157647914,"5104":157663343,"5108":157677716,"5158":157711571,"5159":157711766,"5324":157859840,"5410":157936007,"5435":157959716,"5646":158181218,"5719":158237948,"6238":158648693,"6256":158667422,"6362":158736311,"6499":158842319,"6574":158884778,"6967":159131294,"7371":159403016,"7438":159438686,"7515":159508601,"7659":159583694,"7876":159695318,"7996":159770402,"8042":159798212,"8080":159822821,"8132":159840542,"8140":159840629,"8323":159932615,"8392":159967094,"8666":160181372,"8768":160253327,"9496":160805444,"9870":161086217,"9929":161119049,"10065":86598888,"10204":87115215,"10226":87125088},"decision":{"351000000":"n, 82 years","1327000064":"n","3990000128":"n","6149000192":"n, zu klein fuer den preis","6518000128":"y, shortlist","7943000064":"maybe","8613999616":"n","10085999616":"n, SC","11527000064":"n, nur 91 jahre lease","12238999552":"n, lease expiring","20338999296":"n, bungalow","25590999040":"n, schmal","29287000064":"y, maybe","29631000576":"maybe","30616999936":"y, maybe. Ground floor though","31099000832":"n, alt","35065999360":"y, love it","37952999424":"n, schlecht erhalten","38328999936":"n, kein licht","42871001088":"melden","44592001024":"n, small, expensive, far","44908998656":"n","48902000640":"y, weit aber maybe? Freehold!","49946001408":"y, maybe","50039001088":"n, zu klein","52511997952":"n, too far from station","52795998208":"n","58046001152":"n, too narrow","60308000768":"n, zu viel laufen","60309000192":"n","61481000960":"y, maybe","61859000320":"n, am arsch der welt","62028001280":"n, removed","62654001152":"n","63680000000":"n, 43 years","63981998080":"n","65029001216":"n, ground floor","66004000768":"y","67597000704":"n, removed","68064002048":"maybe, altes listing","68465999872":"revisit","68709998592":"n","69136998400":"n, nicht genug licht fuer den preis","72498003968":"n, klein","74157998080":"y, aber zu teuer","74213998592":"n, ground floor","74313998336":"n, zu teuer","75951996928":"n, zu teuer","75983003648":"n, lease fuer den preis?","76065996800":"n","77031997440":"n","77123002368":"y","77225000960":"y","77229998080":"n, zu weit zu teuer","77441998848":"n, ground floor","77645996032":"n, schoen aber wohnzimmer zu klein","77706002432":"n","78179000320":"n","78407000064":"y, maybe, dup of 152365064","78594998272":"y, dup 152314865","78715002880":"n","78934999040":"n, ground floor","78947000320":"gone","78976999424":"gone","80184999936":"j, vll","80440000512":"n","80513998848":"n, zu teuer","81810997248":"n","82407997440":"removed","82484002816":"n, klein","82620997632":"y","82641002496":"n,","82829000704":"n","83494002688":"n, klein + high SC","83607003136":"n","83709001728":"n","83934003200":"n, no lease + under offer","83980001280":"n, sueden","84043997184":"n, small","84396998656":"n, erdgeschoss","84718002176":"maybe","84884996096":"n","86273998848":"n, schrott","86334996480":"n, too much work","86598000640":"y","87192002560":"n","87219003392":"n, 36sqm","87250001920":"n","87446003712":"n, 46sqm","87577001984":"n, zu duenn","87710998528":"n, 50sqm","87734001664":"n, zu duenn","87742996480":"n, under offer","88732000256":"gone","88797003776":"revisit","89261998080":"gone","89731997696":"n, gone","91568996352":"n","99358998528":"n, weit, schlecht erhalten","99793002496":"maybe","90071":"n, wrong qm","95738":"n","51028":"n","74112":"n, arbeit noetig","82593":"n, erdgeschoss","97168":"n, zuviel arbeit","76146":"n, ground floor","91330":"j, aber under offer","75774":"n, zu teuer","69228":"n, far","87396":"n, 1br","102352":"n","78028":"y","39386":"y, maybe","84701":"n","90014":"maybe? irgendwas ist kaputt hier","84111":"n","9976":"n, SC","83716":"n, too old style","95344":"y, but sold stc","97915":"n, sueden","92197":"y, but under offer","87774":"y, maybe, long walk","68112":"n, meh","98710":"n, south","77595":"n, schrecklich","70465":"n, zu teuer","84830":"n, ground floor","95632":"m, SC anfragen","95008":"n, plus under offer anyway","95031":"n, plus under offer anyway + dup","95728":"n, uberzeugt mich irgendwie nicht","98090":"y, 8min laufweg","87845":"y, aber nur fuer 730, und was ist die leasehold kost?","92674":"n, under offer + zu weit von next transport","73304":"n, sc hoch","82196":"n","90439":"n, eher nicht","67507":"n, SC","86829":"later","95361":"n, schlecht geschnitten, zu teuer","97655":"later","79116":"n, SC","97651":"y, maybe","77853":"n, sueden","80376":"n, 1b","81709":"y, strong","99818":"y, maybe, expensive, a bit far out but really nice","80003":"n, nice aber wohnzimmer","99527":"n, zu teuer fuer suden","98495":"n, falsches OCR","78364":"y, maybe","96294":"n, kleines wohnzimmer","99960":"n, sueden","97808":"n, kein licht","61537":"y, again same apartment","80571":"n, south + too far from subway","87347":"n, dunkel fuer den preis","98964":"n, gibt bessere","80391":"n","99434":"n, zuviel SC und zuwenig licht fuer den preis","14591":"zu klein, der floorplan lueft","98990":"n, schlechtes licht","71694":"n, too far south","76567":"y","97548":"n, wtf","89039":"y, maybe","87472":"n, direkt an zug und stadium","93945":"y","80747":"n, small living room","94908":"n, nicht in london","2959":"n, boat","850":"n, ugly","1446":"maybe","5491":"n, klein","35":"n, teuer","7585":"n","103":"n, nur land","107":"n, nicht genug licht","115":"j","321":"n, 52","425":"n, small","516":"n","619":"n","644":"later","663":"n","681":"maybe","721":"y, shortlist","888":"y, aber billiger?","978":"y, maybe","988":"n, schmal","1097":"n","1237":"n, refurb needed","1316":"n, teuer + klein","1490":"y aber billiger","1706":"n","1718":"n","1794":"y, same as 146970764","1804":"maybe","1875":"removed","1963":"vll, lease left zu kurz","1967":"n, SC","1969":"n, SC","1970":"n, SC","1986":"dup","2193":"maybe, anschauen","2195":"n, station far","2358":"y, maybe","2378":"n, 61 jahre left","2441":"n, small","2495":"y","2509":"n, ground floor","2548":"n, niemals","2653":"n, small","2733":"n, SC 8000","2770":"n, SC vermutlich zu hoch","2876":"maybe","2889":"n, zu teuer 770k","2991":"n, weit weg","3117":"n, SC 9600","3157":"n, ground floor","3158":"y, maybe","3173":"y","3223":"later","3233":"n","3243":"n","3265":"n, dup","3379":"n","3432":"n","3473":"n, ground floor","3517":"n","3553":"maybe","3573":"n","3617":"n, ground floor","3646":"y, aber zu teuer eigentlich","3751":"n, 5800 SC","3825":"n, 1 br","3935":"vll","3941":"n","3969":"n, ground floor","4007":"n, SC 5000","4047":"n, kein licht","4048":"y, maybe","4077":"dup","4149":"n, irgendwie nicht so","4201":"y","4360":"y","4387":"m, gut aber zu teuer","4496":"maybe","4674":"n","4976":"vll","4977":"n, zu weit","5000":"n","5018":"removed","5099":"y, maybe, second look","5104":"n, 1 br fuer den preis zu teuer","5108":"yes, yes yes","5158":"n","5159":"n, gibt bessere","5324":"y","5410":"n, am stadium","5435":"n, gibt bessere fuer den preis","5646":"n, feels shitty","5719":"n","6238":"n","6256":"n, qm falsch","6362":"n, gleich wand da","6499":"n","6574":"n","6967":"n","7371":"n, ground floor","7438":"n","7515":"n, kein licht","7659":"y","7876":"maybe","7996":"n","8042":"n","8080":"n ,removed","8132":"n, ground floor","8140":"n, sold stc","8323":"n","8392":"n, sth wrong","8666":"y","8768":"y","9496":"n","9870":"n","9929":"y","10065":"n","10204":"n, STC","10226":"n"}} \ No newline at end of file diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb index 9246ebe..93e7751 100644 --- a/crawler/exploration.ipynb +++ b/crawler/exploration.ipynb @@ -33,18 +33,7 @@ "execution_count": 2, "id": "424501ab-ecc6-42f5-b87e-b0d2871bdc74", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n", - " decisions = pd.read_json(decisions_path)\n", - "/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n", - " decisions = pd.read_json(decisions_path)\n" - ] - } - ], + "outputs": [], "source": [ "# read decisions on file\n", "decisions_path = 'data/decisions.json'\n", @@ -147,7 +136,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "18508\n" + "10574\n" ] } ], @@ -220,25 +209,6 @@ " \n", " \n", " 0\n", - " 101369066\n", - " NaN\n", - " 875000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/101369066\n", - " 3\n", - " {'duration': 2252, 'distance': 7140, 'duration...\n", - " {'duration': 2465, 'distance': 7502, 'duration...\n", - " 0.0\n", - " NaN\n", - " False\n", - " Share of Freehold\n", - " 12\n", - " None\n", - " 0\n", - " None\n", - " \n", - " \n", - " 1\n", " 105484772\n", " 45.7\n", " 325000.0\n", @@ -251,13 +221,13 @@ " 641.53\n", " False\n", " Leasehold\n", - " 36\n", + " 116\n", " None\n", " 0\n", " None\n", " \n", " \n", - " 2\n", + " 1\n", " 105827126\n", " 58.5\n", " 950000.0\n", @@ -270,32 +240,13 @@ " NaN\n", " True\n", " Leasehold\n", - " 2\n", + " 83\n", " None\n", " 0\n", " None\n", " \n", " \n", - " 3\n", - " 105836849\n", - " NaN\n", - " 400000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/105836849\n", - " 3\n", - " {'duration': 2565, 'distance': 14070, 'duratio...\n", - " {'duration': 2565, 'distance': 14070, 'duratio...\n", - " NaN\n", - " NaN\n", - " False\n", - " Leasehold\n", - " 393\n", - " None\n", - " 20\n", - " None\n", - " \n", - " \n", - " 4\n", + " 2\n", " 108102476\n", " 53.7\n", " 515000.0\n", @@ -308,7 +259,45 @@ " NaN\n", " False\n", " Leasehold\n", - " 16\n", + " 97\n", + " None\n", + " 0\n", + " None\n", + " \n", + " \n", + " 3\n", + " 108171770\n", + " 45.0\n", + " 650000.0\n", + " 14444.444444\n", + " https://www.rightmove.co.uk/properties/108171770\n", + " 2\n", + " {'duration': 1591, 'distance': 7827, 'duration...\n", + " {'duration': 1591, 'distance': 7827, 'duration...\n", + " 962.0\n", + " 2000.00\n", + " False\n", + " Leasehold\n", + " 261\n", + " None\n", + " 0\n", + " None\n", + " \n", + " \n", + " 4\n", + " 109595123\n", + " NaN\n", + " 1000000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/109595123\n", + " 1\n", + " {'duration': 2463, 'distance': 9565, 'duration...\n", + " {'duration': 2463, 'distance': 9565, 'duration...\n", + " NaN\n", + " NaN\n", + " True\n", + " Please confirm if this is a freehold or leaseh...\n", + " 96\n", " None\n", " 0\n", " None\n", @@ -333,7 +322,45 @@ " ...\n", " \n", " \n", - " 18503\n", + " 10569\n", + " 88731877\n", + " NaN\n", + " 570000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/88731877\n", + " 1\n", + " {'duration': 912, 'distance': 6329, 'duration_...\n", + " {'duration': 852, 'distance': 6329, 'duration_...\n", + " 998.0\n", + " NaN\n", + " False\n", + " Leasehold\n", + " 407\n", + " None\n", + " 0\n", + " None\n", + " \n", + " \n", + " 10570\n", + " 89825950\n", + " 48.9\n", + " 680000.0\n", + " 13905.930470\n", + " https://www.rightmove.co.uk/properties/89825950\n", + " 1\n", + " {'duration': 273, 'distance': 762, 'duration_s...\n", + " {'duration': 273, 'distance': 762, 'duration_s...\n", + " 112.0\n", + " 1700.00\n", + " False\n", + " Leasehold\n", + " 113\n", + " None\n", + " 0\n", + " None\n", + " \n", + " \n", + " 10571\n", " 94206080\n", " 49.6\n", " 899000.0\n", @@ -346,32 +373,13 @@ " NaN\n", " True\n", " Leasehold\n", - " 256\n", + " 337\n", " None\n", " 0\n", " None\n", " \n", " \n", - " 18504\n", - " 94206329\n", - " NaN\n", - " 700000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/94206329\n", - " 1\n", - " {'duration': 2172, 'distance': 12497, 'duratio...\n", - " {'duration': 2112, 'distance': 12497, 'duratio...\n", - " NaN\n", - " NaN\n", - " False\n", - " Leasehold\n", - " 256\n", - " None\n", - " 20\n", - " None\n", - " \n", - " \n", - " 18505\n", + " 10572\n", " 94508306\n", " 94.0\n", " 1000000.0\n", @@ -384,13 +392,13 @@ " NaN\n", " False\n", " Leasehold\n", - " 149\n", + " 230\n", " None\n", - " 0\n", + " 9\n", " None\n", " \n", " \n", - " 18506\n", + " 10573\n", " 95975483\n", " NaN\n", " 800000.0\n", @@ -403,115 +411,109 @@ " 0.00\n", " False\n", " Leasehold\n", - " 3\n", + " 84\n", " None\n", " 0\n", " None\n", " \n", - " \n", - " 18507\n", - " 96773996\n", - " 70.8\n", - " 1000000.0\n", - " 14124.293785\n", - " https://www.rightmove.co.uk/properties/96773996\n", - " 2\n", - " {'duration': 1608, 'distance': 8301, 'duration...\n", - " {'duration': 1608, 'distance': 8301, 'duration...\n", - " 992.0\n", - " 4716.36\n", - " True\n", - " Leasehold\n", - " 227\n", - " None\n", - " 20\n", - " None\n", - " \n", " \n", "\n", - "

18508 rows × 16 columns

\n", + "

10574 rows × 16 columns

\n", "" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", - "0 101369066 NaN 875000.0 NaN \n", - "1 105484772 45.7 325000.0 7111.597374 \n", - "2 105827126 58.5 950000.0 16239.316239 \n", - "3 105836849 NaN 400000.0 NaN \n", - "4 108102476 53.7 515000.0 9590.316574 \n", + "0 105484772 45.7 325000.0 7111.597374 \n", + "1 105827126 58.5 950000.0 16239.316239 \n", + "2 108102476 53.7 515000.0 9590.316574 \n", + "3 108171770 45.0 650000.0 14444.444444 \n", + "4 109595123 NaN 1000000.0 NaN \n", "... ... ... ... ... \n", - "18503 94206080 49.6 899000.0 18125.000000 \n", - "18504 94206329 NaN 700000.0 NaN \n", - "18505 94508306 94.0 1000000.0 10638.297872 \n", - "18506 95975483 NaN 800000.0 NaN \n", - "18507 96773996 70.8 1000000.0 14124.293785 \n", + "10569 88731877 NaN 570000.0 NaN \n", + "10570 89825950 48.9 680000.0 13905.930470 \n", + "10571 94206080 49.6 899000.0 18125.000000 \n", + "10572 94508306 94.0 1000000.0 10638.297872 \n", + "10573 95975483 NaN 800000.0 NaN \n", "\n", " url bedrooms \\\n", - "0 https://www.rightmove.co.uk/properties/101369066 3 \n", - "1 https://www.rightmove.co.uk/properties/105484772 1 \n", - "2 https://www.rightmove.co.uk/properties/105827126 1 \n", - "3 https://www.rightmove.co.uk/properties/105836849 3 \n", - "4 https://www.rightmove.co.uk/properties/108102476 1 \n", + "0 https://www.rightmove.co.uk/properties/105484772 1 \n", + "1 https://www.rightmove.co.uk/properties/105827126 1 \n", + "2 https://www.rightmove.co.uk/properties/108102476 1 \n", + "3 https://www.rightmove.co.uk/properties/108171770 2 \n", + "4 https://www.rightmove.co.uk/properties/109595123 1 \n", "... ... ... \n", - "18503 https://www.rightmove.co.uk/properties/94206080 1 \n", - "18504 https://www.rightmove.co.uk/properties/94206329 1 \n", - "18505 https://www.rightmove.co.uk/properties/94508306 2 \n", - "18506 https://www.rightmove.co.uk/properties/95975483 2 \n", - "18507 https://www.rightmove.co.uk/properties/96773996 2 \n", + "10569 https://www.rightmove.co.uk/properties/88731877 1 \n", + "10570 https://www.rightmove.co.uk/properties/89825950 1 \n", + "10571 https://www.rightmove.co.uk/properties/94206080 1 \n", + "10572 https://www.rightmove.co.uk/properties/94508306 2 \n", + "10573 https://www.rightmove.co.uk/properties/95975483 2 \n", "\n", " travel_time_fastest \\\n", - "0 {'duration': 2252, 'distance': 7140, 'duration... \n", - "1 {'duration': 1983, 'distance': 10095, 'duratio... \n", - "2 {'duration': 2478, 'distance': 9584, 'duration... \n", - "3 {'duration': 2565, 'distance': 14070, 'duratio... \n", - "4 {'duration': 1266, 'distance': 4042, 'duration... \n", + "0 {'duration': 1983, 'distance': 10095, 'duratio... \n", + "1 {'duration': 2478, 'distance': 9584, 'duration... \n", + "2 {'duration': 1266, 'distance': 4042, 'duration... \n", + "3 {'duration': 1591, 'distance': 7827, 'duration... \n", + "4 {'duration': 2463, 'distance': 9565, 'duration... \n", "... ... \n", - "18503 {'duration': 1125, 'distance': 4637, 'duration... \n", - "18504 {'duration': 2172, 'distance': 12497, 'duratio... \n", - "18505 {'duration': 1046, 'distance': 2193, 'duration... \n", - "18506 {'duration': 2281, 'distance': 7262, 'duration... \n", - "18507 {'duration': 1608, 'distance': 8301, 'duration... \n", + "10569 {'duration': 912, 'distance': 6329, 'duration_... \n", + "10570 {'duration': 273, 'distance': 762, 'duration_s... \n", + "10571 {'duration': 1125, 'distance': 4637, 'duration... \n", + "10572 {'duration': 1046, 'distance': 2193, 'duration... \n", + "10573 {'duration': 2281, 'distance': 7262, 'duration... \n", "\n", " travel_time_second lease_left \\\n", - "0 {'duration': 2465, 'distance': 7502, 'duration... 0.0 \n", - "1 {'duration': 2043, 'distance': 10083, 'duratio... 104.0 \n", - "2 {'duration': 2478, 'distance': 9584, 'duration... NaN \n", - "3 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", - "4 {'duration': 1861, 'distance': 4548, 'duration... 104.0 \n", + "0 {'duration': 2043, 'distance': 10083, 'duratio... 104.0 \n", + "1 {'duration': 2478, 'distance': 9584, 'duration... NaN \n", + "2 {'duration': 1861, 'distance': 4548, 'duration... 104.0 \n", + "3 {'duration': 1591, 'distance': 7827, 'duration... 962.0 \n", + "4 {'duration': 2463, 'distance': 9565, 'duration... NaN \n", "... ... ... \n", - "18503 {'duration': 1125, 'distance': 4641, 'duration... NaN \n", - "18504 {'duration': 2112, 'distance': 12497, 'duratio... NaN \n", - "18505 {'duration': 1046, 'distance': 2193, 'duration... 977.0 \n", - "18506 {'duration': 2815, 'distance': 5607, 'duration... 999.0 \n", - "18507 {'duration': 1608, 'distance': 8301, 'duration... 992.0 \n", + "10569 {'duration': 852, 'distance': 6329, 'duration_... 998.0 \n", + "10570 {'duration': 273, 'distance': 762, 'duration_s... 112.0 \n", + "10571 {'duration': 1125, 'distance': 4641, 'duration... NaN \n", + "10572 {'duration': 1046, 'distance': 2193, 'duration... 977.0 \n", + "10573 {'duration': 2815, 'distance': 5607, 'duration... 999.0 \n", "\n", - " service_charge development tenure_type updated_days status \\\n", - "0 NaN False Share of Freehold 12 None \n", - "1 641.53 False Leasehold 36 None \n", - "2 NaN True Leasehold 2 None \n", - "3 NaN False Leasehold 393 None \n", - "4 NaN False Leasehold 16 None \n", - "... ... ... ... ... ... \n", - "18503 NaN True Leasehold 256 None \n", - "18504 NaN False Leasehold 256 None \n", - "18505 NaN False Leasehold 149 None \n", - "18506 0.00 False Leasehold 3 None \n", - "18507 4716.36 True Leasehold 227 None \n", + " service_charge development \\\n", + "0 641.53 False \n", + "1 NaN True \n", + "2 NaN False \n", + "3 2000.00 False \n", + "4 NaN True \n", + "... ... ... \n", + "10569 NaN False \n", + "10570 1700.00 False \n", + "10571 NaN True \n", + "10572 NaN False \n", + "10573 0.00 False \n", + "\n", + " tenure_type updated_days status \\\n", + "0 Leasehold 116 None \n", + "1 Leasehold 83 None \n", + "2 Leasehold 97 None \n", + "3 Leasehold 261 None \n", + "4 Please confirm if this is a freehold or leaseh... 96 None \n", + "... ... ... ... \n", + "10569 Leasehold 407 None \n", + "10570 Leasehold 113 None \n", + "10571 Leasehold 337 None \n", + "10572 Leasehold 230 None \n", + "10573 Leasehold 84 None \n", "\n", " last_seen decision \n", "0 0 None \n", "1 0 None \n", "2 0 None \n", - "3 20 None \n", + "3 0 None \n", "4 0 None \n", "... ... ... \n", - "18503 0 None \n", - "18504 20 None \n", - "18505 0 None \n", - "18506 0 None \n", - "18507 20 None \n", + "10569 0 None \n", + "10570 0 None \n", + "10571 0 None \n", + "10572 9 None \n", + "10573 0 None \n", "\n", - "[18508 rows x 16 columns]" + "[10574 rows x 16 columns]" ] }, "execution_count": 8, @@ -534,7 +536,7 @@ { "data": { "text/plain": [ - "(18508, 16)" + "(10574, 16)" ] }, "execution_count": 9, @@ -600,7 +602,7 @@ { "data": { "text/plain": [ - "(17217, 18)" + "(9494, 18)" ] }, "execution_count": 12, @@ -650,12 +652,12 @@ "3 None\n", "4 None\n", " ... \n", - "18503 None\n", - "18504 None\n", - "18505 None\n", - "18506 None\n", - "18507 None\n", - "Name: status, Length: 17217, dtype: object" + "10569 None\n", + "10570 None\n", + "10571 None\n", + "10572 None\n", + "10573 None\n", + "Name: status, Length: 9494, dtype: object" ] }, "execution_count": 13, @@ -676,7 +678,7 @@ { "data": { "text/plain": [ - "(10396, 17)" + "(6578, 17)" ] }, "execution_count": 14, @@ -742,26 +744,6 @@ " \n", " \n", " 0\n", - " 101369066\n", - " -1.0\n", - " 875000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/101369066\n", - " 3\n", - " 0.0\n", - " -1.00\n", - " False\n", - " Share of Freehold\n", - " 12\n", - " 0\n", - " None\n", - " 38\n", - " 142\n", - " {'WALK': 797, 'TRANSIT': 1227}\n", - " 2\n", - " \n", - " \n", - " 1\n", " 105484772\n", " 45.7\n", " 325000.0\n", @@ -772,7 +754,7 @@ " 641.53\n", " False\n", " Leasehold\n", - " 36\n", + " 116\n", " 0\n", " None\n", " 33\n", @@ -780,30 +762,50 @@ " {'WALK': 609, 'TRANSIT': 1109}\n", " 2\n", " \n", + " \n", + " 2\n", + " 108102476\n", + " 53.7\n", + " 515000.0\n", + " 9590.316574\n", + " https://www.rightmove.co.uk/properties/108102476\n", + " 1\n", + " 104.0\n", + " -1.00\n", + " False\n", + " Leasehold\n", + " 97\n", + " 0\n", + " None\n", + " 21\n", + " 593\n", + " {'WALK': 819, 'TRANSIT': 445}\n", + " 1\n", + " \n", " \n", "\n", "" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", - "0 101369066 -1.0 875000.0 NaN \n", - "1 105484772 45.7 325000.0 7111.597374 \n", + "0 105484772 45.7 325000.0 7111.597374 \n", + "2 108102476 53.7 515000.0 9590.316574 \n", "\n", " url bedrooms lease_left \\\n", - "0 https://www.rightmove.co.uk/properties/101369066 3 0.0 \n", - "1 https://www.rightmove.co.uk/properties/105484772 1 104.0 \n", + "0 https://www.rightmove.co.uk/properties/105484772 1 104.0 \n", + "2 https://www.rightmove.co.uk/properties/108102476 1 104.0 \n", "\n", - " service_charge development tenure_type updated_days last_seen \\\n", - "0 -1.00 False Share of Freehold 12 0 \n", - "1 641.53 False Leasehold 36 0 \n", + " service_charge development tenure_type updated_days last_seen decision \\\n", + "0 641.53 False Leasehold 116 0 None \n", + "2 -1.00 False Leasehold 97 0 None \n", "\n", - " decision duration initial_walk_duration duration_per_transit \\\n", - "0 None 38 142 {'WALK': 797, 'TRANSIT': 1227} \n", - "1 None 33 372 {'WALK': 609, 'TRANSIT': 1109} \n", + " duration initial_walk_duration duration_per_transit \\\n", + "0 33 372 {'WALK': 609, 'TRANSIT': 1109} \n", + "2 21 593 {'WALK': 819, 'TRANSIT': 445} \n", "\n", " number_of_transit_stops \n", "0 2 \n", - "1 2 " + "2 1 " ] }, "execution_count": 15, diff --git a/crawler/logger.py b/crawler/logger.py new file mode 100644 index 0000000..a0676fb --- /dev/null +++ b/crawler/logger.py @@ -0,0 +1,12 @@ +import logging + +def createLogger(name): + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('app.log'), + logging.StreamHandler() + ] + ) + return logging.getLogger(name)