import re

import cdx_toolkit
from bs4 import BeautifulSoup

import json
import demjson

from IPython.display import HTML
import pandas as pd
# Index client for the 'cc' source; reused by the queries further down.
cdx = cdx_toolkit.CDXFetcher(source='cc')
# Let DataFrame cells display up to 150 characters before being truncated.
pd.set_option('display.max_colwidth', 150)

Indeed

# Query the index for au.indeed.com captures between from_ts='202004' and
# to='202005', keeping only status-200 records and capping the result at 50.
objs = list(
    cdx.iter(
        'au.indeed.com/*',
        from_ts='202004',
        to='202005',
        limit=50,
        filter=['status:200'],
    )
)
# Show the capture records as a table.
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 SUS5VGDGUMKJAZ7GN6T4LYMTNA2NWCSU crawl-data/CC-MAIN-2020-16/segments/1585371896913.98/warc/CC-MAIN-20200410110538-20200410141038-00441.warc.gz eng 87919 text/html text/html 335167477 200 20200410140752 https://au.indeed.com/$110,000-jobs-in-Dinmore-QLD com,indeed,au)/$110,000-jobs-in-dinmore-qld
1 UTF-8 FXO2T22MGLBZVE4S6DIOANRL5T7NUMPJ crawl-data/CC-MAIN-2020-16/segments/1585371624083.66/warc/CC-MAIN-20200406102322-20200406132822-00295.warc.gz eng 80016 text/html text/html 350051882 200 20200406122949 https://au.indeed.com/$110,000-jobs-in-Quinns-Rocks-WA com,indeed,au)/$110,000-jobs-in-quinns-rocks-wa
2 UTF-8 CYXMCOYDZZ2VI2FBC3QKT4EQB53POQTA crawl-data/CC-MAIN-2020-16/segments/1585371618784.58/warc/CC-MAIN-20200406035448-20200406065948-00352.warc.gz eng 87620 text/html text/html 348171424 200 20200406060907 https://au.indeed.com/$110,600-jobs-in-Rodd-Point-NSW com,indeed,au)/$110,600-jobs-in-rodd-point-nsw
3 UTF-8 D5QLEI7LBPZYG5IGSM7GJ3MEB7RWPA43 crawl-data/CC-MAIN-2020-16/segments/1585371807538.83/warc/CC-MAIN-20200408010207-20200408040707-00279.warc.gz eng 84586 text/html text/html 371777629 200 20200408011304 https://au.indeed.com/$110,700-jobs-in-Woolner-NT com,indeed,au)/$110,700-jobs-in-woolner-nt
4 UTF-8 PS46BDU6XKSHV4HO5KOBKUUDIYXHGOYQ crawl-data/CC-MAIN-2020-16/segments/1585371896913.98/warc/CC-MAIN-20200410110538-20200410141038-00012.warc.gz eng 86821 text/html text/html 351049090 200 20200410135327 https://au.indeed.com/$130,000-jobs-in-Flinders-Lane-VIC com,indeed,au)/$130,000-jobs-in-flinders-lane-vic
5 UTF-8 ZH53DGWTQZGXTDV2RDLP6PQACUZLEPIJ crawl-data/CC-MAIN-2020-16/segments/1585370506870.41/warc/CC-MAIN-20200402080824-20200402110824-00495.warc.gz eng 84190 text/html text/html 337139560 200 20200402095317 https://au.indeed.com/$132,700-jobs-in-Warwick-WA com,indeed,au)/$132,700-jobs-in-warwick-wa
6 UTF-8 BTKJNOLRSE2CREODQPZK4JLD6UBLDPZQ crawl-data/CC-MAIN-2020-16/segments/1585371806302.78/warc/CC-MAIN-20200407214925-20200408005425-00356.warc.gz eng 79981 text/html text/html 355360319 200 20200408002615 https://au.indeed.com/$140,000-jobs-in-Quinns-Rocks-WA com,indeed,au)/$140,000-jobs-in-quinns-rocks-wa
7 UTF-8 EGN7N53REECODNQBS6QZG26AYQNVKB7U crawl-data/CC-MAIN-2020-16/segments/1585370520039.50/warc/CC-MAIN-20200404042338-20200404072338-00073.warc.gz eng 86710 text/html text/html 364638210 200 20200404055103 https://au.indeed.com/$140,000-jobs-in-Rodd-Point-NSW com,indeed,au)/$140,000-jobs-in-rodd-point-nsw
8 UTF-8 YQB5EKFD72C2K47S5UB7ANL7ATCUTHN3 crawl-data/CC-MAIN-2020-16/segments/1585371805747.72/warc/CC-MAIN-20200407183818-20200407214318-00495.warc.gz eng 83900 text/html text/html 347012528 200 20200407211904 https://au.indeed.com/$301,000-jobs-in-Peak-Crossing-QLD com,indeed,au)/$301,000-jobs-in-peak-crossing-qld
9 UTF-8 3MSK4SGITTW75C5QANUX2LGS4ZCKBL23 crawl-data/CC-MAIN-2020-16/segments/1585371824409.86/warc/CC-MAIN-20200408202012-20200408232512-00433.warc.gz eng 85660 text/html text/html 337399156 200 20200408221826 https://au.indeed.com/$50,000-jobs-in-Bribie-Island-QLD com,indeed,au)/$50,000-jobs-in-bribie-island-qld
10 UTF-8 3B6ZUUSXQKQHAKQU2VG3EIBIBW54WKQM crawl-data/CC-MAIN-2020-16/segments/1585371821680.80/warc/CC-MAIN-20200408170717-20200408201217-00379.warc.gz eng 80779 text/html text/html 336543593 200 20200408190736 https://au.indeed.com/$60,700-jobs-in-Maitland-WA com,indeed,au)/$60,700-jobs-in-maitland-wa
11 UTF-8 EEEC27FKAMLNKWMQD4SQ4IC2FGV4FDI6 crawl-data/CC-MAIN-2020-16/segments/1585370524604.46/warc/CC-MAIN-20200404165658-20200404195658-00109.warc.gz eng 86323 text/html text/html 353871155 200 20200404194532 https://au.indeed.com/$61,200-jobs-in-St-Georges-SA com,indeed,au)/$61,200-jobs-in-st-georges-sa
12 UTF-8 XGX2X6JZ6HUINJI7UTE45ZE662OEG6TK crawl-data/CC-MAIN-2020-16/segments/1585371807538.83/warc/CC-MAIN-20200408010207-20200408040707-00399.warc.gz eng 81487 text/html text/html 344326173 200 20200408024726 https://au.indeed.com/$70,000-jobs-in-Bribie-Island-QLD com,indeed,au)/$70,000-jobs-in-bribie-island-qld
13 UTF-8 IXNGTNRWIOWBHXQQJJNFSUDX6QWFTWAV crawl-data/CC-MAIN-2020-16/segments/1585371861991.79/warc/CC-MAIN-20200409154025-20200409184525-00052.warc.gz eng 87495 text/html text/html 345689513 200 20200409162539 https://au.indeed.com/$70,000-jobs-in-Churchill-QLD com,indeed,au)/$70,000-jobs-in-churchill-qld
14 UTF-8 53WHVMCMR6K2L3TUXJHDHDUU6OIVNOQZ crawl-data/CC-MAIN-2020-16/segments/1585371858664.82/warc/CC-MAIN-20200409122719-20200409153219-00123.warc.gz eng 85231 text/html text/html 351197696 200 20200409152603 https://au.indeed.com/$70,000-jobs-in-Clarence-Gardens-SA com,indeed,au)/$70,000-jobs-in-clarence-gardens-sa
15 UTF-8 YNFHUQZCCFDQH4OV2ETYNFGIZK433XQZ crawl-data/CC-MAIN-2020-16/segments/1585371618784.58/warc/CC-MAIN-20200406035448-20200406065948-00446.warc.gz eng 84560 text/html text/html 350869513 200 20200406061558 https://au.indeed.com/$70,000-jobs-in-Quinns-Rocks-WA com,indeed,au)/$70,000-jobs-in-quinns-rocks-wa
16 UTF-8 PVHCQSZKD5MGBLYVIW3QO2M4VM3YWN4F crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00098.warc.gz eng 87537 text/html text/html 357067905 200 20200404092121 https://au.indeed.com/$70,000-jobs-in-Robertson-QLD com,indeed,au)/$70,000-jobs-in-robertson-qld
17 UTF-8 EKGPB2VWFWPECMFAXYZLFWOGBEXK3N3P crawl-data/CC-MAIN-2020-16/segments/1585371606067.71/warc/CC-MAIN-20200405150416-20200405180916-00460.warc.gz eng 86928 text/html text/html 368029548 200 20200405173423 https://au.indeed.com/$83,000-jobs-in-Canley-Heights-NSW com,indeed,au)/$83,000-jobs-in-canley-heights-nsw
18 UTF-8 HFSWDG7TAM73XTUVCLDZAITGAM7ZR2HU crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00493.warc.gz eng 79014 text/html text/html 360212774 200 20200401115138 https://au.indeed.com/$90,000-jobs-in-Bribie-Island-QLD com,indeed,au)/$90,000-jobs-in-bribie-island-qld
19 UTF-8 JISF54RGKFN2I7AXVVLUEGV2XUHSR52L crawl-data/CC-MAIN-2020-16/segments/1585370506870.41/warc/CC-MAIN-20200402080824-20200402110824-00530.warc.gz eng 86201 text/html text/html 348714754 200 20200402082540 https://au.indeed.com/$90,000-jobs-in-Churchill-QLD com,indeed,au)/$90,000-jobs-in-churchill-qld
20 UTF-8 K6QTF7L6B26U4YXU6CWIJOH5NXJU7PSD crawl-data/CC-MAIN-2020-16/segments/1585370520039.50/warc/CC-MAIN-20200404042338-20200404072338-00233.warc.gz eng 85533 text/html text/html 347240989 200 20200404061242 https://au.indeed.com/$90,000-jobs-in-Clarence-Gardens-SA com,indeed,au)/$90,000-jobs-in-clarence-gardens-sa
21 UTF-8 XUVPFFMQZ3B4P2CQOU7HXMNDZW6AHXAW crawl-data/CC-MAIN-2020-16/segments/1585370518767.60/warc/CC-MAIN-20200403220847-20200404010847-00526.warc.gz eng 85956 text/html text/html 358137219 200 20200404001354 https://au.indeed.com/$90,000-jobs-in-Dinmore-QLD com,indeed,au)/$90,000-jobs-in-dinmore-qld
22 UTF-8 6BI666K7YZPUFSCVQH5C3FCZA2WGRPHH crawl-data/CC-MAIN-2020-16/segments/1585371858664.82/warc/CC-MAIN-20200409122719-20200409153219-00186.warc.gz eng 87744 text/html text/html 330499220 200 20200409150221 https://au.indeed.com/$90,000-jobs-in-Kareela-NSW com,indeed,au)/$90,000-jobs-in-kareela-nsw
23 UTF-8 FL36UXRASQTNGLWM7543JQ7L5OIKANBW crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00200.warc.gz eng 82935 text/html text/html 350703294 200 20200406021312 https://au.indeed.com/$92,600-jobs-in-Peak-Crossing-QLD com,indeed,au)/$92,600-jobs-in-peak-crossing-qld
24 UTF-8 7RGKKP7UBQZDMJP2LXEUF5JS35PJ6MWM crawl-data/CC-MAIN-2020-16/segments/1585371805747.72/warc/CC-MAIN-20200407183818-20200407214318-00482.warc.gz eng 88979 text/html text/html 352936509 200 20200407210734 https://au.indeed.com/15-Year-Old,-Part-Time,-Cash-Register,-Retail-jobs-in-New-South-Wales com,indeed,au)/15-year-old,-part-time,-cash-register,-retail-jobs-in-new-south-wales
25 UTF-8 IB6JAXPVX5DHJFQE6STL3RYZBNI5Y767 crawl-data/CC-MAIN-2020-16/segments/1585371618784.58/warc/CC-MAIN-20200406035448-20200406065948-00540.warc.gz eng 75858 text/html text/html 360597776 200 20200406060848 https://au.indeed.com/1800-My-Catering-jobs com,indeed,au)/1800-my-catering-jobs
26 UTF-8 5DK7DP6NGRIEYN7UZENDDRDNHGM4IFWR crawl-data/CC-MAIN-2020-16/segments/1585371805747.72/warc/CC-MAIN-20200407183818-20200407214318-00019.warc.gz eng 79734 text/html text/html 341547567 200 20200407194506 https://au.indeed.com/2-Fat-Indians-jobs com,indeed,au)/2-fat-indians-jobs
27 UTF-8 OU27TVQSMLD2EDZTII5VNU2EP2P3L4GG crawl-data/CC-MAIN-2020-16/segments/1585370521876.48/warc/CC-MAIN-20200404103932-20200404133932-00293.warc.gz eng 76995 text/html text/html 357508875 200 20200404125602 https://au.indeed.com/2discover-jobs com,indeed,au)/2discover-jobs
28 UTF-8 2KEYL5RMSTK77JERPCEVGZ3CDOIEA4GT crawl-data/CC-MAIN-2020-16/segments/1585370510846.12/warc/CC-MAIN-20200403092656-20200403122656-00378.warc.gz eng 80834 text/html text/html 335385226 200 20200403110521 https://au.indeed.com/3d-Animation-$100,000-jobs com,indeed,au)/3d-animation-$100,000-jobs
29 UTF-8 IIYNVW276H3NHNCQ3IINVJQWAP4WVFDJ crawl-data/CC-MAIN-2020-16/segments/1585371876625.96/warc/CC-MAIN-20200409185507-20200409220007-00498.warc.gz eng 82519 text/html text/html 373457422 200 20200409214934 https://au.indeed.com/3d-Animation-jobs-in-Sydney-NSW com,indeed,au)/3d-animation-jobs-in-sydney-nsw
30 UTF-8 CURUYVE2IEK4KYHIIMZNE6QWP57W7KCY crawl-data/CC-MAIN-2020-16/segments/1585371805747.72/warc/CC-MAIN-20200407183818-20200407214318-00253.warc.gz eng 85117 text/html text/html 361947519 200 20200407213126 https://au.indeed.com/3d-Artist-jobs com,indeed,au)/3d-artist-jobs
31 UTF-8 DTCEUA2KUQLYMZPHXJVMFMAPB2WMOCV5 crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00236.warc.gz eng 84586 text/html text/html 370123020 200 20200406031453 https://au.indeed.com/3d-jobs-in-Brisbane-QLD com,indeed,au)/3d-jobs-in-brisbane-qld
32 UTF-8 A6KLS2SSVYXIOS75TXUU7KFCUVLQTZ26 crawl-data/CC-MAIN-2020-16/segments/1585370519111.47/warc/CC-MAIN-20200404011558-20200404041558-00134.warc.gz eng 46690 text/html text/html 346002220 200 20200404022316 https://au.indeed.com/?s_rid=theage%3Alhsnav%3Ajobs com,indeed,au)/?s_rid=theage:lhsnav:jobs
33 UTF-8 3DCW3YUA2PNRPHLXFAWN5QWHQLFONWXX crawl-data/CC-MAIN-2020-16/segments/1585370524604.46/warc/CC-MAIN-20200404165658-20200404195658-00155.warc.gz eng 85623 text/html text/html 355535353 200 20200404195008 https://au.indeed.com/A-Commercial-$60,000-jobs-in-Gold-Coast-QLD com,indeed,au)/a-commercial-$60,000-jobs-in-gold-coast-qld
34 UTF-8 Y7X24ZG46NP2F6N5EGDH7EBAHKQKNN6K crawl-data/CC-MAIN-2020-16/segments/1585370505359.23/warc/CC-MAIN-20200401003422-20200401033422-00213.warc.gz eng 59755 text/html text/html 348260244 200 20200401015649 https://au.indeed.com/A-Cut-Above-Family-Butcher-jobs com,indeed,au)/a-cut-above-family-butcher-jobs
35 UTF-8 LYJUAQUGZZZ7O443OG4PJMH4VNYNO6HI crawl-data/CC-MAIN-2020-16/segments/1585371893683.94/warc/CC-MAIN-20200410075105-20200410105605-00507.warc.gz eng 60094 text/html text/html 344555492 200 20200410103305 https://au.indeed.com/A-Mop-Above-the-Rest-jobs com,indeed,au)/a-mop-above-the-rest-jobs
36 UTF-8 TTSHM5OX56URJ322RZG5RHTNDUQUKQQE crawl-data/CC-MAIN-2020-16/segments/1585370506580.20/warc/CC-MAIN-20200402014600-20200402044600-00240.warc.gz eng 76911 text/html text/html 282423245 200 20200402035454 https://au.indeed.com/Abbott-Point-Coal-Terminal-jobs com,indeed,au)/abbott-point-coal-terminal-jobs
37 UTF-8 L6QJRDSKKHRTMOMCDAEHMIGN3FVWZNAU crawl-data/CC-MAIN-2020-16/segments/1585370505359.23/warc/CC-MAIN-20200401003422-20200401033422-00559.warc.gz eng 78926 text/html text/html 350455254 200 20200401025215 https://au.indeed.com/Aberglasslyn-Medical-Centre-jobs com,indeed,au)/aberglasslyn-medical-centre-jobs
38 UTF-8 JP27BHGJENGK7JM3MNHXATFKANVROS2M crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00111.warc.gz eng 82902 text/html text/html 338150223 200 20200406024217 https://au.indeed.com/Aboriginal-Identified-$82,500-jobs-in-Queensland com,indeed,au)/aboriginal-identified-$82,500-jobs-in-queensland
39 UTF-8 YMEETCQQ4XB77ECKGHAX3JH32OIKDMJM crawl-data/CC-MAIN-2020-16/segments/1585371893683.94/warc/CC-MAIN-20200410075105-20200410105605-00129.warc.gz eng 77929 text/html text/html 351313651 200 20200410104615 https://au.indeed.com/Aboriginal-Liaison-Officer-jobs-in-Eagleby-QLD com,indeed,au)/aboriginal-liaison-officer-jobs-in-eagleby-qld
40 UTF-8 6JD3HSDEVBO6SAAMPQ5CPVUFYJ54HRFL crawl-data/CC-MAIN-2020-16/segments/1585370505359.23/warc/CC-MAIN-20200401003422-20200401033422-00024.warc.gz eng 86013 text/html text/html 367050552 200 20200401025847 https://au.indeed.com/Access-Corporate-Group-jobs com,indeed,au)/access-corporate-group-jobs
41 UTF-8 A7EQF3RELM2JOFI2MTR7HA5GPUXWSFYM crawl-data/CC-MAIN-2020-16/segments/1585371896913.98/warc/CC-MAIN-20200410110538-20200410141038-00195.warc.gz eng 61341 text/html text/html 337157476 200 20200410140805 https://au.indeed.com/Accessory-Jewellery-$117,500-jobs-in-New-South-Wales com,indeed,au)/accessory-jewellery-$117,500-jobs-in-new-south-wales
42 UTF-8 KCG6Y2RRSKBL222OALB66F27GPIZL7OC crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00453.warc.gz eng 77285 text/html text/html 362548342 200 20200404100456 https://au.indeed.com/Accessory-Jewellery-$60,000-jobs-in-New-South-Wales com,indeed,au)/accessory-jewellery-$60,000-jobs-in-new-south-wales
43 UTF-8 F2LTENG2BWNYOAPNGAT4MTMLUCS4KNCQ crawl-data/CC-MAIN-2020-16/segments/1585370507738.45/warc/CC-MAIN-20200402173940-20200402203940-00479.warc.gz eng 82949 text/html text/html 361006163 200 20200402191948 https://au.indeed.com/Accommodation-jobs-in-Pilbara-WA com,indeed,au)/accommodation-jobs-in-pilbara-wa
44 UTF-8 LMSS3CAO3GWO7DGUYQLJVP6MET7B4FCM crawl-data/CC-MAIN-2020-16/segments/1585371606067.71/warc/CC-MAIN-20200405150416-20200405180916-00284.warc.gz eng 79581 text/html text/html 353990995 200 20200405180144 https://au.indeed.com/Accommodation-Support-Worker-$147,900-jobs-in-Queensland com,indeed,au)/accommodation-support-worker-$147,900-jobs-in-queensland
45 UTF-8 4Q2LWFAID3INQM4GE3MVZYKL7NHETXIP crawl-data/CC-MAIN-2020-16/segments/1585370519111.47/warc/CC-MAIN-20200404011558-20200404041558-00147.warc.gz eng 85577 text/html text/html 365859791 200 20200404033728 https://au.indeed.com/Accommodation-Support-Worker-$72,500-jobs-in-Queensland com,indeed,au)/accommodation-support-worker-$72,500-jobs-in-queensland
46 UTF-8 FGAWLKGI3HB2JTNYPHQY7QOPD6OEQUPU crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00446.warc.gz eng 60068 text/html text/html 347527770 200 20200408100520 https://au.indeed.com/Accor-Hotels-jobs com,indeed,au)/accor-hotels-jobs
47 UTF-8 L7FHP4OFDVWMAVI4QBWWPVECWGHYKV2D crawl-data/CC-MAIN-2020-16/segments/1585371876625.96/warc/CC-MAIN-20200409185507-20200409220007-00150.warc.gz eng 86467 text/html text/html 355292446 200 20200409214336 https://au.indeed.com/Account-Manager-Advertising-jobs-in-Victoria com,indeed,au)/account-manager-advertising-jobs-in-victoria
48 UTF-8 L6SA645B7EEPWBVLN72DOT5DN7JCMPFL crawl-data/CC-MAIN-2020-16/segments/1585370504930.16/warc/CC-MAIN-20200331212647-20200401002647-00231.warc.gz eng 84326 text/html text/html 372719076 200 20200401000139 https://au.indeed.com/Account-Myob-jobs com,indeed,au)/account-myob-jobs
49 UTF-8 LCRBCC6VI7JRL5VDT4UVV2EHO3YUYBCB crawl-data/CC-MAIN-2020-16/segments/1585370510846.12/warc/CC-MAIN-20200403092656-20200403122656-00015.warc.gz eng 86002 text/html text/html 349017854 200 20200403111452 https://au.indeed.com/Account-Payable-Bank-jobs com,indeed,au)/account-payable-bank-jobs

Indeed Contains:

  • Title
  • Company
  • Location
  • First 25 words of ad text
  • Sometimes salary
# Save the first capture's HTML locally for inspection.  The payload is decoded
# as UTF-8, so write it back with the same explicit encoding rather than the
# platform default, which can raise UnicodeEncodeError on non-ASCII text.
with open('test.html', 'w', encoding='utf-8') as f:
    f.write(objs[0].content.decode('utf-8'))
None
# Parse the first capture and gather the href of every anchor carrying the
# ``jobtitle`` class.
soup = BeautifulSoup(objs[0].content)
urls = [anchor['href'] for anchor in soup.select('a.jobtitle')]
urls
['/rc/clk?jk=028bf2018beebedb&fccid=59b04a51f1164f7e&vjs=3',
 '/rc/clk?jk=d9ea2b72aae1bd1f&fccid=92dfe858c4b585f9&vjs=3',
 '/rc/clk?jk=34add443e5138142&fccid=a4a93a5cf946e3ad&vjs=3',
 '/rc/clk?jk=2d59eb05949f2081&fccid=edae4285faf6c2f0&vjs=3',
 '/rc/clk?jk=be8468dce830f059&fccid=ea26a03c73e2d4e9&vjs=3',
 '/rc/clk?jk=bf0dced454efd688&fccid=a7eb6e72c143133c&vjs=3',
 '/rc/clk?jk=e26927c40a677590&fccid=b155cdcdabd4ee03&vjs=3',
 '/rc/clk?jk=d5703b74c268f0b6&fccid=9eb3b6eca8bf5aac&vjs=3',
 '/rc/clk?jk=573786223f902b3b&fccid=ea26a03c73e2d4e9&vjs=3',
 '/rc/clk?jk=060a5ceb47fda90c&fccid=6e557affe98df478&vjs=3']
# Derive the site origin (scheme + host) from the capture URL.  urlsplit avoids
# the slicing pitfall where a URL without a path makes str.find return -1 and
# the slice silently drops the last character.
from urllib.parse import urlsplit

base_url = urlsplit(objs[0].data['url'])
base_url = f'{base_url.scheme}://{base_url.netloc}'
# Pull the ``jk=<job key>`` query parameter out of the first tracking link.
# Raw string so that ``\?`` reaches the regex engine as an escaped literal "?"
# instead of being an invalid string escape.
param = re.match(r'/rc/clk\?(jk=[^&]+)', urls[0]).group(1)
param
'jk=028bf2018beebedb'
import requests

Retrieve the job page by rewriting the tracking link into a direct `viewjob` URL

# Build the direct job-page URL from the site origin and the ``jk`` parameter.
url = f'{base_url}/viewjob?{param}'
# requests applies no timeout by default; bound the wait so a stalled
# connection cannot hang the session indefinitely.
r = requests.get(url, timeout=30)
r.status_code
200
# Overwrite test.html with the raw bytes of the fetched job page (binary mode,
# so no text encoding is involved).
with open('test.html', 'wb') as f:
    f.write(r.content)

Metadata is a bit tricky to get: it is embedded in the page source as a JavaScript object assigned to `window._initialData`

# The metadata sits in the page source after the ``window._initialData=``
# assignment; compute the offset of the first character following the ``=``.
start = 'window._initialData='
# str.index raises ValueError when the marker is absent, instead of str.find's
# -1 silently producing a bogus offset into the page.
start_idx = r.text.index(start) + len(start)
def get_object(text):
    """Return the prefix of *text* that ends at its first balanced ``}``.

    The text is scanned character by character while tracking brace depth.
    Braces inside double-quoted strings are ignored, and any character that
    follows a backslash is skipped so that escaped quotes do not toggle the
    in-string state.  Only double quotes are recognised as string delimiters.

    Args:
        text: String expected to start with a ``{...}`` object literal,
            possibly followed by unrelated trailing content.

    Returns:
        ``text`` up to and including the ``}`` that brings the brace depth
        back to zero (or below).  If no such brace exists the whole of
        ``text`` is returned; empty input yields ``''``.
    """
    depth = 0
    in_quote = False
    escaped = False
    for idx, char in enumerate(text):
        if escaped:
            # Previous character was a backslash: this one is consumed as-is.
            escaped = False
            continue
        if char == '\\':
            escaped = True
        elif char == '"':
            in_quote = not in_quote
        elif not in_quote:
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth <= 0:
                    return text[:idx + 1]
    # No closing brace balanced the object (or the input was empty); returning
    # ``text`` also avoids referencing the loop index when the loop never ran.
    return text
# Cut the balanced ``{...}`` literal that starts right after the marker out of
# the page source, then display it.
obj_text = get_object(r.text[start_idx:])
obj_text
'{"base64EncodedJson":"eyJhIjp0cnVlLCJjIjp0cnVlLCJkIjpmYWxzZSwiZSI6dHJ1ZSwiZyI6Imh0dHA6Ly9hdS5pbmRlZWQuY29tL20vYmFzZWNhbXAvdmlld2pvYj9qaz0wMjhiZjIwMThiZWViZWRiIiwiaCI6IlZhcmlvdXMgUHJvamVjdCBDb250cm9scyAmIFBsYW5uaW5nIFBvc2l0aW9ucyIsImkiOiJCcmlzYmFuZSBRTEQiLCJqIjoib3JnYW5pYyIsImwiOiIifQ","baseInboxUrl":"https:\\u002F\\u002Finbox.indeed.com","baseUrl":"https:\\u002F\\u002Fau.indeed.com","clientsideProctorGroups":{"mobcompanylinktst":true,"mobvj_hideapplyemail_tst":false,"mobvjpsfeedbacktst":false,"jasx_track_multisession_noapplies":false,"sal_insights_tab_redesign_tst":false,"jasx_hidephonenumber_tst":false},"companyFollowForm":{"addAlertUrl":"\\u002Falert?a=add&alert_params=followCompany%3Dadfbfa8ae907519e&q=company%3A%27SNC-Lavalin%27&alert_period=weekly&output=json&verified=0&tk=1eaook86k36bm000&hct=4c021f433b8dd134463a497cf3645afa","cancelText":"By creating a company alert you agree to our <a href=\\"\\u002Flegal\\" target=\\"_blank\\">Terms<\\u002Fa>. You can change your consent settings at any time by unsubscribing, or as detailed in our terms.","checkAlertUrl":"\\u002Frpc\\u002Fjobalert?a=check&app=acme&q=company%3A%27SNC-Lavalin%27&followCompany=adfbfa8ae907519e","confirmationHeader":"Please check your email","confirmationSubHeader":"we have sent a confirmation message","confirmationText":"Click on the link in this email to start receiving your Company Alert.","createAlertUrl":"\\u002Fmy\\u002Falerts?a=create&alert_params=followCompany%3Dadfbfa8ae907519e&alert_keywords=company%3A%27SNC-Lavalin%27&alert_period=weekly&output=json&followCompany=adfbfa8ae907519e&hct=4c021f433b8dd134463a497cf3645afa","cta":"Get job updates from SNC-Lavalin","duplicateEmailMessage":"You are already following this company.","followButton":{"buttonSize":"sm","buttonType":"tertiary","children":"Follow","disabled":false,"isBlock":false,"isResponsive":false,"size":"sm"},"followingText":"Following","input":{"disabled":false,"errorText":"This field is 
required","helpText":null,"id":null,"isSmall":false,"label":"My Email:","name":"email","type":"text","value":null},"invalidEmailMessage":"Please provide a valid email address.","saveButton":{"buttonSize":null,"buttonType":"secondary","children":"Save","disabled":false,"isBlock":true,"isResponsive":false,"size":"sm"}},"country":"AU","ctk":"1eaook85m10a3000","dcmModel":{"category":"jobse0","source":"8232301","type":"organic"},"desktop":true,"desktopSponsoredJobSeenData":"tk=1eaook86k36bm000","dgToken":"B1C91F9AB5B14CDA827FB6F92A2587D5","googleOneTapModel":{"baseSecureUrl":"https:\\u002F\\u002Fsecure.indeed.com","googleClientID":"1047839414793-v442kdo3pt0vb43l8nu2c5sh9lf4bsnj.apps.googleusercontent.com","redirectUrl":null},"indeedChatEmployerModel":{"chatEnabled":false},"jobKey":"028bf2018beebedb","jobLocation":"Brisbane QLD","jobSeenData":"tk=1eaook86k36bm000&context=viewjobrecs","jobTitle":"Various Project Controls & Planning Positions","language":"en","locale":"en_AU","localeData":{"":[null,"Project-Id-Version: \\nReport-Msgid-Bugs-To: \\nPOT-Creation-Date: 2020-06-11 04:00-0500\\nPO-Revision-Date: 2020-04-01 21:41+0000\\nLast-Translator: Auto Generated <noreply@indeed.com>\\nLanguage-Team: English (Australia) <https:\\u002F\\u002Fweblate.corp.indeed.com\\u002Fprojects\\u002Findeed\\u002Findeedmobile-i18n-content\\u002Fen_AU\\u002F>\\nLanguage: en_AU\\nMIME-Version: 1.0\\nContent-Type: text\\u002Fplain; charset=UTF-8\\nContent-Transfer-Encoding: 8bit\\nPlural-Forms: nplurals=2; plural=n != 1;\\nX-Generator: Weblate 3.9.1\\n"]},"mobtk":"1eaook86k36bm000","notifications":{"inboxLinkEnabled":false,"messagesLabel":"Messages","newMessagesCountPlurals":["{0} new","{0} 
new"],"notificationCenterEnabled":false,"updatingText":"checking..."},"originalJobLinkModel":{"cookieName":"RCLK","cookiePath":"\\u002F","cookieValue":"jk=028bf2018beebedb&vjtk=1eaook86k36bm000&ts=1592116519124&rd=&qd="},"pageId":"viewjob","relatedLinks":[{"href":"\\u002Fjobs?q=Project+Planner&l=Brisbane+QLD","linkText":"Project Planner jobs in Brisbane QLD"},{"href":"\\u002Fjobs?q=SNC-Lavalin&l=Brisbane+QLD","linkText":"Jobs at SNC-Lavalin in Brisbane QLD"},{"href":"\\u002Fsalary?q1=Project+Planner&l1=Brisbane+QLD","linkText":"Project Planner salaries in Brisbane QLD"}],"reportJobForm":{"additionalInformationPlaceholder":"Additional information","closeIconLabel":"Close","disclaimer":"All Job Ads are subject to Indeed\'s <a target=\\"_blank\\" href=\\"\\u002Flegal\\">Terms of Service<\\u002Fa>. We allow users to flag postings that may be in violation of those terms. Job Ads may also be flagged by Indeed. However, no moderation system is perfect, and flagging a posting does not ensure that it will be removed.","postHref":"\\u002Fm\\u002Frpc\\u002Flog\\u002Freport\\u002Fjob?jobKey=028bf2018beebedb&mobvjtk=1eaook86k36bm000&isMobile=false&indeedcsrftoken=7UWdjDPjNZTLybz4lALPJo4q6PhYIYzh","radioButtonGroup":{"errorText":null,"helpText":null,"isDisabled":false,"label":"Report this job","name":null,"radioButtons":[{"id":null,"isDisabled":false,"label":"It is offensive, discriminatory","name":"offensive","value":"offensive"},{"id":null,"isDisabled":false,"label":"It seems like a fake job","name":"fake","value":"fake"},{"id":null,"isDisabled":false,"label":"It is inaccurate","name":"inaccurate","value":"inaccurate"},{"id":null,"isDisabled":false,"label":"It is an advertisement","name":"advertisement","value":"advertisement"},{"id":null,"isDisabled":false,"label":"Other","name":"other","value":"other"}],"value":null},"submitButtonText":"Submit","successHeadline":"Job successfully reported","successText":"Thank you for helping us identify suspicious behavior on 
Indeed"},"saveJobButtonContainerModel":{"alreadySavedButtonModel":{"actions":["Saved","Applied","Interviewing","Offered","Hired"],"buttonSize":"block","buttonType":"secondary","contentHtml":"Saved","href":"\\u002F","iconSize":null},"applyFromComputerButtonModel":null,"applyFromComputerLogUrl":"\\u002Fm\\u002Frpc\\u002Flog\\u002Femailmyself?jk=028bf2018beebedb&mobvjtk=1eaook86k36bm000&sbt=4c021f433b8dd134463a497cf3645afa&ctk=1eaook85m10a3000&acctKey=","currentJobState":"VISITED","didYouApplyPromptModel":{"calloutModel":{"actionsList":null,"actionsMap":{"NO":{"children":"Not interested","className":null,"href":null,"target":null},"LATER":{"children":"Maybe later","className":null,"href":null,"target":null},"YES":{"children":"Yes","className":null,"href":null,"target":null}},"caretPosition":null,"children":null,"dismissAriaLabel":"Close","dismissAttributes":null,"dismissHref":null,"heading":"Did you apply?"},"jobKey":"028bf2018beebedb","possibleResponses":{"NO":"NO","LATER":"LATER","YES":"YES"},"userCanView":false},"didYouApplyResponseUrl":"\\u002Fm\\u002Frpc\\u002Fdidyouapply?tk=1eaook86k36bm000&jobKey=028bf2018beebedb&originPage=viewjob&from=viewjob","hashedCSRFToken":"4c021f433b8dd134463a497cf3645afa","isAlreadySavedButtonVisible":false,"isDisableJobStatusChange":false,"isLoggedIn":false,"isSaveWithoutLoginEnabled":false,"isSticky":false,"isSyncJobs":false,"mobtk":"1eaook86k36bm000","myIndeedLoginLink":"https:\\u002F\\u002Fau.indeed.com\\u002Faccount\\u002Flogin?dest=%2Fviewjob%3Fjk%3D028bf2018beebedb","myJobsAPIHref":"\\u002Frpc\\u002Flog\\u002Fmyjobs\\u002Ftransition_job_state?client=mobile&cause=statepicker&preserveTimestamp=false&tk=1eaook86k36bm000&jobKey=028bf2018beebedb&originPage=viewjob","myJobsURL":"\\u002Fmyjobs\\u002F?from=mobvj#","pageId":"viewjob","possibleJobActions":{"SAVED":"save","APPLIED":"apply","INTERVIEWING":"interview","OFFERED":"offer","HIRED":"hire","VISITED":"visit","ARCHIVED":"archive"},"possibleJobStates":{"SAVED":"Saved","APPLIED":"Appli
ed","INTERVIEWING":"Interviewing","OFFERED":"Offered","HIRED":"Hired","VISITED":"Visited","ARCHIVED":"Archived"},"saveButtonModel":{"buttonSize":"block","buttonType":"secondary","contentHtml":"Save this job","dataHref":null,"href":"\\u002F","icon":{"iconTitle":"save-icon","iconType":"favorite-border"},"isBlock":false,"largeScreenSizeText":null,"openInNewTab":false,"referrerpolicy":null,"rel":null,"sanitizedHref":null,"sanitizedHtml":null,"sticky":false,"target":null,"title":null,"viewJobDisplay":"DESKTOP_STANDALONE"},"showSaveJobInlineCallout":true,"smallButtonModel":null,"uistates":{"INTERVIEWING":"INTERVIEWING","OFFERED":"OFFERED","SAVED":"SAVED","VISITED":"VISITED","HIRED":"HIRED","ARCHIVED":"ARCHIVED","APPLIED":"APPLIED"},"viewJobDisplay":"DESKTOP_STANDALONE"},"saveJobCalloutModel":{"actionsList":null,"actionsMap":{"createaccount":{"children":"Create account (it\'s free)","className":null,"href":"https:\\u002F\\u002Fau.indeed.com\\u002Faccount\\u002Fregister?dest=%2Fviewjob%3Fjk%3D028bf2018beebedb","target":"self"},"signin":{"children":"Sign in","className":null,"href":"https:\\u002F\\u002Fau.indeed.com\\u002Faccount\\u002Flogin?dest=%2Fviewjob%3Fjk%3D028bf2018beebedb","target":"self"}},"caretPosition":null,"children":"You must sign in to save jobs:","dismissAriaLabel":"Close","dismissAttributes":null,"dismissHref":null,"heading":"Save jobs and view them from any computer."},"saveJobFailureModalModel":{"closeAriaLabel":"Close","closeButtonText":"Close","message":"Please retry","signInButtonText":null,"signInHref":null,"title":"Failed to Save Job"},"saveJobLimitExceededModalModel":{"closeAriaLabel":"Close","closeButtonText":null,"message":"You reached the limit. 
Please log in to save additional jobs.","signInButtonText":"Sign in","signInHref":"https:\\u002F\\u002Fau.indeed.com\\u002Faccount\\u002Flogin?dest=%2Fviewjob%3Fjk%3D028bf2018beebedb&from=viewjob_savejoblimitmodal","title":"You\'ve already saved 20 jobs"},"stickyType":"ALWAYS","validationToken":"ZlZg1VqaEDWp2g+kdCQ9qUTkAV7sslXJUFUKPJAPMpE=","viewJobButtonLinkContainerModel":{"clickCookieName":"RCLK","clickCookieValue":"jk=028bf2018beebedb&vjtk=1eaook86k36bm000&ts=1592116519124&rd=&qd=","desktopScreenerQuestionsModel":null,"jobKey":"028bf2018beebedb","shouldSetClickTrackingCookie":true,"thirdPartyApplyCreateAccountModel":null,"viewJobButtonLinkModel":{"buttonSize":"block","buttonType":"primary","contentHtml":"Apply Now","dataHref":null,"href":"https:\\u002F\\u002Fau.indeed.com\\u002Frc\\u002Fclk?jk=028bf2018beebedb&from=vj&pos=bottom&sjdu=76Cn2YAIPzFIwtaQqpG01IDDplm6SwWjHcxyoDIphKnEOEJUiVSIY7daUBaXb4E_kN0wkll9wDHc3mnStM4Hmg","icon":null,"isBlock":true,"largeScreenSizeText":"Apply On Company Site","openInNewTab":true,"referrerpolicy":"origin","rel":"noopener","sanitizedHref":null,"sanitizedHtml":null,"sticky":false,"target":"_blank","title":null,"viewJobDisplay":null}},"viewJobDisplay":"DESKTOP_STANDALONE"}'
# Parse the extracted literal as JSON and list its top-level keys.
data = json.loads(obj_text)
data.keys()
dict_keys(['base64EncodedJson', 'baseInboxUrl', 'baseUrl', 'clientsideProctorGroups', 'companyFollowForm', 'country', 'ctk', 'dcmModel', 'desktop', 'desktopSponsoredJobSeenData', 'dgToken', 'googleOneTapModel', 'indeedChatEmployerModel', 'jobKey', 'jobLocation', 'jobSeenData', 'jobTitle', 'language', 'locale', 'localeData', 'mobtk', 'notifications', 'originalJobLinkModel', 'pageId', 'relatedLinks', 'reportJobForm', 'saveJobButtonContainerModel', 'saveJobCalloutModel', 'saveJobFailureModalModel', 'saveJobLimitExceededModalModel', 'stickyType', 'validationToken', 'viewJobButtonLinkContainerModel', 'viewJobDisplay'])
# Location field from the embedded page metadata.
data['jobLocation']
'Brisbane QLD'
# Title field from the embedded page metadata.
data['jobTitle']
'Various Project Controls & Planning Positions'
# Job key from the embedded page metadata; it matches the ``jk`` value used to
# build the viewjob URL.
data['jobKey']
'028bf2018beebedb'
# Parse the fetched job page, replacing the earlier listing-page soup.
# NOTE(review): no parser is named here, so the result may depend on which
# parsers are installed — confirm and consider pinning one.
soup = BeautifulSoup(r.content)

Job Text

# First element whose id is ``jobDescriptionText``; raises IndexError if the
# page has no such element.
soup.select('#jobDescriptionText')[0]
<div class="jobsearch-jobDescriptionText" id="jobDescriptionText"><div><p>Founded in 1911, SNC-Lavalin Atkins is a global fully integrated professional services and project management company and a major player in the ownership of infrastructure. From offices around the world, SNC-Lavalin Atkins’ employees think beyond engineering. Our teams provide comprehensive end-to-end project solutions – including capital investment, consulting, design, engineering, construction management, sustaining capital and operations and maintenance – to clients across the EDPM (Engineering, Design and Project Management), Infrastructure and Resources businesses. http://www.snclavalin.com</p><p></p><p><b>
Join our SNC-Lavalin Atkins team, and you’ll be a part of a diverse, ambitious business with a strong team spirit.
</b></p><p></p><p>For more than 40 years in Australia, our people have been carving out rewarding careers on award winning projects. We think beyond engineering and push the boundaries of innovation for our clients across all major markets.</p><p>
SNC-Lavalin Atkins is built on our core values of Safety, Integrity, Collaboration and Innovation. Our people drive results and are helping our clients transform their projects from vision into reality. Working with diverse and multi-disciplinary teams we provide consultancy, design, engineering through to self-perform construction, completions &amp; commissioning and operations &amp; maintenance, all underpinned by our digital know-how.</p><p></p><p><b><i>
About the Opportunity
</i></b></p><p>Our Programme Management Office (PMO) consultancy team currently has specialists working on complex infrastructure, aviation and transport projects in Brisbane, Sydney and Melbourne.</p><p>
We are seeking a variety of project, programme and portfolio (P3) PMO roles for each of our Brisbane, Sydney and Melbourne State offices within our EDPM business. Please include in your cover letter, which state office you would like to be considered for.
</p><p></p><p>The PMO roles we are seeking include:</p><ul><li>
Project Controls Managers</li><li>
Forensic Planners</li><li>
P6 Planning Managers</li><li>
P6 Planners</li><li>
Risk Managers</li><li>
Cost Controllers</li><li>
Cost Managers</li><li>
Reporting Managers</li><li>
Estimating Leads</li><li>
Estimators</li><li>
Document Controllers</li></ul><p></p><p><b>
Education and Skills</b></p><ul><li>
Minimum 5 years’ experience in chosen field</li><li>
A Bachelor degree or higher, and/or equivalent in training and experience regarded</li><li>
Must have experience in one of the following sectors; infrastructure, aviation, transportation or mining</li><li>
Project, programme and portfolio (P3) knowledge and experience advantageous</li><li>
Effective problem solving and time management abilities</li><li>
Excellent communication and organisational skills</li><li>
Experience with large-scale projects
</li><li>Ability to work independently with minimal supervision or in an integrated team environment</li><li>
Process driven and attention to detail are necessary</li><li>
Drive to continuously seek innovation and improvement (self, project, strategic)</li></ul><p></p><p><b><i>
About the Benefits
</i></b></p><p>We offer rewarding careers to people who want to be part of our great stories and remarkable achievements. With the opportunity to work on diverse projects of varying sizes.</p><p></p><p>
SNC-Lavalin Atkins’ business offer a competitive compensation and benefits package with a great team environment. We have in place strong learning and development programs, training and career opportunities to keep you developing.</p><p>
We are looking for innovative, forward-thinking people who enjoy challenge and actively seek to develop and improve work processes and want to be part of a safe and healthy work environment.</p><p></p><p><b>
Why join our team?</b></p><p>
Located in 13 countries across the Asia Pacific region, SNC-Lavalin Atkins operates through its brands, SNC-Lavalin, Kentz and Atkins and our people have worked, and continue to work, on some of the region’s most iconic projects. So join today as a career with us opens up a world of possibilities - being part of a global organisation of over 50,000 employees opportunities await you to collaborate with colleagues on international projects or use your skills and knowledge to create a winning combination for our clients across our other markets.</p><p>
We’re focused on creating an inclusive, supportive workplace that will enable you to develop and thrive. You’ll work alongside some of the leaders in your field, with opportunities to reach your potential through training and professional development.</p><p></p><p>
Only current ‘Right to work in Australia’ applications will be considered.</p></div><p></p></div>

Seek

# Sample up to 50 captures of Seek job URLs with HTTP status 200 from the
# Common Crawl index (202004-202005 window), then show them as a table.
objs = list(
    cdx.iter(
        'seek.com.au/job/*',
        from_ts='202004',
        to='202005',
        limit=50,
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 QZF3KBX2P77DGDACHOO6VQO6TEMTWXVZ crawl-data/CC-MAIN-2020-16/segments/1585370506673.7/warc/CC-MAIN-20200402045741-20200402075741-00395.warc.gz eng 24473 text/html text/html 1137303128 200 20200402065023 https://www.seek.com.au/job/40480218?type=standard au,com,seek)/job/40480218?type=standard
1 UTF-8 GFQWJL3GA4HB4GZBANA5EJU7MODNVVKX crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00366.warc.gz eng 24220 text/html text/html 1137099417 200 20200402134520 https://www.seek.com.au/job/40673486?type=standard au,com,seek)/job/40673486?type=standard
2 UTF-8 3NM664ONG6P5DJXZSZQMTGKQPC642RQF crawl-data/CC-MAIN-2020-16/segments/1585371880945.85/warc/CC-MAIN-20200409220932-20200410011432-00128.warc.gz eng 29828 text/html text/html 1127547178 200 20200410002047 https://www.seek.com.au/job/40778851?_ga=2.217432918.1128169088.1579487610-1596147771.1579487610 au,com,seek)/job/40778851?_ga=2.217432918.1128169088.1579487610-1596147771.1579487610
3 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00294.warc.gz NaN 1715 text/html text/html 1133538036 200 20200402151926 https://www.seek.com.au/job/40790432/apply/linkout au,com,seek)/job/40790432/apply/linkout
4 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00141.warc.gz NaN 1714 text/html text/html 1134800452 200 20200406011717 https://www.seek.com.au/job/40800664/apply/linkout au,com,seek)/job/40800664/apply/linkout
5 UTF-8 IX5KPE6S4O4HIONEMZOGEBBDH7HJDG2J crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00470.warc.gz eng 24749 text/html text/html 1112222656 200 20200410032900 https://www.seek.com.au/job/40832664?type=standout au,com,seek)/job/40832664?type=standout
6 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00202.warc.gz NaN 1715 text/html text/html 1144074898 200 20200402162406 https://www.seek.com.au/job/40842263/apply/linkout au,com,seek)/job/40842263/apply/linkout
7 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00083.warc.gz NaN 1706 text/html text/html 1107682991 200 20200406025458 https://www.seek.com.au/job/40846293/apply/linkout au,com,seek)/job/40846293/apply/linkout
8 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00025.warc.gz NaN 1708 text/html text/html 1116159944 200 20200408084847 https://www.seek.com.au/job/40862183/apply/linkout au,com,seek)/job/40862183/apply/linkout
9 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00511.warc.gz NaN 1711 text/html text/html 1109546595 200 20200402161049 https://www.seek.com.au/job/40862233/apply/linkout au,com,seek)/job/40862233/apply/linkout
10 UTF-8 SU5PQRKEA2AVPRYDNFR4FXAB2PIBG3UC crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00410.warc.gz eng 23324 text/html text/html 1143762094 200 20200401103811 https://www.seek.com.au/job/40878691?type=promoted au,com,seek)/job/40878691?type=promoted
11 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370526982.53/warc/CC-MAIN-20200404231315-20200405021315-00523.warc.gz NaN 1707 text/html text/html 1147942773 200 20200405004211 https://www.seek.com.au/job/40899398/apply/linkout au,com,seek)/job/40899398/apply/linkout
12 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370507738.45/warc/CC-MAIN-20200402173940-20200402203940-00223.warc.gz NaN 1711 text/html text/html 1156221286 200 20200402184859 https://www.seek.com.au/job/40922447/apply/linkout au,com,seek)/job/40922447/apply/linkout
13 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370526982.53/warc/CC-MAIN-20200404231315-20200405021315-00195.warc.gz NaN 1709 text/html text/html 1119039973 200 20200405003128 https://www.seek.com.au/job/40937285/apply/linkout au,com,seek)/job/40937285/apply/linkout
14 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00300.warc.gz NaN 1707 text/html text/html 1136149459 200 20200402144654 https://www.seek.com.au/job/40938911/apply/linkout au,com,seek)/job/40938911/apply/linkout
15 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00202.warc.gz NaN 1706 text/html text/html 1114897640 200 20200408092032 https://www.seek.com.au/job/40939118/apply/linkout au,com,seek)/job/40939118/apply/linkout
16 UTF-8 QOJNF6KZWQD7V5RGTUHN7A3MZWVAB2IH crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00164.warc.gz eng 24431 text/html text/html 1130847247 200 20200408080650 https://www.seek.com.au/job/40939952 au,com,seek)/job/40939952
17 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00085.warc.gz NaN 1713 text/html text/html 1134732273 200 20200402162209 https://www.seek.com.au/job/40942814/apply/linkout au,com,seek)/job/40942814/apply/linkout
18 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371660550.75/warc/CC-MAIN-20200406200320-20200406230820-00200.warc.gz NaN 1702 text/html text/html 1153471119 200 20200406201945 https://www.seek.com.au/job/40942905/apply/linkout au,com,seek)/job/40942905/apply/linkout
19 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370515113.54/warc/CC-MAIN-20200403154746-20200403184746-00325.warc.gz NaN 1710 text/html text/html 1124048328 200 20200403171955 https://www.seek.com.au/job/40949035/apply/linkout au,com,seek)/job/40949035/apply/linkout
20 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00525.warc.gz NaN 1710 text/html text/html 1146260722 200 20200406021412 https://www.seek.com.au/job/40950055/apply/linkout au,com,seek)/job/40950055/apply/linkout
21 UTF-8 SBIPR2XKNNXOU6BDLCJDXYVKCW5ELTBZ crawl-data/CC-MAIN-2020-16/segments/1585371880945.85/warc/CC-MAIN-20200409220932-20200410011432-00131.warc.gz NaN 1713 text/html text/html 1118334178 200 20200410003652 https://www.seek.com.au/job/40951337/apply/linkout au,com,seek)/job/40951337/apply/linkout
22 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00127.warc.gz NaN 1713 text/html text/html 1117459171 200 20200408083658 https://www.seek.com.au/job/40952378/apply/linkout au,com,seek)/job/40952378/apply/linkout
23 UTF-8 UPI34GKA55O3CP3DC7WVTQ6ERDEPFQBD crawl-data/CC-MAIN-2020-16/segments/1585370505366.8/warc/CC-MAIN-20200401034127-20200401064127-00073.warc.gz eng 25661 text/html text/html 1146497518 200 20200401041840 https://www.seek.com.au/job/40953788?type=standout au,com,seek)/job/40953788?type=standout
24 UTF-8 JGI3B5JH6FTUAP34DNOCVV3FPVWJPAZK crawl-data/CC-MAIN-2020-16/segments/1585370515113.54/warc/CC-MAIN-20200403154746-20200403184746-00145.warc.gz eng 28241 text/html text/html 1115810277 200 20200403174236 https://www.seek.com.au/job/40954073 au,com,seek)/job/40954073
25 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371660550.75/warc/CC-MAIN-20200406200320-20200406230820-00003.warc.gz NaN 1706 text/html text/html 1131971218 200 20200406214733 https://www.seek.com.au/job/40956282/apply/linkout au,com,seek)/job/40956282/apply/linkout
26 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370515113.54/warc/CC-MAIN-20200403154746-20200403184746-00237.warc.gz NaN 1713 text/html text/html 1137446887 200 20200403171321 https://www.seek.com.au/job/40956358/apply/linkout au,com,seek)/job/40956358/apply/linkout
27 UTF-8 5A6GAGBZKB6QEBXXPAPUQUH3R2FEAIZI crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00552.warc.gz eng 25662 text/html text/html 1120301081 200 20200408074459 https://www.seek.com.au/job/40957644 au,com,seek)/job/40957644
28 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370505366.8/warc/CC-MAIN-20200401034127-20200401064127-00194.warc.gz NaN 1750 text/html text/html 1175885889 200 20200401052903 https://www.seek.com.au/job/40957854/apply/linkout?searchrequesttoken=196fd578-5779-41fa-b014-3d21cb5ca0f6 au,com,seek)/job/40957854/apply/linkout?searchrequesttoken=196fd578-5779-41fa-b014-3d21cb5ca0f6
29 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370507738.45/warc/CC-MAIN-20200402173940-20200402203940-00559.warc.gz NaN 1716 text/html text/html 1125843721 200 20200402185759 https://www.seek.com.au/job/40958604/apply/linkout au,com,seek)/job/40958604/apply/linkout
30 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00094.warc.gz NaN 1707 text/html text/html 1140931785 200 20200406005647 https://www.seek.com.au/job/40958834/apply/linkout au,com,seek)/job/40958834/apply/linkout
31 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370507738.45/warc/CC-MAIN-20200402173940-20200402203940-00178.warc.gz NaN 1714 text/html text/html 1152691856 200 20200402193733 https://www.seek.com.au/job/40961544/apply/linkout au,com,seek)/job/40961544/apply/linkout
32 UTF-8 F2TIBEVBIBVAGTXVMKUAOJYA7MZSXZ3H crawl-data/CC-MAIN-2020-16/segments/1585371612531.68/warc/CC-MAIN-20200406004220-20200406034720-00271.warc.gz eng 25755 text/html text/html 1168960284 200 20200406004849 https://www.seek.com.au/job/40961710 au,com,seek)/job/40961710
33 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00132.warc.gz NaN 1706 text/html text/html 1141736079 200 20200408082132 https://www.seek.com.au/job/40964291/apply/linkout au,com,seek)/job/40964291/apply/linkout
34 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00338.warc.gz NaN 1714 text/html text/html 1157306432 200 20200402160448 https://www.seek.com.au/job/40964800/apply/linkout au,com,seek)/job/40964800/apply/linkout
35 UTF-8 67FOF5FK3OGDZVD25SC5JCDTO6WMBVNK crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00150.warc.gz NaN 1705 text/html text/html 1104968024 200 20200408084458 https://www.seek.com.au/job/40966592/apply/linkout au,com,seek)/job/40966592/apply/linkout
36 UTF-8 6TXVXNZCFZPVBLE3YPROENOHGQYHGBN7 crawl-data/CC-MAIN-2020-16/segments/1585370520039.50/warc/CC-MAIN-20200404042338-20200404072338-00502.warc.gz eng 31254 text/html text/html 1111481280 200 20200404061056 https://www.seek.com.au/job/40987005?type=standout au,com,seek)/job/40987005?type=standout
37 UTF-8 YEH5B7Y53462ESRPORVU7WJQAENEBSSM crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00112.warc.gz eng 26881 text/html text/html 1114285420 200 20200410034736 https://www.seek.com.au/job/40990128?type=standout au,com,seek)/job/40990128?type=standout
38 UTF-8 EMYKQVSS4YL5CDMOGCF3ADB3VEVUBETZ crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00078.warc.gz eng 26852 text/html text/html 1113488574 200 20200410033112 https://www.seek.com.au/job/40990643?type=standout au,com,seek)/job/40990643?type=standout
39 UTF-8 6XTYNWDPTXQUJJ357R2SRKLFRQ2WMN5J crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00342.warc.gz eng 23720 text/html text/html 1136226194 200 20200402121900 https://www.seek.com.au/job/41021776?type=standard au,com,seek)/job/41021776?type=standard
40 UTF-8 A4WSOKJZQEUOTDFYBFCGGQSMEWVMPSXT crawl-data/CC-MAIN-2020-16/segments/1585371861991.79/warc/CC-MAIN-20200409154025-20200409184525-00489.warc.gz eng 29094 text/html text/html 1115383341 200 20200409165536 https://www.seek.com.au/job/41023396?type=standout au,com,seek)/job/41023396?type=standout
41 UTF-8 5CCOPJ2FALWPHJMCYOVGTPLZPWD7A2JW crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00236.warc.gz eng 31694 text/html text/html 1100082493 200 20200410022530 https://www.seek.com.au/job/41026600?type=standout au,com,seek)/job/41026600?type=standout
42 UTF-8 3JRC4NOIBQMHS3KH3BJ2TT3HP6ZBKFVC crawl-data/CC-MAIN-2020-16/segments/1585370519111.47/warc/CC-MAIN-20200404011558-20200404041558-00352.warc.gz eng 23326 text/html text/html 1137698543 200 20200404021509 https://www.seek.com.au/job/41041152?type=standard au,com,seek)/job/41041152?type=standard
43 UTF-8 UNITV4SZELSTZ2SUY5CTNFFNHQXJW5SE crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00258.warc.gz eng 24930 text/html text/html 1110674535 200 20200410034700 https://www.seek.com.au/job/41043865?type=standout au,com,seek)/job/41043865?type=standout
44 UTF-8 NNVFM3GLRVU3WCKOAWLVYH5ER6LY53QE crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00213.warc.gz eng 24224 text/html text/html 1136423327 200 20200402130715 https://www.seek.com.au/job/41046217?type=standard au,com,seek)/job/41046217?type=standard
45 UTF-8 FNB3SNDV4PC4H7N5HYE3A5Y4HLMQOXO5 crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00344.warc.gz eng 25365 text/html text/html 1125398898 200 20200404082309 https://www.seek.com.au/job/41047214?_ga=2.203597463.624167984.1582928577-333199963.1581114360 au,com,seek)/job/41047214?_ga=2.203597463.624167984.1582928577-333199963.1581114360
46 UTF-8 N4XD7VHWB7TCCCP4IUCYQGWLM4ZQUNXB crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00326.warc.gz eng 27174 text/html text/html 1155685826 200 20200401110801 https://www.seek.com.au/job/41051514?type=standout au,com,seek)/job/41051514?type=standout
47 UTF-8 ISMTOOTNSH4XUAEOVCKJARS2RCWDROWS crawl-data/CC-MAIN-2020-16/segments/1585370524604.46/warc/CC-MAIN-20200404165658-20200404195658-00285.warc.gz eng 23851 text/html text/html 1137407096 200 20200404174519 https://www.seek.com.au/job/41082355?_ga=2.144734647.241037096.1583184597-1542963780.1583184597&_gac=1.207905446.1583184597.EAIaIQobChMI-MXYmd785w... au,com,seek)/job/41082355?_ga=2.144734647.241037096.1583184597-1542963780.1583184597&_gac=1.207905446.1583184597.eaiaiqobchmi-mxymd785wivrkwwch3zi...
48 UTF-8 C23EWFC5G4SZO5HTN324MB57CQZJJHFK crawl-data/CC-MAIN-2020-16/segments/1585370506673.7/warc/CC-MAIN-20200402045741-20200402075741-00413.warc.gz eng 27069 text/html text/html 1133160885 200 20200402071245 https://www.seek.com.au/job/41090313?type=standard au,com,seek)/job/41090313?type=standard
49 UTF-8 6VVJ4EBK4MHG5ORVCVMB5AWGN2J55IIS crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00214.warc.gz eng 27803 text/html text/html 1142507080 200 20200402120415 https://www.seek.com.au/job/41096558?_ga=2.123495755.1320293108.1583192246-1372768251.1563851836 au,com,seek)/job/41096558?_ga=2.123495755.1320293108.1583192246-1372768251.1563851836

Full ad (unless the URL has /apply…)

# Save the first capture's raw bytes to test.html so the page source can be
# inspected by hand.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None

Looking at the source, it looks like all the relevant data is in a JavaScript object on a single line.

# Matches the `_REDUX_DATA = ...;` assignment embedded in a Seek page.
# The capture group is greedy within a single line, so it runs up to the
# last `;` on that line.
data_re = re.compile('_REDUX_DATA = ([^\n]+);')


def sk_extract(text):
    """Return the decoded `_REDUX_DATA` object embedded in a Seek page.

    Args:
        text: The page HTML as a string.

    Returns:
        Whatever ``demjson.decode`` produces for the captured payload.

    Raises:
        ValueError: If ``text`` contains no `_REDUX_DATA = ...;` assignment
            (for example the small `/apply/linkout` pages).
    """
    match = data_re.search(text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no '_REDUX_DATA = ...;' assignment found in page text")
    # NOTE(review): demjson is presumably used because the payload is a
    # JavaScript literal rather than strict JSON - confirm before swapping
    # in the stdlib json module.
    return demjson.decode(match.group(1))
# Decode the first capture's bytes as UTF-8 and parse its embedded state object.
obj = sk_extract(objs[0].content.decode('utf-8'))
# Show the top-level sections of the parsed state.
obj.keys()
dict_keys(['dashboard', 'experiments', 'featureFlags', 'jobdetails', 'joblistitem', 'lastSearch', 'lmis', 'location', 'nudges', 'results', 'savedJobs', 'saveSearch', 'search', 'seo', 'user', 'fitme', '@@redux-hotjar-state'])

All the data is in here

# The job ad itself is stored under jobdetails -> result.
obj['jobdetails']['result']
{'id': 40480218,
 'listingDate': '2020-03-17T23:41:15.000Z',
 'expiryDate': '2020-04-25T13:00:00.000Z',
 'title': 'Transmission Coordinator - Broadcast TV Playout - Sydney',
 'teaser': "Immediate, full-time TV media operations job in Sydney's North. Coordinate live multi-channel TV content to air in a digital TV playout facility.",
 'advertiser': {'id': 30979201,
  'description': 'Lang Deacon',
  'searchParams': {'keywords': 'Lang Deacon'}},
 'locationHierarchy': {'nation': 'Australia',
  'state': 'New South Wales',
  'city': 'Sydney',
  'area': 'North Shore & Northern Beaches',
  'suburb': 'northsydney'},
 'locationId': 1000,
 'stateId': 3101,
 'workType': 'Full Time',
 'classification': {'id': 6304, 'description': 'Advertising, Arts & Media'},
 'subClassification': {'id': 6314, 'description': 'Programming & Production'},
 'salary': None,
 'salaryType': 'AnnualPackage',
 'automaticInclusion': False,
 'isLinkOut': False,
 'isScreenAssigned': False,
 'isSelectionCriteriaEnabled': False,
 'status': 'Active',
 'isRightToWorkRequired': False,
 'hasRoleRequirements': True,
 'roleRequirements': ['Which of the following statements best describes your right to work in Australia?',
  "What's your expected annual base salary?",
  'How much notice are you required to give your current employer?'],
 'mobileAdTemplate': '<ul> <li><strong>Live media TV operations; </strong></li> <li><strong>Based Sydney North; </strong></li> <li><strong>Career development &amp; team support. </strong></li></ul> <p>\xa0</p>  <p>Immediate opportunity based in Sydney’s North to join one of Australia’s leading multi-channel broadcast playout organisations and develop your media operations career to the next level.</p>  <p>\xa0</p>  <p><strong>The Job:</strong></p>  <p>Coordinate multiple live and scheduled channels to air in a state-of-the-art digital TV playout centre.\xa0 The role of Transmission Coordinator, sometimes referred to as Presentation Coordinator or Presentation Director, challenges your ability to coordinate multiple tasks, your attention to detail, and your calm nature under pressure.</p>  <p>\xa0</p>  <p>Working with a supportive team, the Transmission Coordinator will liaise with broadcast clients to ensure schedules and playlists are accurate, content is appropriate and transmission of multi-channel content to air runs smoothly.</p>  <p>\xa0</p>  <p><strong>What we need:</strong></p>  <p>We’re looking for an understanding of automated playout workflows in a contemporary TV or video environment combined with a genuine passion for working in the media industry.\xa0</p>  <p>Broadcasting is a 24 hour business and as such the Transmission Coordinator must be comfortable working varied hours across a 24/7 shift roster.</p>  <p><br /><strong>We need:</strong></p> <ul> <li>Career experience in media TV operations;</li> <li>Exposure to digital TV environment / workflows;</li> <li>Attention to detail, methodical and responsive;</li> <li>Experience managing multiple tasks and priorities;</li> <li>Ability to work to a 24/7 roster.</li> <li>Previous TX Coord experience highly regarded.</li></ul> <p>\xa0</p>  <p>In return the Transmission Coordinator will enjoy a genuinely supportive and enjoyable team culture, where work-life balance and simple team work is valued.\xa0 The 
Transmission Coordinator will earn a competitive salary and benefit from defined career development opportunities.</p>  <p>\xa0</p>  <p>At this stage only applicants with the right to live and work in Australia can be considered for this position.</p>  <p>\xa0</p>  <p>If you fulfil the above criteria and would be interested in a new full-time challenge, then apply online including a Word version of your CV immediately.</p>',
 'companyReview': None,
 'contactMatches': [],
 'hasCustomTemplate': False,
 'roleTitles': 'coordinator',
 'isPrivateAdvertiser': False}

How many objects?

# Same Seek query without a row limit. The second filter is a negated regex
# on the URL that drops the /apply/ captures, which carry no ad content.
objs = list(
    cdx.iter(
        'seek.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200', '!~url:.*/apply/'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 QZF3KBX2P77DGDACHOO6VQO6TEMTWXVZ crawl-data/CC-MAIN-2020-16/segments/1585370506673.7/warc/CC-MAIN-20200402045741-20200402075741-00395.warc.gz eng 24473 text/html text/html 1137303128 200 20200402065023 https://www.seek.com.au/job/40480218?type=standard au,com,seek)/job/40480218?type=standard
1 UTF-8 GFQWJL3GA4HB4GZBANA5EJU7MODNVVKX crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00366.warc.gz eng 24220 text/html text/html 1137099417 200 20200402134520 https://www.seek.com.au/job/40673486?type=standard au,com,seek)/job/40673486?type=standard
2 UTF-8 3NM664ONG6P5DJXZSZQMTGKQPC642RQF crawl-data/CC-MAIN-2020-16/segments/1585371880945.85/warc/CC-MAIN-20200409220932-20200410011432-00128.warc.gz eng 29828 text/html text/html 1127547178 200 20200410002047 https://www.seek.com.au/job/40778851?_ga=2.217432918.1128169088.1579487610-1596147771.1579487610 au,com,seek)/job/40778851?_ga=2.217432918.1128169088.1579487610-1596147771.1579487610
3 UTF-8 IX5KPE6S4O4HIONEMZOGEBBDH7HJDG2J crawl-data/CC-MAIN-2020-16/segments/1585371883359.91/warc/CC-MAIN-20200410012405-20200410042905-00470.warc.gz eng 24749 text/html text/html 1112222656 200 20200410032900 https://www.seek.com.au/job/40832664?type=standout au,com,seek)/job/40832664?type=standout
4 UTF-8 SU5PQRKEA2AVPRYDNFR4FXAB2PIBG3UC crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00410.warc.gz eng 23324 text/html text/html 1143762094 200 20200401103811 https://www.seek.com.au/job/40878691?type=promoted au,com,seek)/job/40878691?type=promoted
... ... ... ... ... ... ... ... ... ... ... ... ...
713 UTF-8 X565OQM2DRRQZ6FQC7S2NAMRXCAWI2LT crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00503.warc.gz eng 27607 text/html text/html 1151045210 200 20200401121846 https://www.seek.com.au/job/41214294?type=standard au,com,seek)/job/41214294?type=standard
714 UTF-8 ZRO6RIHF3VULV5JBPUOGJLSUTOI7UNX6 crawl-data/CC-MAIN-2020-16/segments/1585370519111.47/warc/CC-MAIN-20200404011558-20200404041558-00100.warc.gz eng 28557 text/html text/html 1140480506 200 20200404033221 https://www.seek.com.au/job/41214307?type=standard au,com,seek)/job/41214307?type=standard
715 UTF-8 4H72NRAA3UFVRCGPSKNJHVKAX2553JKI crawl-data/CC-MAIN-2020-16/segments/1585370505730.14/warc/CC-MAIN-20200401100029-20200401130029-00421.warc.gz eng 28671 text/html text/html 1174895213 200 20200401113202 https://www.seek.com.au/job/41214308?type=standard au,com,seek)/job/41214308?type=standard
716 UTF-8 BBQKJO27RIE6J26SUXV4ZC3X3AXMQBEF crawl-data/CC-MAIN-2020-16/segments/1585370519111.47/warc/CC-MAIN-20200404011558-20200404041558-00292.warc.gz eng 27902 text/html text/html 1137053959 200 20200404023324 https://www.seek.com.au/job/41214450?type=standout au,com,seek)/job/41214450?type=standout
717 UTF-8 6XEMII3GSEUSYBIREZT7L5LZLWOAYXBZ crawl-data/CC-MAIN-2020-16/segments/1585370506121.24/warc/CC-MAIN-20200401192839-20200401222839-00381.warc.gz eng 26222 text/html text/html 1161048394 200 20200401212405 https://www.seek.com.au/job/41214657?type=standout au,com,seek)/job/41214657?type=standout

718 rows × 12 columns

# Fetch and parse the first five captures, keeping only each page's job ad.
data = [
    sk_extract(capture.content.decode('utf-8'))['jobdetails']['result']
    for capture in objs[:5]
]
None
None
None
None
None
# Tabulate the extracted ads, one row per ad.
pd.DataFrame(data)
id listingDate expiryDate title teaser advertiser locationHierarchy locationId stateId workType ... roleRequirements mobileAdTemplate companyReview contactMatches hasCustomTemplate roleTitles isPrivateAdvertiser desktopAdTemplate video branding
0 40480218 2020-03-17T23:41:15.000Z 2020-04-25T13:00:00.000Z Transmission Coordinator - Broadcast TV Playout - Sydney Immediate, full-time TV media operations job in Sydney's North. Coordinate live multi-channel TV content to air in a digital TV playout facility. {'id': 30979201, 'description': 'Lang Deacon', 'searchParams': {'keywords': 'Lang Deacon'}} {'nation': 'Australia', 'state': 'New South Wales', 'city': 'Sydney', 'area': 'North Shore & Northern Beaches', 'suburb': 'northsydney'} 1000 3101 Full Time ... [Which of the following statements best describes your right to work in Australia?, What's your expected annual base salary?, How much notice are ... <ul> <li><strong>Live media TV operations; </strong></li> <li><strong>Based Sydney North; </strong></li> <li><strong>Career development &amp; team... None [] False coordinator False NaN NaN NaN
1 40673486 2020-03-04T21:38:36.000Z 2020-04-06T02:37:47.000Z Caseworker - Aboriginal Identified Do you want a job that will give you the opportunity to make a real difference in the lives of Aboriginal children who are in need of support? {'id': 24524763, 'description': 'KARI', 'searchParams': {'keywords': 'KARI'}} {'nation': 'Australia', 'state': 'New South Wales', 'city': 'Sydney', 'area': 'South West & M5 Corridor', 'suburb': 'liverpool'} 1000 3101 Full Time ... [Which of the following statements best describes your right to work in Australia?, Do you have a current Australian driver's licence?] <p><strong>Caseworker - Aboriginal identified role</strong></p> <p>Salary Package up to $82,162</p> <p><em>(Inclusive of $70,000 base salary, leav... None [{'type': 'Email', 'value': 'ashley.crooks@kari.org.au'}, {'type': 'Phone', 'value': '(02) 8782 0300'}] False caseworker False NaN NaN NaN
2 40778851 2020-03-29T22:35:20.000Z 2020-05-18T13:59:59.000Z Financial Counsellor Outstanding opportunity for an experienced Financial Counsellor to join our multidisciplinary team. {'id': 24132249, 'description': 'Better Place Australia', 'searchParams': {'keywords': 'Better Place Australia'}} {'nation': 'Australia', 'state': 'Victoria', 'city': 'Melbourne', 'area': 'CBD & Inner Suburbs', 'suburb': 'melbourne'} 1002 3106 Full Time ... [] <p>Better Place Australia has a vision of <em>“An Australia where all people experience positive relationships, truly value each and live safer, m... None [{'type': 'Phone', 'value': '9556 5333'}] True financial-counsellor,counsellor False <meta charset="utf-8" />\n<style type="text/css"><!--#VideoJobAd,.videoembed{display:block;height:310px;padding:5px 0;text-align:center;width:100%... NaN NaN
3 40832664 2020-03-24T02:12:22.000Z 2020-04-26T13:00:00.000Z Apprentice or Trainee Hairdresser *Maurice Meade are currently looking for talented apprentice or trainee hairdressers to be placed within our salons* {'id': 25844154, 'description': 'Maurice Meade', 'searchParams': {'keywords': 'Maurice Meade'}} {'nation': 'Australia', 'state': 'Western Australia', 'city': 'Perth', 'area': '', 'suburb': ''} 1009 3107 Full Time ... [Which of the following statements best describes your right to work in Australia?, How many years' experience do you have as a hairdresser?, How ... <p>As Perth's leading hair salon and one of Australia's most recognised names in the industry, an <strong>apprenticeship/traineeship</strong> at <... {'companyOverallRating': 3.2, 'companyTotalReviews': 10, 'companyProfileUrl': '/companies/maurice-meade-935469/reviews?jobId=40832664', 'companyNa... [] False hairdresser False NaN {'link': 'https://www.youtube.com/embed/TZ2kBC90E1k?rel=0', 'position': 'Below'} {'id': '0f3b1437-a64e-8659-fe7a-d7eef2e23552', 'isDefault': False, 'logo': {'id': '087aa0e914db2d98d3ccaaed48971da2341e24ac', 'url': 'https://imag...
4 40878691 2020-03-04T21:56:03.000Z 2020-04-04T13:00:00.000Z Practice Nurse (RN) 2 X part-time practice nurses (RN) in Hobart CBD {'id': 44061230, 'description': 'HEALTHPLUS MEDICAL CENTRE', 'searchParams': {'advertiserid': 44061230}} {'nation': 'Australia', 'state': 'Tasmania', 'city': 'Hobart', 'area': '', 'suburb': 'hobart'} 1011 3105 Part Time ... [Which of the following statements best describes your right to work in Australia?, How many years' experience do you have as a registered nurse?] <p>A well established general practice located in Hobart CBD, is seeking to employ two experienced general practice nurses (RN) to join their frie... None [] False practice-registered-nurse,practice-nurse,registered-nurse,nurse False NaN NaN NaN

5 rows × 31 columns

714 ads in a month; not bad

# Count distinct ads: strip the query string from each URL so the same job
# reached via different tracking parameters is only counted once.
# regex=True is passed explicitly because pandas >= 2.0 treats the pattern
# as a literal string by default, which would leave the URLs unchanged.
pd.DataFrame(objs).url.str.replace(r'\?.*', '', regex=True).nunique()
714

Jora

Individual job ads are not captured, just search results.

Search results could be used to track job volume over time.

# Look for captures of Jora job URLs with HTTP status 200 in the same
# 202004-202005 window.
objs = list(
    cdx.iter(
        'au.jora.com/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)

Randstad

# All captures with HTTP status 200 under Randstad's /jobs/ path in the
# 202004-202005 window.
objs = list(
    cdx.iter(
        'www.randstad.com.au/jobs/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 IMBUYFDLEKDVR5AF6RBVHICHYPFKGHVE crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00187.warc.gz eng 18359 text/html application/xhtml+xml 1109014883 200 20200405220324 https://www.randstad.com.au/jobs/administration-assistant_perth_18262203/?portalid=80 au,com,randstad)/jobs/administration-assistant_perth_18262203?portalid=80
1 UTF-8 DAPTUFGEBUM4HUBLQ7QPIOLCZZATHVOA crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00515.warc.gz eng 18673 text/html application/xhtml+xml 1114558885 200 20200405215527 https://www.randstad.com.au/jobs/administration-officers_brisbane_18199893/?portalid=80 au,com,randstad)/jobs/administration-officers_brisbane_18199893?portalid=80
2 UTF-8 GUJIWX6RQOD5LLQRSKY2XEXNFQ4OAQCA crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00120.warc.gz eng 18790 text/html application/xhtml+xml 1129945863 200 20200405232228 https://www.randstad.com.au/jobs/administration-processing-officer_melbourne_18214052/?portalid=80 au,com,randstad)/jobs/administration-processing-officer_melbourne_18214052?portalid=80
3 UTF-8 MCI5NYISQ6EOBECYEUWGQCCUTWPEYPMZ crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00276.warc.gz eng 18587 text/html application/xhtml+xml 1120915792 200 20200405221433 https://www.randstad.com.au/jobs/administration_sydney_18194848/?portalid=80 au,com,randstad)/jobs/administration_sydney_18194848?portalid=80
4 UTF-8 UPI4OZ5O45JPOXF3D2W2QTQ72VUIN5IT crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00026.warc.gz eng 18330 text/html application/xhtml+xml 1115337300 200 20200405231546 https://www.randstad.com.au/jobs/aps4-human-resources-assistant_canberra_18229178/?portalid=80 au,com,randstad)/jobs/aps4-human-resources-assistant_canberra_18229178?portalid=80
... ... ... ... ... ... ... ... ... ... ... ... ...
1663 UTF-8 ARJDDCA5NXMUZU2KBDD4V3PPOLMT5465 crawl-data/CC-MAIN-2020-16/segments/1585371830894.88/warc/CC-MAIN-20200409055849-20200409090349-00057.warc.gz eng 18813 text/html application/xhtml+xml 1077514527 200 20200409073229 https://www.randstad.com.au/jobs/western-australia/perth/permanent/page-2/ au,com,randstad)/jobs/western-australia/perth/permanent/page-2
1664 UTF-8 BMW3XINOAVYVRYIDIVH6DR24BP7WPFC4 crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00088.warc.gz eng 18653 text/html application/xhtml+xml 1119649212 200 20200404081716 https://www.randstad.com.au/jobs/western-australia/perth/permanent/page-3/ au,com,randstad)/jobs/western-australia/perth/permanent/page-3
1665 UTF-8 VEN7OU6PPI3RQ7R2AGPYITA2LW5N6SPP crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00119.warc.gz eng 16016 text/html application/xhtml+xml 1101346390 200 20200404074840 https://www.randstad.com.au/jobs/western-australia/perth/permanent/page-4/ au,com,randstad)/jobs/western-australia/perth/permanent/page-4
1666 UTF-8 SZLZZJSNNGBQBCYPMF76UOT4UR2YN2WC crawl-data/CC-MAIN-2020-16/segments/1585370508367.57/warc/CC-MAIN-20200402204908-20200402234908-00529.warc.gz eng 18507 text/html application/xhtml+xml 1111392226 200 20200402215745 https://www.randstad.com.au/jobs/western-australia/pilbara/ au,com,randstad)/jobs/western-australia/pilbara
1667 UTF-8 NWGF453M22MEWLOIRLX4Q77EVAQHWDE3 crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00002.warc.gz eng 18596 text/html application/xhtml+xml 1144967088 200 20200402151718 https://www.randstad.com.au/jobs/western-australia/pilbara/permanent/ au,com,randstad)/jobs/western-australia/pilbara/permanent

1668 rows × 12 columns

41 jobs in a month

df = pd.DataFrame(objs)
# Keep URLs containing at least seven digits followed by a slash (individual
# job pages appear to end in a numeric id, unlike the listing pages), strip
# the query string, and count the distinct URLs that remain.
# regex=True is passed explicitly because pandas >= 2.0 treats the pattern
# as a literal string by default, which would leave the URLs unchanged.
df[df.url.str.match(r'.*\d{7}\/.*')].url.str.replace(r'\?.*', '', regex=True).nunique()
41

Contains:

  • post date
  • location
  • job type/working hours
  • salary
  • description
  • simple title
  • skills/qualification/education
# Save the first capture's raw bytes to test.html.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None
import extruct

This looks like it accurately contains all the details

# Keep only the JSON-LD items typed as JobPosting. .get() is used so that an
# item without an '@type' key is skipped instead of raising KeyError.
[data for data in extruct.extract(objs[0].content)['json-ld'] if data.get('@type') == 'JobPosting']
[{'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'maxValue': 34.0,
    'minValue': 30.0,
    'unitText': 'HOUR'}},
  'datePosted': '2020-03-27T06:48:02Z',
  'description': '<p><strong>Key Responsibilities:</strong></p><ul><li>Provision of timely and effective administrative services</li><li>Prepare and generate audits/reports as required</li><li>Managing consumables stock levels</li><li>Assist with building/ground inspection and action issues</li><li>Provide systems and administration support to other departments when required</li><li>Assist in other administrative areas as required</li><li>Preparing documents for meetings and business trip</li><li>Processing and directing mail and incoming packages or deliverie</li><li>Greeting and directing visitors and new staff to the organisatio</li><li>Writing and issuing emails to teams and departments on behalf of teams or senior staff</li></ul><p><br><strong>Skill, knowledge and experience in:</strong></p><ul><li>Previous experience working within the Public Health industry </li><li>Business, administration and clerical support including minute taking</li><li>Provision of customer focused service</li><li>Interpersonal communication and teamwork</li><li>Work organisation with the ability to meet work schedules and deadlines</li><li>Use of PCs, including Microsoft Office applications such as: Word, Excel, Outlook, etc</li></ul><p> </p><p>If you believe you are suitable for this role please <strong>APPLY ONLINE</strong></p><p> </p><p>At Randstad, we are passionate about providing equal employment opportunities and embracing diversity to the benefit of all. We actively encourage applications from any background.</p><br /><br /><strong>skills</strong><br />admin, administration, admin assistant, local government, local council, state government, health<br /><br /><strong>qualification</strong><br />Previous experience in a similar role<br /><br /><strong>working hours</strong><br />Full-Time<br /><br /><strong>educational requirements</strong><br />Secondary School/High School',
  'educationRequirements': 'Secondary School/High School',
  'employmentType': 'TEMPORARY',
  'hiringOrganization': {'@context': 'http://schema.org',
   '@type': 'Organization',
   'logo': 'https://www.randstad.com.au/images/system/base/logo-randstad-sd.png',
   'name': 'Randstad Australia',
   'url': 'https://www.randstad.com.au/'},
  'identifier': {'@type': 'PropertyValue',
   'name': 'Randstad Australia',
   'value': '90M0415121_1585291654'},
  'industry': 'administration & office support',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'addressCountry': 'AU',
    'addressLocality': 'Perth',
    'addressRegion': 'Western Australia',
    'postalCode': '6003',
    'streetAddress': "St George's Terrace"},
   'geo': {'@type': 'GeoCoordinates',
    'latitude': -31.9334,
    'longitude': 115.8334}},
  'qualifications': 'Previous experience in a similar role',
  'skills': 'admin, administration, admin assistant, local government, local council, state government, health',
  'title': 'Administration Assistant',
  'validThrough': '2020-04-26T22:00:00Z',
  'workHours': 'Full-Time'}]

We can see non-ads don’t have the JobPosting

# Extract the structured metadata from the last capture in `objs` and show
# its JSON-LD section, for comparison with the job-ad page above.
md = extruct.extract(objs[-1].content)
md['json-ld']
None
[{'@context': 'http://schema.org',
  '@type': 'BreadcrumbList',
  'itemListElement': [{'@type': 'ListItem',
    'item': {'@id': '/', 'name': 'home'},
    'position': 1},
   {'@type': 'ListItem',
    'item': {'@id': '/jobs/', 'name': 'jobs'},
    'position': 2},
   {'@type': 'ListItem',
    'item': {'@id': '/jobs/western-australia/', 'name': 'Western Australia'},
    'position': 3},
   {'@type': 'ListItem',
    'item': {'@id': '/jobs/western-australia/pilbara/', 'name': 'Pilbara'},
    'position': 4},
   {'@type': 'ListItem',
    'item': {'@id': '/jobs/western-australia/pilbara/permanent/',
     'name': 'Permanent'},
    'position': 5}]}]

This will get just the job ads

# Query the Randstad job pages again, this time adding a URL regex filter so
# that only captures whose URL contains a run of six or more digits are kept.
# NOTE(review): the '~' prefix is the index server's filter modifier — confirm
# its exact matching semantics against the CDX server API docs.
objs = list(
    cdx.iter(
        'www.randstad.com.au/jobs/*',
        from_ts='202004',
        to='202005',
        filter=['status:200', r'~url:.*\d{6,}'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 IMBUYFDLEKDVR5AF6RBVHICHYPFKGHVE crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00187.warc.gz eng 18359 text/html application/xhtml+xml 1109014883 200 20200405220324 https://www.randstad.com.au/jobs/administration-assistant_perth_18262203/?portalid=80 au,com,randstad)/jobs/administration-assistant_perth_18262203?portalid=80
1 UTF-8 DAPTUFGEBUM4HUBLQ7QPIOLCZZATHVOA crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00515.warc.gz eng 18673 text/html application/xhtml+xml 1114558885 200 20200405215527 https://www.randstad.com.au/jobs/administration-officers_brisbane_18199893/?portalid=80 au,com,randstad)/jobs/administration-officers_brisbane_18199893?portalid=80
2 UTF-8 GUJIWX6RQOD5LLQRSKY2XEXNFQ4OAQCA crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00120.warc.gz eng 18790 text/html application/xhtml+xml 1129945863 200 20200405232228 https://www.randstad.com.au/jobs/administration-processing-officer_melbourne_18214052/?portalid=80 au,com,randstad)/jobs/administration-processing-officer_melbourne_18214052?portalid=80
3 UTF-8 MCI5NYISQ6EOBECYEUWGQCCUTWPEYPMZ crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00276.warc.gz eng 18587 text/html application/xhtml+xml 1120915792 200 20200405221433 https://www.randstad.com.au/jobs/administration_sydney_18194848/?portalid=80 au,com,randstad)/jobs/administration_sydney_18194848?portalid=80
4 UTF-8 UPI4OZ5O45JPOXF3D2W2QTQ72VUIN5IT crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00026.warc.gz eng 18330 text/html application/xhtml+xml 1115337300 200 20200405231546 https://www.randstad.com.au/jobs/aps4-human-resources-assistant_canberra_18229178/?portalid=80 au,com,randstad)/jobs/aps4-human-resources-assistant_canberra_18229178?portalid=80
5 UTF-8 SL5YRVGRWU6MGC2HFCS3DEZ2QGVBTD5P crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00356.warc.gz eng 18883 text/html application/xhtml+xml 1076007920 200 20200405232422 https://www.randstad.com.au/jobs/aps4-program-support-officer_canberra_18188942/?portalid=80 au,com,randstad)/jobs/aps4-program-support-officer_canberra_18188942?portalid=80
6 UTF-8 Z3TLHGFER7BIUGUCSCQM2SKQCDSPOSOS crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00489.warc.gz eng 18315 text/html application/xhtml+xml 1104122430 200 20200405233054 https://www.randstad.com.au/jobs/aps6-finance-and-budgets-officer_canberra_18203687/?portalid=80 au,com,randstad)/jobs/aps6-finance-and-budgets-officer_canberra_18203687?portalid=80
7 UTF-8 ZFRZ4B6H2THO7CUP4XKA3WMSARDOPAZD crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00292.warc.gz eng 18892 text/html application/xhtml+xml 1089703153 200 20200405223808 https://www.randstad.com.au/jobs/bi-analyst_brisbane_18218777/?portalid=80 au,com,randstad)/jobs/bi-analyst_brisbane_18218777?portalid=80
8 UTF-8 YTWKQQOFKYOVCUM6RYX2HMWVM6SOVXZO crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00055.warc.gz eng 18480 text/html application/xhtml+xml 1100652031 200 20200405214905 https://www.randstad.com.au/jobs/business-analyst-aps6-or-el1_canberra_18221476/?portalid=80 au,com,randstad)/jobs/business-analyst-aps6-or-el1_canberra_18221476?portalid=80
9 UTF-8 MK4DNVGGGRF2TUONJSEINSJWEP3J4SCN crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00363.warc.gz eng 18584 text/html application/xhtml+xml 1109350218 200 20200405064639 https://www.randstad.com.au/jobs/business-analyst-risk_sydney_18257810/ au,com,randstad)/jobs/business-analyst-risk_sydney_18257810
10 UTF-8 RQPYSCEJ7ISYGI2DXW53ZNH5V6IH2OCU crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00348.warc.gz eng 18611 text/html application/xhtml+xml 1115298161 200 20200405222404 https://www.randstad.com.au/jobs/business-support-officer_adelaide_18209500/?portalid=80 au,com,randstad)/jobs/business-support-officer_adelaide_18209500?portalid=80
11 UTF-8 2BTX5EXM7E3Q2YYXBM5YJ2MSXNUFJBSH crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00302.warc.gz eng 19037 text/html application/xhtml+xml 1102160195 200 20200405222538 https://www.randstad.com.au/jobs/clinical-case-manager_sydney_18194305/?portalid=80 au,com,randstad)/jobs/clinical-case-manager_sydney_18194305?portalid=80
12 UTF-8 JHPFLMLYHYTH2AY4LVERUBXMYOP7LZRA crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00339.warc.gz eng 19269 text/html application/xhtml+xml 1109889212 200 20200405213428 https://www.randstad.com.au/jobs/data-entry-operator_adelaide_18200495/?portalid=80 au,com,randstad)/jobs/data-entry-operator_adelaide_18200495?portalid=80
13 UTF-8 NADYTXPJOZUU7MSSKK5TCR6HEYA56URS crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00363.warc.gz eng 18596 text/html application/xhtml+xml 1106057488 200 20200405233203 https://www.randstad.com.au/jobs/document-controller_chatswood_18209567/?portalid=80 au,com,randstad)/jobs/document-controller_chatswood_18209567?portalid=80
14 UTF-8 AR7YFKGJWG63VFETNCEKEXTERYSPAAUQ crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00342.warc.gz eng 19058 text/html application/xhtml+xml 1109897506 200 20200405220718 https://www.randstad.com.au/jobs/el1-marketing-manager_canberra_18232611/?portalid=80 au,com,randstad)/jobs/el1-marketing-manager_canberra_18232611?portalid=80
15 UTF-8 322LNM7L4XE5KFWUAK72FX64DZ3DXUCG crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00342.warc.gz eng 18567 text/html application/xhtml+xml 1109917494 200 20200405230915 https://www.randstad.com.au/jobs/electrical-field-service-technician_parramatta_18257899/ au,com,randstad)/jobs/electrical-field-service-technician_parramatta_18257899
16 UTF-8 NYWXRFLQ6SQOA6KGJWLQFGEAVQ7VDYAM crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00237.warc.gz eng 18506 text/html application/xhtml+xml 1107716875 200 20200405222931 https://www.randstad.com.au/jobs/executive-assistant_parramatta_18204838/?portalid=80 au,com,randstad)/jobs/executive-assistant_parramatta_18204838?portalid=80
17 UTF-8 7NOSPGVFYJ5KKUJI7J5HUMRHHY5YSYZO crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00019.warc.gz eng 19133 text/html application/xhtml+xml 1092597625 200 20200405055840 https://www.randstad.com.au/jobs/graduate-entry-level-banking_sydney_18260628/ au,com,randstad)/jobs/graduate-entry-level-banking_sydney_18260628
18 UTF-8 ED63Z3N4JAZ5SD36DF5EVJGALFYOT4CU crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00091.warc.gz eng 18371 text/html application/xhtml+xml 1064975063 200 20200405221056 https://www.randstad.com.au/jobs/graduate-wealth-management-big-4-bank_sydney_18258111/ au,com,randstad)/jobs/graduate-wealth-management-big-4-bank_sydney_18258111
19 UTF-8 GE3D4HGSNOSPH2NMK7QVRJE5HTRE3FWQ crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00084.warc.gz eng 18972 text/html application/xhtml+xml 1086743784 200 20200405054438 https://www.randstad.com.au/jobs/graduate-wealth-management-entry-level-banking_sydney_18257816/ au,com,randstad)/jobs/graduate-wealth-management-entry-level-banking_sydney_18257816
20 UTF-8 K6CTMZCOBM573AG75J77YGLTTIJBQ5BU crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00490.warc.gz eng 17350 text/html application/xhtml+xml 1101420606 200 20200405222651 https://www.randstad.com.au/jobs/hr-advisor_australia_18257901/ au,com,randstad)/jobs/hr-advisor_australia_18257901
21 UTF-8 VPSFW4E5TMKBOOH4LBVN76LKC65B563R crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00422.warc.gz eng 18429 text/html application/xhtml+xml 1093621129 200 20200405231102 https://www.randstad.com.au/jobs/hseq-advisor_sydney_18257975/ au,com,randstad)/jobs/hseq-advisor_sydney_18257975
22 UTF-8 433EIE5PRE72M5CGM7CDNI3WEBXWRSNG crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00383.warc.gz eng 18371 text/html application/xhtml+xml 1064091228 200 20200405223556 https://www.randstad.com.au/jobs/human-resources-administration_sydney_18209789/?portalid=80 au,com,randstad)/jobs/human-resources-administration_sydney_18209789?portalid=80
23 UTF-8 YXBDTR7R4NNUHGSSPPLPAWC5KQ2PHQ37 crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00157.warc.gz eng 19031 text/html application/xhtml+xml 1085455854 200 20200405233329 https://www.randstad.com.au/jobs/interface-manager_melbourne_18257974/ au,com,randstad)/jobs/interface-manager_melbourne_18257974
24 UTF-8 WKBOENEQYWWXPSKS45NGR6KZ6ZIRA6RR crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00247.warc.gz eng 18649 text/html application/xhtml+xml 1080502196 200 20200405060259 https://www.randstad.com.au/jobs/investment-data-analyst_sydney_18260908/ au,com,randstad)/jobs/investment-data-analyst_sydney_18260908
25 UTF-8 EUXDQQBJTUQQJI5256PQNBIR6Q7MDHPY crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00187.warc.gz eng 18849 text/html application/xhtml+xml 1109034172 200 20200405223212 https://www.randstad.com.au/jobs/loan-administration-big-4-bank_kogarah_18232645/?portalid=80 au,com,randstad)/jobs/loan-administration-big-4-bank_kogarah_18232645?portalid=80
26 UTF-8 BM25VL5JUGXESCSRY5XDU6M45K4DMBJX crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00299.warc.gz eng 18647 text/html application/xhtml+xml 1097336901 200 20200405213724 https://www.randstad.com.au/jobs/mrhr-driver_darra_18257553/?portalid=80 au,com,randstad)/jobs/mrhr-driver_darra_18257553?portalid=80
27 UTF-8 SKWEGD2RZCP5RIYCBMIURWEQZIBJBPJ6 crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00504.warc.gz eng 18365 text/html application/xhtml+xml 1072146471 200 20200405214655 https://www.randstad.com.au/jobs/project-engineer_sydney_18228660/ au,com,randstad)/jobs/project-engineer_sydney_18228660
28 UTF-8 OO3LKHBYGYUB5CMTGE56NHVRR4Q7ROH4 crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00466.warc.gz eng 18500 text/html application/xhtml+xml 1128190420 200 20200405215327 https://www.randstad.com.au/jobs/project-support-officer_perth_18189434/?portalid=80 au,com,randstad)/jobs/project-support-officer_perth_18189434?portalid=80
29 UTF-8 AZHLUAY4VWDL6GVNDOSSC2JEORKTEK55 crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00175.warc.gz eng 18765 text/html application/xhtml+xml 1062729771 200 20200405214438 https://www.randstad.com.au/jobs/project-support-officer_sydney_18228647/?portalid=80 au,com,randstad)/jobs/project-support-officer_sydney_18228647?portalid=80
30 UTF-8 OBQRJT2IA5T5EFVFLW2FQKWHZEQULU7X crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00345.warc.gz eng 18694 text/html application/xhtml+xml 1089156022 200 20200405230136 https://www.randstad.com.au/jobs/scheduler_granville_18209788/?portalid=80 au,com,randstad)/jobs/scheduler_granville_18209788?portalid=80
31 UTF-8 KAJUOOFU65IX46NBOWMGX5RZMKU47ORR crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00497.warc.gz eng 18334 text/html application/xhtml+xml 1081494902 200 20200405064317 https://www.randstad.com.au/jobs/senior-backend-developer_sydney_18257365/ au,com,randstad)/jobs/senior-backend-developer_sydney_18257365
32 UTF-8 4EVKAIYON3DSUD6QAGAHG3INQXY55GMC crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00353.warc.gz eng 18507 text/html application/xhtml+xml 1105264989 200 20200405224339 https://www.randstad.com.au/jobs/senior-coordinator_sydney_18213882/?portalid=80 au,com,randstad)/jobs/senior-coordinator_sydney_18213882?portalid=80
33 UTF-8 6F3K7NLSRLVJOBHCSMNYEYC5PKIYDEZX crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00261.warc.gz eng 18243 text/html application/xhtml+xml 1077707980 200 20200405222248 https://www.randstad.com.au/jobs/senior-policy-project-officer_parramatta_18212689/?portalid=80 au,com,randstad)/jobs/senior-policy-project-officer_parramatta_18212689?portalid=80
34 UTF-8 OS4CVVNVO3OOVMKUFHC2QIUUUAYFDY72 crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00265.warc.gz eng 18668 text/html application/xhtml+xml 1088874106 200 20200405232649 https://www.randstad.com.au/jobs/senior-product-engineer_sydney_18226007/?portalid=80 au,com,randstad)/jobs/senior-product-engineer_sydney_18226007?portalid=80
35 UTF-8 CDXFJN7XSE5FMIYG3FNQT4NODIENNCJJ crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00025.warc.gz eng 18212 text/html application/xhtml+xml 1093445712 200 20200405053918 https://www.randstad.com.au/jobs/senior-project-engineer_melbourne_18243581/ au,com,randstad)/jobs/senior-project-engineer_melbourne_18243581
36 UTF-8 6HLXH64LCGE5GA5PFYUMFRQMBXTZVDMN crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00078.warc.gz eng 18404 text/html application/xhtml+xml 1086439157 200 20200405222114 https://www.randstad.com.au/jobs/senior-risk-advisor-editor-leading-brand_sydney_18257902/ au,com,randstad)/jobs/senior-risk-advisor-editor-leading-brand_sydney_18257902
37 UTF-8 U64XKOK5WGX4C4BO3KEPXXQ3DPRCC76Z crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00049.warc.gz eng 18150 text/html application/xhtml+xml 1119274208 200 20200405074318 https://www.randstad.com.au/jobs/senior-software-engineer_sydney_18257361/ au,com,randstad)/jobs/senior-software-engineer_sydney_18257361
38 UTF-8 SYYXPJAMWRH3KC7JFWGECJCKA7A4IMU4 crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00108.warc.gz eng 18265 text/html application/xhtml+xml 1102578202 200 20200405055414 https://www.randstad.com.au/jobs/signalling-functional-tester-rail_adelaide_18232752/ au,com,randstad)/jobs/signalling-functional-tester-rail_adelaide_18232752
39 UTF-8 EF5VQMZ7HMH4PLTSR5R27KAP6A2HOCWW crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00269.warc.gz eng 18993 text/html application/xhtml+xml 1110778728 200 20200405215727 https://www.randstad.com.au/jobs/sofware-licensing-officer_brisbane_18218715/?portalid=80 au,com,randstad)/jobs/sofware-licensing-officer_brisbane_18218715?portalid=80
40 UTF-8 N45IYZAL742LAQYUK4RFWKNXBHNWV5LB crawl-data/CC-MAIN-2020-16/segments/1585370529375.49/warc/CC-MAIN-20200405053120-20200405083120-00157.warc.gz eng 18173 text/html application/xhtml+xml 1089790189 200 20200405074433 https://www.randstad.com.au/jobs/sql-database-administrator-parramatta-nv1_parramatta_18256657/ au,com,randstad)/jobs/sql-database-administrator-parramatta-nv1_parramatta_18256657

People2people

# Fetch the index records for people2people job pages in the 202004-202005
# window, keeping only captures with HTTP status 200, and show them as a table.
objs = list(
    cdx.iter(
        'www.people2people.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 MEBY4ISDBC7GGONQ67G2YIK4BAGVOYCU crawl-data/CC-MAIN-2020-16/segments/1585371675859.64/warc/CC-MAIN-20200407054138-20200407084638-00328.warc.gz eng 96279 text/html text/html 1019938666 200 20200407064816 https://www.people2people.com.au/job/100763333251915/whs-consultant-1/ au,com,people2people)/job/100763333251915/whs-consultant-1
1 UTF-8 THT7ZHPL6LVG7KP7UUODGSKOWYX2GRBT crawl-data/CC-MAIN-2020-16/segments/1585371637684.76/warc/CC-MAIN-20200406133533-20200406164033-00306.warc.gz eng 95314 text/html text/html 1093556364 200 20200406155210 https://www.people2people.com.au/job/100763333257924/accounts-payable-officer-199/ au,com,people2people)/job/100763333257924/accounts-payable-officer-199
2 UTF-8 PJEH4LNMMIUYHE5BM26XOLRZON4SSEOO crawl-data/CC-MAIN-2020-16/segments/1585371807538.83/warc/CC-MAIN-20200408010207-20200408040707-00436.warc.gz eng 95868 text/html text/html 1070712354 200 20200408024753 https://www.people2people.com.au/job/100763333261163/credit-controller-36/ au,com,people2people)/job/100763333261163/credit-controller-36
3 UTF-8 O4NE6PTXIEOQV356M7DMADLXXYJZHN3P crawl-data/CC-MAIN-2020-16/segments/1585371675859.64/warc/CC-MAIN-20200407054138-20200407084638-00110.warc.gz eng 95119 text/html text/html 1011977159 200 20200407083800 https://www.people2people.com.au/job/100763333261259/corporate-receptionist-129/ au,com,people2people)/job/100763333261259/corporate-receptionist-129
4 UTF-8 KIYCIXBEN2VJZXM7DYHAIVHIZ7A3KMU4 crawl-data/CC-MAIN-2020-16/segments/1585371675859.64/warc/CC-MAIN-20200407054138-20200407084638-00370.warc.gz eng 95307 text/html text/html 1022237392 200 20200407061923 https://www.people2people.com.au/job/100763333263262/accounts-receivable-17/ au,com,people2people)/job/100763333263262/accounts-receivable-17
... ... ... ... ... ... ... ... ... ... ... ... ...
964 UTF-8 IPUYU7NZ56VQ3A4JTGPRYKXJVYJWXIIY crawl-data/CC-MAIN-2020-16/segments/1585370528224.61/warc/CC-MAIN-20200405022138-20200405052138-00472.warc.gz eng 96048 text/html text/html 1059151015 200 20200405040331 https://www.people2people.com.au/job/senior-legal-secretary-projects-1/ au,com,people2people)/job/senior-legal-secretary-projects-1
965 UTF-8 IQWGOHLZR5UWTZHBKBTECK6U6L4GJXBZ crawl-data/CC-MAIN-2020-16/segments/1585370511408.40/warc/CC-MAIN-20200410173109-20200410203609-00175.warc.gz eng 95699 text/html text/html 1049801495 200 20200410181601 https://www.people2people.com.au/job/telesales-education/ au,com,people2people)/job/telesales-education
966 UTF-8 XTSS44B5J7JTNWTS4YAWURN3NGORSHYJ crawl-data/CC-MAIN-2020-16/segments/1585370505550.17/warc/CC-MAIN-20200401065031-20200401095031-00228.warc.gz eng 96086 text/html text/html 1116875760 200 20200401074544 https://www.people2people.com.au/job/trainee-recruitment-consultant-temp-specialist/ au,com,people2people)/job/trainee-recruitment-consultant-temp-specialist
967 UTF-8 YRG7QLNVJ3TCFTGHOBROKRQJVPEV3XO5 crawl-data/CC-MAIN-2020-16/segments/1585371807538.83/warc/CC-MAIN-20200408010207-20200408040707-00359.warc.gz eng 95947 text/html text/html 1086976677 200 20200408015723 https://www.people2people.com.au/job/treasury-officer-inner-east/ au,com,people2people)/job/treasury-officer-inner-east
968 UTF-8 7WUNED5PZGXOUFFQWSG4DRRSW7FEK3I5 crawl-data/CC-MAIN-2020-16/segments/1585371675859.64/warc/CC-MAIN-20200407054138-20200407084638-00468.warc.gz eng 95825 text/html text/html 1048898819 200 20200407065559 https://www.people2people.com.au/job/warehouse-pick-and-pack-4/ au,com,people2people)/job/warehouse-pick-and-pack-4

969 rows × 12 columns

Looks like hundreds of job ads

  • Title
  • Location
  • Job Type
  • Salary
  • Description
# Save the content of the first capture in `objs` to test.html (binary mode)
# so the page can be inspected outside the notebook.
with open('test.html', 'wb') as f:
    f.write(objs[0].content)
None

Again the JSON-LD data seems good!

# Show every JSON-LD item embedded in the first capture (no '@type' filtering).
extruct.extract(objs[0].content)['json-ld']
[{'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'people2people',
  'url': 'https://www.people2people.com.au',
  'logo': None},
 {'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': 'Up to $43/hr plus super'}},
  'datePosted': '2018-11-26T16:05:29.000+11:00',
  'employmentType': 'Temporary',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'people2people',
   'sameAs': 'https://www.people2people.com.au',
   'logo': None},
  'industry': 'NSW Government',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Leichhardt',
    'addressRegion': 'Leichhardt',
    'addressCountry': 'AU',
    'postalCode': None}},
  'salaryCurrency': 'AUD',
  'title': 'WHS Consultant',
  'validThrough': '2046-04-12',
  'description': "people2people are collaborating with the Government who are currently seeking a WHS Consultant on an initial 2 month assignment located in Sydney's Inner West.\xa0<br><br><strong>THE ROLE</strong><br><br>In this role you will be responsible for end-to-end (identifying, developing, delivering, implementing, coordinating and evaluating) NSW WHS and risk management education and training at operational and corporate levels:<ul><li>Develop the\xa0policies, programs, strategies and training schedule for WHS and risk management.\xa0</li><li>Establish and maintain educational resources and materials to capture WHS and risk management systems and processes.\xa0</li><li>Build and sustain relationships with key stakeholders.</li><li>Provide advice on internal WHS and risk management policies, procedures and programs.\xa0</li></ul><br><strong>ABOUT YOU</strong><br><br>To be eligible for this role you must have:<ul><li>Demonstrated understanding of WHS and risk management principles, methods and best practice processes.\xa0</li><li>Certificate IV in workplace training and experience in development, delivery and evaluation of training.</li><li>Excellent interpersonal and communication skills with an ability to effectively manage stakeholders.\xa0</li><li>Demonstrated project management skills.\xa0</li><li>Current valid NSW drivers licence.\xa0</li></ul><strong>To apply for the role, click the appropriate link on this page or call Emily Wise on 02 8270 9762 for a confidential discussion</strong><br>\xa0"}]

Xpand

# Fetch the index records for Xpand job pages in the 202004-202005 window,
# keeping only captures with HTTP status 200, and show them as a table.
objs = list(
    cdx.iter(
        'www.xpand.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 RSDNME7KPQ2HRK4POSTQ5H367NHYRPKL crawl-data/CC-MAIN-2020-16/segments/1585370520039.50/warc/CC-MAIN-20200404042338-20200404072338-00284.warc.gz eng 90391 text/html text/html 1188049923 200 20200404050559 https://www.xpand.com.au/job/0406-accountant/ au,com,xpand)/job/0406-accountant
1 UTF-8 Z7RHJOFRO4RLHBKUMXRFELQ4EUDIT6VB crawl-data/CC-MAIN-2020-16/segments/1585370525223.55/warc/CC-MAIN-20200404200523-20200404230523-00477.warc.gz eng 91696 text/html text/html 1228428142 200 20200404212555 https://www.xpand.com.au/job/18-campaign-delivery-expert/ au,com,xpand)/job/18-campaign-delivery-expert
2 UTF-8 LDYRQYDC5LTP52CNLA7MCEB6GUUDA52X crawl-data/CC-MAIN-2020-16/segments/1585370506580.20/warc/CC-MAIN-20200402014600-20200402044600-00028.warc.gz eng 89943 text/html text/html 943822755 200 20200402033808 https://www.xpand.com.au/job/accessibility-it-analyst-slash-consultant-1/ au,com,xpand)/job/accessibility-it-analyst-slash-consultant-1
3 UTF-8 WJYZOI4F7CEYYF5VPFXQ24N6REM5JESP crawl-data/CC-MAIN-2020-16/segments/1585370508367.57/warc/CC-MAIN-20200402204908-20200402234908-00059.warc.gz eng 89936 text/html text/html 1246740961 200 20200402223936 https://www.xpand.com.au/job/accessibility-it-analyst-slash-consultant-2/ au,com,xpand)/job/accessibility-it-analyst-slash-consultant-2
4 UTF-8 LFOLHKNCTMAD4EHAOFQMBKUILFKMEC6F crawl-data/CC-MAIN-2020-16/segments/1585370506580.20/warc/CC-MAIN-20200402014600-20200402044600-00090.warc.gz eng 89942 text/html text/html 924965636 200 20200402034125 https://www.xpand.com.au/job/accessibility-it-analyst-slash-consultant-3/ au,com,xpand)/job/accessibility-it-analyst-slash-consultant-3
... ... ... ... ... ... ... ... ... ... ... ... ...
1163 UTF-8 Z3YQEDFTWYCTHDJNKXPOQP4TZTO4RMBT crawl-data/CC-MAIN-2020-16/segments/1585370508367.57/warc/CC-MAIN-20200402204908-20200402234908-00301.warc.gz eng 90446 text/html text/html 1240334928 200 20200402222316 https://www.xpand.com.au/job/visual-designer-1/ au,com,xpand)/job/visual-designer-1
1164 UTF-8 3A2ANL64HPGGLPTOHTE4PHXUWYJRIHOX crawl-data/CC-MAIN-2020-16/segments/1585371656216.67/warc/CC-MAIN-20200406164846-20200406195346-00016.warc.gz eng 92473 text/html text/html 1229884740 200 20200406172559 https://www.xpand.com.au/job/visual-designer-ux-slash-ui/ au,com,xpand)/job/visual-designer-ux-slash-ui
1165 UTF-8 B3TRPWM5BPX2PVJQ3DLEOVQWE4PKV3WO crawl-data/CC-MAIN-2020-16/segments/1585371611051.77/warc/CC-MAIN-20200405213008-20200406003508-00092.warc.gz eng 89719 text/html text/html 1222718957 200 20200405215501 https://www.xpand.com.au/job/web-developer-wordpress/ au,com,xpand)/job/web-developer-wordpress
1166 UTF-8 PWGIJUO7MWDEIGIHM2RP7FPAPWHMHEJC crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00209.warc.gz eng 89272 text/html text/html 1206698861 200 20200402121527 https://www.xpand.com.au/job/windows-administrator-slash-engineer-x-2-trading-systems/ au,com,xpand)/job/windows-administrator-slash-engineer-x-2-trading-systems
1167 UTF-8 XN27BPOCDNBAEYUQOJ5DYS2JLELTR5GM crawl-data/CC-MAIN-2020-16/segments/1585370506580.20/warc/CC-MAIN-20200402014600-20200402044600-00381.warc.gz eng 89333 text/html text/html 936750937 200 20200402040223 https://www.xpand.com.au/job/windows-administrator-slash-engineer-x-2-trading-systems-1/ au,com,xpand)/job/windows-administrator-slash-engineer-x-2-trading-systems-1

1168 rows × 12 columns

  • Location
  • Title
  • Job Type
  • Description
# Save the content of the LAST capture in `objs` to test.html (binary mode).
# NOTE(review): the following cell inspects objs[0], not objs[-1], so the saved
# file and the JSON-LD shown below come from different pages — confirm intended.
with open('test.html', 'wb') as f:
    f.write(objs[-1].content)
None
# Show every JSON-LD item embedded in the first capture (no '@type' filtering).
extruct.extract(objs[0].content)['json-ld']
None
[{'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'Xpand',
  'url': 'https://www.xpand.com.au',
  'logo': None},
 {'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': None,
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': 'Monthly Salary'}},
  'datePosted': '2018-08-31T14:31:59.000+10:00',
  'employmentType': 'Contract',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Xpand',
   'sameAs': 'https://www.xpand.com.au',
   'logo': None},
  'industry': 'Other',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Singapore',
    'addressRegion': 'Singapore',
    'addressCountry': 'SG',
    'postalCode': None}},
  'title': 'Accountant',
  'validThrough': '2018-09-27',
  'description': '<div style="text-align:justify"><span style="text-align:justify"><strong>Overview</strong><br><br>We are looking for an experienced Accountant coming from a Big 4 or with Industry experience (Leading Tech Client) who has handled statutory compliance across APAC, provide P&amp;L / Balance Sheet commentary &amp; finance analysis, ensuring proper data provision to tax agents and understands GST filing process.</span><br> &nbsp;<br> <span style="text-align:justify">This is a 12 months extendable contract role.</span><br> &nbsp;<br> <strong><span style="text-align:justify">Responsibilities</span></strong><br> <ul> <li>Perform accounting functions related to Local and US GAAP rules such as: BS and P&amp;L accounting and flux analysis, month end close process and reporting, general accounting and tax fillings and all related statutory obligation.</li> <li>Perform GL reconciliations to ensure accuracy of our financial statements and accurately record and maintain certain accounting activities in our finance systems including GL tax-related entries and reconciliations.</li> <li>Work with third-party entities such as outsourced services providers for finance and accounting firms in matters related to the Accounting and statutory compliance for legal entities.</li> <li>Work with internal cross-functional teams, such as Tax, Corp. Legal, etc, to ensure accounting compliance filing obligations are completed by statutory due dates.</li> </ul> &nbsp;<br> <strong><span style="text-align:justify">Requirement</span></strong><br> <ul> <li>Minimum 5+ years of relevant accounting &amp; Statutory compliance experience</li> <li>Bachelor\'s degree in an Accounting/Commerce discipline</li> <li>CPA / CA or other professional accounting accreditation</li> <li>Strong working knowledge of U.S. 
GAAP and IFRS</li> <li>Excellent interpersonal and communication skills</li> <li>Oracle systems usage experience</li> </ul> &nbsp;<br> <span style="text-align:justify"><strong>HOW to Apply</strong><br></span><br> <span style="text-align:justify">Xpand your job search in the right direction by applying via the links below. Alternatively, for moving forward email Kapil Chadha on kapil.chadha@xpand.sg. (EA License No: 07C3147, CEI No: R1102816)</span><br> &nbsp;</div>'}]

Launch Recruitment

# Query the Common Crawl index (``cdx`` is the source='cc' fetcher) for
# captures of Launch Recruitment job pages with HTTP status 200 inside the
# 202004-202005 window, then tabulate the index records.
objs = list(
    cdx.iter(
        'jobs.launchrecruitment.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 7XPDX345HRGM733J54WZKIATINHUQWXD crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00058.warc.gz eng 82020 text/html text/html 534614307 200 20200404075221 https://jobs.launchrecruitment.com.au/job/100602033171662/layout-contractor-dm-2/ au,com,launchrecruitment,jobs)/job/100602033171662/layout-contractor-dm-2
1 UTF-8 MMCGR4KNWMM2HEVXL64BHF33SXUCMRUC crawl-data/CC-MAIN-2020-16/segments/1585371830894.88/warc/CC-MAIN-20200409055849-20200409090349-00029.warc.gz eng 82746 text/html text/html 501234066 200 20200409072148 https://jobs.launchrecruitment.com.au/job/100602033174182/media-relations-manager/ au,com,launchrecruitment,jobs)/job/100602033174182/media-relations-manager
2 UTF-8 AY72ZPEI37XTQBQNA6QAI7HR3MP3KWGY crawl-data/CC-MAIN-2020-16/segments/1585370506959.34/warc/CC-MAIN-20200402111815-20200402141815-00153.warc.gz eng 82758 text/html text/html 550956619 200 20200402124921 https://jobs.launchrecruitment.com.au/job/100602033176601/product-marketing-manager/ au,com,launchrecruitment,jobs)/job/100602033176601/product-marketing-manager
3 UTF-8 CK5VR2XOFPIJMPWZPRDT5PCA4GLCQXJH crawl-data/CC-MAIN-2020-16/segments/1585370524604.46/warc/CC-MAIN-20200404165658-20200404195658-00411.warc.gz eng 83213 text/html text/html 527603035 200 20200404174721 https://jobs.launchrecruitment.com.au/job/100602033177205/mm-wave-design-engineer/ au,com,launchrecruitment,jobs)/job/100602033177205/mm-wave-design-engineer
4 UTF-8 EK7VI33V22534J3G4UALGAXYNMSFHBAV crawl-data/CC-MAIN-2020-16/segments/1585370508367.57/warc/CC-MAIN-20200402204908-20200402234908-00253.warc.gz eng 83124 text/html text/html 534578192 200 20200402223049 https://jobs.launchrecruitment.com.au/job/100602033179204/trainee-it-project-manager/ au,com,launchrecruitment,jobs)/job/100602033179204/trainee-it-project-manager
... ... ... ... ... ... ... ... ... ... ... ... ...
1279 UTF-8 6HU234JKJA5YDTD4V5CSV43EB2LL4IKQ crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00049.warc.gz eng 81770 text/html text/html 534513845 200 20200404090330 https://jobs.launchrecruitment.com.au/job/trade-marketing-manager-1/ au,com,launchrecruitment,jobs)/job/trade-marketing-manager-1
1280 UTF-8 43ZCPOUCHXD5ODHR3TZHBCZVOHPA3BAK crawl-data/CC-MAIN-2020-16/segments/1585370518622.65/warc/CC-MAIN-20200403190006-20200403220006-00472.warc.gz eng 83760 text/html text/html 534876458 200 20200403194109 https://jobs.launchrecruitment.com.au/job/trade-marketing-specialist-retail-activation/ au,com,launchrecruitment,jobs)/job/trade-marketing-specialist-retail-activation
1281 UTF-8 QZRZ7GMWMI4V7CUIF4DLEUCKSSPNGRP4 crawl-data/CC-MAIN-2020-16/segments/1585370521574.59/warc/CC-MAIN-20200404073139-20200404103139-00429.warc.gz eng 82477 text/html text/html 534562691 200 20200404085259 https://jobs.launchrecruitment.com.au/job/ux-designer/ au,com,launchrecruitment,jobs)/job/ux-designer
1282 UTF-8 OR7NGAWHX7OKU7E3EAWZU6E43ZUHBI64 crawl-data/CC-MAIN-2020-16/segments/1585371861991.79/warc/CC-MAIN-20200409154025-20200409184525-00236.warc.gz eng 82421 text/html text/html 520704309 200 20200409173039 https://jobs.launchrecruitment.com.au/job/wireless-field-technician/ au,com,launchrecruitment,jobs)/job/wireless-field-technician
1283 UTF-8 EGN57VNKK5UI7BNWCLQVYUORTKG2H2D4 crawl-data/CC-MAIN-2020-16/segments/1585371620338.63/warc/CC-MAIN-20200406070848-20200406101348-00183.warc.gz eng 81675 text/html text/html 454029958 200 20200406074037 https://jobs.launchrecruitment.com.au/job/wireless-field-technician-2/ au,com,launchrecruitment,jobs)/job/wireless-field-technician-2

1284 rows × 12 columns

  • Job Title
  • Contract Type
  • Location
  • Industry
  • Salary
  • Start Date
  • Job Published
  • Job Description
# Pull embedded structured metadata out of the first capture's HTML and keep
# only the JSON-LD entries (a list of dicts, one per JSON-LD node).
extruct.extract(objs[0].content)['json-ld']
None
[{'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'Launch Recruitment',
  'url': 'https://jobs.launchrecruitment.com.au',
  'logo': None},
 {'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': 'Competitive Daily Rate'}},
  'datePosted': '2018-07-13T08:31:06.000+10:00',
  'employmentType': 'Contract',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Launch Recruitment',
   'sameAs': 'https://jobs.launchrecruitment.com.au',
   'logo': None},
  'industry': 'Emerging Tech',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Melbourne C B D',
    'addressRegion': 'Melbourne C B D',
    'addressCountry': None,
    'postalCode': None}},
  'salaryCurrency': 'AUD',
  'title': 'Layout Contractor (DM)',
  'validThrough': '2018-08-09',
  'description': "Our client, an international powerhouse in the computer processing field, is looking for an experienced IC Layout Designer to help with some of their keystone project work. This is a unique opportunity to work with a household name contributing to cornerstone project work with tangible real-world implications. A role you'll be proud to feature on your CV.\xa0<br/><br/>What are they looking for? An experienced IC layout contractor who will help with IC layout development and will be responsible for RF/analog/mixed-signal cell, block in sub-micron CMOS technologies.\xa0<br/><br/><u><strong>Required Skills & Experience</strong></u>\xa0<br/>a. Experience in mask layout and has demonstrated tapeout experience.\xa0<br/>b. Proficient in layout techniques for device matching, isolation techniques, and minimization of parasitic, IR drop, and etc.<br/>c. Proficient in identifying root cause and debugging DRC/LVS/ERC error.<br/>d. Proficient with Cadence Virtuoso Layout tools, Calibre DRC/LVS/ERC tools.<br/><br/>Sound like you? Then please apply. If you are interested in knowing more, you are also welcome to contact David Milburn on (03) 8399 9943 for more information.\xa0<br/><br/>\xa0"}]
# Write the first capture's raw bytes to test.html (binary mode, overwriting
# any previous dump) -- presumably so the page can be inspected by hand.
with open('test.html', 'wb') as f:
    f.write(objs[0].content)

Careers Vic

# Query the Common Crawl index for Careers Vic job pages (HTTP 200 only,
# 202004-202005 window).
objs = list(
    cdx.iter(
        'careers.vic.gov.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
# Show just the URLs, transposed so they render as one wide row.
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 286 287 288 289 290 291 292 293 294 295
url https://careers.vic.gov.au/job/2019-diabetes-clinical-fellow-205104 https://careers.vic.gov.au/job/2019-expression-of-interest-basic-physician-trainee-registrars-and-advanced-physician-registrars-436186 https://careers.vic.gov.au/job/2020-emergency-medicine-registrars-436184 https://careers.vic.gov.au/job/2020-emergency-registrar-training-scheme-453015 https://careers.vic.gov.au/job/2020-expression-of-interest-hospital-medical-officers-436176 https://careers.vic.gov.au/job/2020-intensive-care-registrar-casey-hospital-451171 https://careers.vic.gov.au/job/2020-intern-program-legal-and-justice-policy-443967 https://careers.vic.gov.au/job/2020-monash-health-aboriginal-nursing-and-midwifery-and-allied-health-cadetship-program-432156 https://careers.vic.gov.au/job/2020-obstetric-gynaecology-senior-registrar-maternity-leave-position-430464 https://careers.vic.gov.au/job/2020-obstetrics-gynaecology-registrar-436201 ... https://careers.vic.gov.au/job/team-leader-administration-property-services-453611 https://careers.vic.gov.au/job/team-leader-senior-social-worker-453791 https://careers.vic.gov.au/job/technical-architect-x2-450717 https://careers.vic.gov.au/job/technical-assistant-453660 https://careers.vic.gov.au/job/theatre-technician-449954 https://careers.vic.gov.au/job/theatre-technician-453644 https://careers.vic.gov.au/job/ward-clerk-452714 https://careers.vic.gov.au/job/western-health-enrolled-nurse-vacancies-453056 https://careers.vic.gov.au/job/workplace-relations-advisor-453787 https://careers.vic.gov.au/job/youth-justice-worker-custodial-malmsbury-and-parkville-august-intake-451089

1 rows × 296 columns

  • Location
  • Job type
  • Organisation
  • Salary
  • Occupation
  • Title
  • Text
# Write the first capture's raw bytes to test.html (binary mode, overwriting
# any previous dump) -- presumably so the page can be inspected by hand.
with open('test.html', 'wb') as f:
    f.write(objs[0].content)
None

No dice with structured data

# Run extruct over the first capture; the result is a dict keyed by metadata
# syntax ('microdata', 'json-ld', 'opengraph', 'microformat', 'rdfa').
extruct.extract(objs[0].content)
{'microdata': [],
 'json-ld': [],
 'opengraph': [],
 'microformat': [],
 'rdfa': [{'@id': '_:Nb3aabe6261dc478b82bab4f6d47adcc2',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]},
  {'@id': '#sendEmailModal',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#dialog'}]},
  {'@id': '_:Nefe2d9b28e0249dfba114e7e9b198b93',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#navigation'}]},
  {'@id': '_:N9df4e5d1fe0e4155a756a87c0a50784c',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]},
  {'@id': '_:N1aa979629c744723930fd7f0105345e8',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]},
  {'@id': '#popup-confirm-fav',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#dialog'}]},
  {'@id': '_:Ne2c82b74b18949b89bd069daa0fb4581',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#document'}]},
  {'@id': '#CountdownPopup',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#dialog'}]},
  {'@id': '_:N938f30ac945548338dc6171cb4ad073a',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]},
  {'@id': '_:N16358aa5120e41e4aadba54985ac5526',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]},
  {'@id': '_:Nc7de78c2c9ab445fa1a2a8299fdc396e',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#button'}]}]}
# Parse the first capture's HTML. The parser is named explicitly: without it
# BeautifulSoup guesses from whatever is installed (and emits
# GuessedAtParserWarning), so the same cell can build different trees on
# different machines. 'lxml' is the parser bs4 prefers when available; it is
# assumed to be installed here because extruct depends on it.
soup = BeautifulSoup(objs[0].content, 'lxml')

Can extract metadata with string matching

# Text of every element carrying the "txt-info" class; each entry is a
# "Label: value" style string.
[info.text for info in soup.select('.txt-info')]
['Location: Melbourne | Southern Metropolitan',
 'Job type: Part time',
 'Organisation: Monash Health',
 'Salary: Salary not specified',
 'Occupation: Medical',
 '\n\nReference: \n32187\n\n',
 'Job posted: 04/06/2018',
 'Closes: 28/08/2020',
 'Occupation: Medical',
 'Classification: ',
 'Job duration: Not provided',
 'Contact: Linda Raineri - 03 9594 2893',
 '\n\nReference: \n32187\n\n',
 'Occupation: Medical',
 'Salary Range: Salary not specified',
 'Work location: Melbourne | Southern Metropolitan']
# First element with the "txt-pre-line" class -- on this page it is the div
# wrapping the job description. Raises IndexError if nothing matches.
s = soup.select('.txt-pre-line')[0]
s
<div class="txt-pre-line">
<p><strong>Diabetes Clinical Fellow</strong></p>
<p><strong>About the Role:</strong></p>
<p>We are looking for positive, committed and inspiring Junior Medical Staff to join a thriving team dedicated to achieving the best in patient care. Applications are invited from qualified medical practitioners, registered in Australia, with suitable experience for the position of Diabetes Clinical Fellow.</p>
<p>This position is based between Monash Medical Centre and Dandenong Hospitals. This position includes clinical diabetes and endocrinology ward service and ambulatory care roles.</p>
<p>With so much happening at Monash Health, why don’t you join us, become a Monash Doctor.</p>
<p><strong>Monash Health will offer:</strong></p>
<ul>
<li>competitive salary</li>
<li>salary packaging</li>
<li>friendly and supportive culture</li>
<li>opportunity to experience various teams</li>
<li>on-going supervision and professional development</li>
</ul>
<p>Monash health provides a world of healthcare across south eastern Melbourne, uniquely integrating primary, secondary and tertiary health services as well as world-renowned research and teaching facilities. We employee more than 17,000 staff who work across 40 care locations. In Monash Health your own growth and development is important and with us you can really be your best. For more information please visit www.monashhealth.org</p>
<p>Please note for non-Monash Health staff; as part of the application process you will be required to complete and attach a ‘fit to work National police check consent form’ to your application along with 100 points of certified ID.</p>
</div>

Get the description's inner HTML (the markup inside the container div)

# Serialise each child node of the description div and concatenate them,
# i.e. print the div's inner HTML without the wrapping <div> itself.
print(''.join(str(child) for child in s.contents))

<p><strong>Diabetes Clinical Fellow</strong></p>
<p><strong>About the Role:</strong></p>
<p>We are looking for positive, committed and inspiring Junior Medical Staff to join a thriving team dedicated to achieving the best in patient care. Applications are invited from qualified medical practitioners, registered in Australia, with suitable experience for the position of Diabetes Clinical Fellow.</p>
<p>This position is based between Monash Medical Centre and Dandenong Hospitals. This position includes clinical diabetes and endocrinology ward service and ambulatory care roles.</p>
<p>With so much happening at Monash Health, why don’t you join us, become a Monash Doctor.</p>
<p><strong>Monash Health will offer:</strong></p>
<ul>
<li>competitive salary</li>
<li>salary packaging</li>
<li>friendly and supportive culture</li>
<li>opportunity to experience various teams</li>
<li>on-going supervision and professional development</li>
</ul>
<p>Monash health provides a world of healthcare across south eastern Melbourne, uniquely integrating primary, secondary and tertiary health services as well as world-renowned research and teaching facilities. We employee more than 17,000 staff who work across 40 care locations. In Monash Health your own growth and development is important and with us you can really be your best. For more information please visit www.monashhealth.org</p>
<p>Please note for non-Monash Health staff; as part of the application process you will be required to complete and attach a ‘fit to work National police check consent form’ to your application along with 100 points of certified ID.</p>

Check another

# Parse the second capture's HTML. The parser is named explicitly: without it
# BeautifulSoup guesses from whatever is installed (and emits
# GuessedAtParserWarning), so the same cell can build different trees on
# different machines. 'lxml' is the parser bs4 prefers when available; it is
# assumed to be installed here because extruct depends on it.
soup = BeautifulSoup(objs[1].content, 'lxml')
None
# Text of every element carrying the "txt-info" class; each entry is a
# "Label: value" style string.
[info.text for info in soup.select('.txt-info')]
['Location: Geelong',
 'Job type: Not provided',
 'Organisation: Barwon Health',
 'Salary: Salary not specified',
 'Occupation: Medical',
 '\n\nReference: \n158317\n\n',
 'Job posted: 14/01/2019',
 'Closes: No closing date',
 'Occupation: Medical',
 'Classification: ',
 'Job duration: Not provided',
 'Contact: A/Prof Deborah Friedman - (03) 4215 0643Deborahf@barwonhealth.org.auAlex Townsend   - (03) 4215 0643 Alexandra.Townsend@barwonhealth.org.au',
 '\n\nReference: \n158317\n\n',
 'Occupation: Medical',
 'Salary Range: Salary not specified',
 'Work location: Geelong']
# Print the inner HTML of the first "txt-pre-line" element: each child node
# is serialised and the pieces are concatenated, dropping the wrapper tag.
print(''.join(str(child) for child in soup.select('.txt-pre-line')[0].contents))

<p><strong>About University Hospital Geelong, Barwon Health</strong></p><p>Formed in 1998, Barwon Health is one of the largest and most comprehensive regional health services in Australia, providing care at all stages of life and circumstance. Health services available through Barwon Health cover the full spectrum from primary care, community services, aged care, rehabilitation, mental health, emergency and acute care. With the exception of neurosurgery and transplantation, virtually all other specialties are available through University Hospital Geelong.</p><p>Barwon health is the major regional health provider for the Barwon South West region. It is Victoria’s largest regional health service with one of the busiest hospitals in the state, University Hospital. We serve over 500,000 people through the efforts of over 6,500 staff and more than 1300 volunteers. We provide care at all stages or life and circumstance through a wide range of services from emergency and acute to mental health, primary care, community services, aged care, and subacute/rehabilitation. Care is provided to the community through over 21 key locations throughout the region.</p><p>Guided by our values Barwon Health is Victoria's largest regional health care service and is also the largest employer in the Geelong and surf coast region, employing over 6,500 staff. Working at Barwon Health offers diversity where no two days are the same with varied areas of expertise and locations. 
We have a dedicated clinical education and training department giving you access to opportunities to improve your knowledge and skill base.</p><p><strong><span style="">About the roles</span></strong></p><p>University Hospital Geelong has on-going opportunities to join its BPT 2/3 Medical Registrar and Advanced Trainee cohorts in 2019.</p><p><strong>BPT 2 and 3</strong></p><p>University Hospital Geelong has a moderate sized physician training program which has the capacity for 32 medical registrars filling BPT 2 and BPT3 positions. It is expected that in 2019 there will be an even split of 15 BPT 2 positions and 15 BPT 3 positions.</p><p>These training roles incorporate both clinical service provision combined with education, with an expectation of additional self-directed learning.</p><p>The BPT 2 and BPT 3 year includes 4 rotations. The rotations offered include:</p><ul><li>General medicine spread over 6 different teams including a rapid assessment unit</li><li>Neurology</li><li>Stroke</li><li>Rural medical term in Warrnambool and Hamilton base hospitals</li><li>Geriatrics</li><li>Hospital in the home</li><li>Intensive care</li><li>Specialty leave cover (up to 5 weeks) for advanced trainees in; cardiology, gastroenterology, neurology, renal , infectious diseases, respiratory, palliative care, endocrinology, medical oncology and haematology</li></ul><p>All registrars also do one-half of a term of nights or evenings during any given year.</p><p>Basic physician trainee roles include several weekly educational meetings including;</p><ul><li>Journal club</li><li>Hospital grand rounds</li><li>BPT teaching</li><li>Radiology teaching</li><li>Clinical teaching rounds</li></ul><p>For those trainees preparing for the RACP examinations Barwon Health has developed a comprehensive preparation program. For the written examination there are additional focused tutorials and a mock exam in the few months before the examination. 
While for trainees preparing for the clinical examination, the hospital provides training spanning over 4 months, including one on one mentorship and 4-5 group teaching sessions per week. Examination success rates are usually at or above the national average</p><p><strong>Advanced Trainees</strong></p><p>The Department of General Medicine at Barwon Health provides advanced physician training positions in General Medicine in partnership with other regional centres. We pride ourselves on being able to offer customised training which is ideally suited to future practice in regional and rural areas.</p><p>Our advanced physician training program offers high quality six-month (and occasionally 12 month) specialty rotations. University Hospital Geelong (UHG), in conjunction with Ballarat Base Hospital (BBH) and South West Health Care (SWHC) in Warrnambool form the Western Victoria Regional training hub. Through this collaboration we are able to offer rotations at all three sites and can enable trainees to complete all of their advanced physician training in Western Victoria.<br/>Rotations include: <br/>• Senior medical registrar, UHG <br/>• Intensive care, UHG <br/>• Cardiology, UHG <br/>• Infectious Diseases, BBH<br/>• Respiratory medicine, BBH<br/>• Nephrology, BBH<br/>• Neurology, BBH<br/>• Gastroenterology, UHG and BBH<br/>• General Medicine, SWHC<br/>Rotations such as Palliative Care and Geriatrics are also sometimes available at UHG.</p><p><strong>About the culture</strong></p><p>You will work with innovative teams who maintain extensive knowledge and experience. 
You will have support and guidance from fellow team members and management who exhibit our Barwon Health values, respect, compassion, commitment, accountability and innovation.</p><p>We believe that working at Barwon Health is joining a culture where people strive to work to the full extent of their qualification, capability and experience with a working environment that enables this to happen.</p><p>We are a fun, passionate, supportive, energetic, driven team who works cohesively together. We promote professional development in the workplace and as a team are focussed on quality patient centred care. No day is the same; we are a team that embrace's change and are always looking for innovative ideas to improve our team and the organisation.</p><p>Barwon Health is committed to developing a vibrant culture of education, training and research for all staff fostering clinical excellence, effective leadership and a solid foundation of research underpinned by the role of Barwon Health as a teaching hospital.</p><p>We have a 'can do' culture which is embedded within a fast paced environment where we support and nurture multidisciplinary approaches to client care including empowering the client to lead an active and independent life.</p><p>As a team we are focussed on providing the highest quality patient centred care.</p><p><strong>At Barwon Health we celebrate and harness diversity, and consider it a competitive advantage. We encourage applications from all diverse backgrounds. Aboriginal and Torres Strait Islanders are encouraged to apply.</strong></p><p><strong>Recruitment agencies should note that Barwon Health does not accept agency resumes. Barwon Health is not responsible for any fees related to any unsolicited resumes submitted by Recruitment Agencies.</strong></p>

Design and build

Recruitment agency for the building and construction industry

# Query the Common Crawl index for Design & Build job pages (HTTP 200 only,
# 202004-202005 window) and tabulate the index records.
objs = list(
    cdx.iter(
        'www.designandbuild.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 3OJXPYZXL7ELSRRMG52TC564NZYFZO2H crawl-data/CC-MAIN-2020-16/segments/1585371810807.81/warc/CC-MAIN-20200408072713-20200408103213-00434.warc.gz eng 89250 text/html text/html 909421601 200 20200408090252 https://www.designandbuild.com.au/job/12d-civil-designer/ au,com,designandbuild)/job/12d-civil-designer
1 UTF-8 DRAEPC5GEHK3LOPJGWJPWOA4XVPV3H2M crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00268.warc.gz eng 88899 text/html text/html 869079322 200 20200402170501 https://www.designandbuild.com.au/job/12d-civil-designer-3/ au,com,designandbuild)/job/12d-civil-designer-3
2 UTF-8 MZEGH4HVTQU2QGONQ5EOOO4WTWU7XLPI crawl-data/CC-MAIN-2020-16/segments/1585370506121.24/warc/CC-MAIN-20200401192839-20200401222839-00304.warc.gz eng 89613 text/html text/html 877462682 200 20200401194522 https://www.designandbuild.com.au/job/2ic-architect-slash-documenter/ au,com,designandbuild)/job/2ic-architect-slash-documenter
3 UTF-8 KBQ65ZWZ3YLVAMZPGFJIYBJTWOEJIVXN crawl-data/CC-MAIN-2020-16/segments/1585371660550.75/warc/CC-MAIN-20200406200320-20200406230820-00019.warc.gz eng 89753 text/html text/html 900290689 200 20200406210013 https://www.designandbuild.com.au/job/accountant-1/ au,com,designandbuild)/job/accountant-1
4 UTF-8 26FZTCF5G4SNTFKHZHYCBUCIM3UVO2ZZ crawl-data/CC-MAIN-2020-16/segments/1585370506673.7/warc/CC-MAIN-20200402045741-20200402075741-00050.warc.gz eng 89758 text/html text/html 882049306 200 20200402053420 https://www.designandbuild.com.au/job/accountant-2/ au,com,designandbuild)/job/accountant-2
... ... ... ... ... ... ... ... ... ... ... ... ...
757 UTF-8 ZGMLAXUJBTLO7NROYKK3XOUZMFKPZAXO crawl-data/CC-MAIN-2020-16/segments/1585370507738.45/warc/CC-MAIN-20200402173940-20200402203940-00045.warc.gz eng 94941 text/html text/html 873614607 200 20200402183007 https://www.designandbuild.com.au/job/whs-officer-13/ au,com,designandbuild)/job/whs-officer-13
758 UTF-8 5FE3SF5TG5W23ZSPGOVV2L5A7PU4EFDO crawl-data/CC-MAIN-2020-16/segments/1585371660550.75/warc/CC-MAIN-20200406200320-20200406230820-00200.warc.gz eng 89649 text/html text/html 904713805 200 20200406202617 https://www.designandbuild.com.au/job/whs-officer-18/ au,com,designandbuild)/job/whs-officer-18
759 UTF-8 ERGZJRM37WRCKC35YWTLNE3V3WTCZ4DQ crawl-data/CC-MAIN-2020-16/segments/1585371660550.75/warc/CC-MAIN-20200406200320-20200406230820-00231.warc.gz eng 89654 text/html text/html 885365725 200 20200406205524 https://www.designandbuild.com.au/job/whs-officer-19/ au,com,designandbuild)/job/whs-officer-19
760 UTF-8 LWFMW2LGHJHWLPHOUOXJOVXTI4LC2EUC crawl-data/CC-MAIN-2020-16/segments/1585371821680.80/warc/CC-MAIN-20200408170717-20200408201217-00080.warc.gz eng 88414 text/html text/html 870661921 200 20200408190858 https://www.designandbuild.com.au/job/working-foreman-8/ au,com,designandbuild)/job/working-foreman-8
761 UTF-8 H722O5N26UXQBN25JOQZ3BJBJV7FGTIY crawl-data/CC-MAIN-2020-16/segments/1585370506988.10/warc/CC-MAIN-20200402143006-20200402173006-00353.warc.gz eng 89389 text/html text/html 903668793 200 20200402154929 https://www.designandbuild.com.au/job/works-coordinator-4/ au,com,designandbuild)/job/works-coordinator-4

762 rows × 12 columns

# Write the first capture's raw bytes to test.html (binary mode, overwriting
# any previous dump) -- presumably so the page can be inspected by hand.
with open('test.html', 'wb') as f:
    f.write(objs[0].content)
None
# Extract all structured metadata from the first capture, then keep only the
# JSON-LD nodes describing a job ad.
md = extruct.extract(objs[0].content)
# Use .get(): a JSON-LD node is not required to carry '@type', and indexing
# directly would raise KeyError on the first untyped node instead of simply
# skipping it.
[data for data in md['json-ld'] if data.get('@type') == 'JobPosting']
[{'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': None,
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': '$100,000 - $150,000'}},
  'datePosted': '2020-03-04T05:46:22.000+11:00',
  'employmentType': 'Permanent',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Design & Build',
   'sameAs': 'https://www.designandbuild.com.au',
   'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMTAvMjIvMDMvMjgvMzEvNTY3L2RuYi1sb2dvLW5ldy5wbmciXV0'},
  'industry': 'Engineering',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Sydney',
    'addressRegion': 'Sydney',
    'addressCountry': 'AU',
    'postalCode': None}},
  'title': 'Senior Drainage Engineer',
  'validThrough': '2020-03-30',
  'description': 'This Engineering Consultancy in Sydney has a great reputation for staff retention and staff development, keeping their tight night Tier 2 culture but working on larger Tier 1 projects they offer their Staff the best of both!!<br><strong><br>As the Senior&nbsp;Drainage Engineer you will;<br></strong> <ul> <li>Working without supervision on medium to large projects in NSW reporting in directly to the Highways Team Leader.</li> <li>Undertake&nbsp;design and investigation services in stormwater drainage design, water quality, WSUD and water balance</li> <li>Have more junior Engineers look up to you for guidance and mentoring.</li> </ul> <strong>To be considered for the position of Senior&nbsp;Drainage Engineer&nbsp;you will;</strong><br> <ul> <li>Have a degree in Civil Engineering (or other relevant)</li> <li>5+ years experience in Drainage Engineering for Transport projects within Engineering Consulting</li> <li>Expert in the use, understanding and application of softwares such as 12D, DRAINS, HEC-RAS and other modelling packages</li> </ul> For any questions relating to this role or other opportunities with D&amp;B and our clients please contact Alex Scott on + 61 29376 8200 / alex@designandbuild.com.au or click apply. Your application will be treated as strictly confidential.&nbsp;<br><br>Check us out on Google: https://bit.ly/2whfDMO&nbsp;<br><br>Thanks for your consideration.'}]

NSW Government job board

Not many jobs here

# Query the Common Crawl index for NSW Government job pages (HTTP 200 only,
# 202003-202004 window), capped at 50 index records, and tabulate them.
objs = list(
    cdx.iter(
        'iworkfor.nsw.gov.au/job/*',
        from_ts='202003',
        to='202004',
        limit=50,
        filter=['status:200'],
    )
)
pd.DataFrame(objs)
charset digest filename languages length mime mime-detected offset status timestamp url urlkey
0 UTF-8 L336NOH42QC5ZOKAJXUPUCRYFIEVJOPG crawl-data/CC-MAIN-2020-16/segments/1585370494349.3/warc/CC-MAIN-20200329140021-20200329170021-00527.warc.gz eng 11826 text/html application/xhtml+xml 533239693 200 20200329162208 https://iworkfor.nsw.gov.au/job/aboriginal-youth-justice-conference-convenor-youth-justice-191344 au,gov,nsw,iworkfor)/job/aboriginal-youth-justice-conference-convenor-youth-justice-191344
1 UTF-8 UGLQDTJYTS5ON2QUW3U2PTF5O4QCTZS3 crawl-data/CC-MAIN-2020-16/segments/1585370494349.3/warc/CC-MAIN-20200329140021-20200329170021-00237.warc.gz eng 11304 text/html application/xhtml+xml 546720411 200 20200329162558 https://iworkfor.nsw.gov.au/job/education-administration-support-ageing-disability-192093 au,gov,nsw,iworkfor)/job/education-administration-support-ageing-disability-192093
2 UTF-8 IY4X2M2X2WAGM3TKS2OREF6FOUKT4EAQ crawl-data/CC-MAIN-2020-16/segments/1585370494349.3/warc/CC-MAIN-20200329140021-20200329170021-00361.warc.gz eng 12190 text/html application/xhtml+xml 528281817 200 20200329142811 https://iworkfor.nsw.gov.au/job/manager-operational-systems-service-delivery-192091 au,gov,nsw,iworkfor)/job/manager-operational-systems-service-delivery-192091
3 UTF-8 WG22ETBXH4SKHONHIQTU34H3KUF4OJZX crawl-data/CC-MAIN-2020-16/segments/1585370494349.3/warc/CC-MAIN-20200329140021-20200329170021-00218.warc.gz eng 12228 text/html application/xhtml+xml 557493487 200 20200329153613 https://iworkfor.nsw.gov.au/job/organisation-design-specialist-talent-pool-191676 au,gov,nsw,iworkfor)/job/organisation-design-specialist-talent-pool-191676

The job details are stored as plain HTML, in tables and similar markup.

They could be extracted, but it is not worth the time investment.

# Write the first capture's raw bytes to test.html (binary mode, overwriting
# any previous dump) -- presumably so the page can be inspected by hand.
with open('test.html', 'wb') as f:
    f.write(objs[0].content)
None

Backpacker Jobs

Job ads aimed at backpackers; freemium so there’s a lot of guff

# Query the Common Crawl index for Backpacker Job Board job pages (HTTP 200
# only, 202004-202005 window).
objs = list(
    cdx.iter(
        'www.backpackerjobboard.com.au/job/*',
        from_ts='202004',
        to='202005',
        filter=['status:200'],
    )
)
# Show just the URLs, transposed so they render as one wide row.
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 467 468 469 470 471 472 473 474 475 476
url https://www.backpackerjobboard.com.au/job/12804/tennis-coach-at-fraser-coast-tennis/ https://www.backpackerjobboard.com.au/job/13476/chef2-year-visa-outback-experience-at-rachel-weir/ https://www.backpackerjobboard.com.au/job/18548/sales-promotions-immediate-start-at-mckenzie-holland/ https://www.backpackerjobboard.com.au/job/20400/sales-customer-service-travel-opportunities-at-mckenzie-holland/ https://www.backpackerjobboard.com.au/job/20763/customer-relations-sales-working-holiday-visas-accepted-at-mckenzie-holland/ https://www.backpackerjobboard.com.au/job/20911/marketing-sales-working-holiday-visas-accepted-at-mckenzie-holland/ https://www.backpackerjobboard.com.au/job/29741/experienced-waiter-at-trattoria-italiana/ https://www.backpackerjobboard.com.au/job/30851/live-in-helper-wanted-at-gater-fishing/ https://www.backpackerjobboard.com.au/job/31777/meat-processing-workers-at-ncmc/ https://www.backpackerjobboard.com.au/job/32415/2nd-year-visa-opportunity-working-with-horses-at-strempel-racing/ ... 
https://www.backpackerjobboard.com.au/job/71621/week-days-full-time-good-salary-at-my-home-clean/ https://www.backpackerjobboard.com.au/job/71622/week-days-full-time-good-salary-at-myhome-cleaning/ https://www.backpackerjobboard.com.au/job/71630/sales-agents-outbound-call-centre-excellent-base-rate-plus-uncapped-commissions-at-nsp-personnel/ https://www.backpackerjobboard.com.au/job/71635/a-wonderful-au-pair-opportunity-near-the-beachcity-of-perth-at-au-pair-care-australia/ https://www.backpackerjobboard.com.au/job/71636/home-cleaning-sanitation-at-essential-home-services-peninsula/ https://www.backpackerjobboard.com.au/job/71637/french-female-au-pair-wanted-at-b/ https://www.backpackerjobboard.com.au/job/71638/urgent-aupairs-port-hedland-karratha-and-newman-at-pilbara-aupair-agency/ https://www.backpackerjobboard.com.au/job/71640/au-pair-job-in-st-clair-in-a-large-independednt-private-garden-house-at-tavi/ https://www.backpackerjobboard.com.au/job/71642/care-ninja-live-in-at-independence-world/ https://www.backpackerjobboard.com.au/job/71650/yellow-pages-deliverers-wa-at-gdr-group/

1 rows × 477 columns

# Save the first capture's raw bytes to disk so the page can be inspected by hand.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None
# Run extruct over the first capture and show everything it returns
# (the result is a dict keyed by syntax, e.g. 'microdata', 'json-ld').
first_capture = objs[0]
extruct.extract(first_capture.content)
{'microdata': [{'type': 'http://schema.org/JobPosting',
   'properties': {'datePosted': '2014-03-20 04:09:01',
    'employmentType': 'PART_TIME',
    'title': 'Tennis Coach',
    'hiringOrganization': {'type': 'http://schema.org/Organization',
     'properties': {'name': 'Fraser Coast Tennis'}},
    'jobLocation': {'type': 'http://schema.org/Place',
     'properties': {'address': {'type': 'http://schema.org/PostalAddress',
       'properties': {'addressRegion': 'Hervey Bay',
        'postalCode': '',
        'addressCountry': 'AU'}}}},
    'industry': 'sports jobs',
    'description': 'Tennis Coach wanted for part time work Hervey Bay. Great opportunity. Possibility of a full time position for the right person. Please email fcpt123@gmail.com.'}},
  {'type': 'https://schema.org/BreadcrumbList',
   'properties': {'itemListElement': [{'type': 'https://schema.org/ListItem',
      'properties': {'item': 'https://www.backpackerjobboard.com.au/',
       'name': 'Home',
       'position': '1'}},
     {'type': 'https://schema.org/ListItem',
      'properties': {'item': 'https://www.backpackerjobboard.com.au/jobs/sports-jobs/',
       'name': 'sports jobs',
       'position': '2'}},
     {'type': 'https://schema.org/ListItem',
      'properties': {'item': 'https://www.backpackerjobboard.com.au/job/12804/tennis-coach-at-fraser-coast-tennis/',
       'name': 'Tennis Coach',
       'position': '3'}}]}}],
 'json-ld': [{'@context': 'http://schema.org',
   '@type': 'Organization',
   'name': 'Backpacker Job Board',
   'logo': 'https://www.backpackerjobboard.com.au/img/logo_schema.png',
   'url': 'https://www.backpackerjobboard.com.au/',
   'sameAs': ['https://www.facebook.com/backpackerjobsaustralia',
    'https://twitter.com/backpackerjob',
    'https://www.instagram.com/backpackerjobboard/',
    'https://www.pinterest.com.au/backpackerjobboard/',
    'https://www.crunchbase.com/organization/backpacker-job-board']}],
 'opengraph': [{'namespace': {'og': 'http://ogp.me/ns#'},
   'properties': [('og:image',
     'https://www.backpackerjobboard.com.au/images/fb-sports-jobs.jpg'),
    ('og:site_name', 'Backpacker Job Board Australia'),
    ('og:title', 'Tennis Coach'),
    ('og:type', 'website'),
    ('og:country-name', 'Australia')]}],
 'microformat': [],
 'rdfa': [{'@id': '',
   'http://ogp.me/ns#country-name': [{'@value': 'Australia'}],
   'http://ogp.me/ns#image': [{'@value': 'https://www.backpackerjobboard.com.au/images/fb-sports-jobs.jpg'}],
   'http://ogp.me/ns#site_name': [{'@value': 'Backpacker Job Board Australia'}],
   'http://ogp.me/ns#title': [{'@value': 'Tennis Coach'}],
   'http://ogp.me/ns#type': [{'@value': 'website'}],
   'http://ogp.me/ns/fb#admins': [{'@value': '513199994'}],
   'http://ogp.me/ns/fb#app_id': [{'@value': '182079148660895'}]},
  {'@id': '_:Na5441dd18a034ef297b6b87ad533f913',
   'http://www.w3.org/1999/xhtml/vocab#role': [{'@id': 'http://www.w3.org/1999/xhtml/vocab#navigation'}]}]}
# Keep only the JobPosting microdata items from the first capture.
# `.get` is used instead of indexing because microdata items that carry no
# itemtype have no 'type' key, which would otherwise raise KeyError.
[item for item in extruct.extract(objs[0].content)['microdata']
 if item.get('type') == 'http://schema.org/JobPosting']
[{'type': 'http://schema.org/JobPosting',
  'properties': {'datePosted': '2014-03-20 04:09:01',
   'employmentType': 'PART_TIME',
   'title': 'Tennis Coach',
   'hiringOrganization': {'type': 'http://schema.org/Organization',
    'properties': {'name': 'Fraser Coast Tennis'}},
   'jobLocation': {'type': 'http://schema.org/Place',
    'properties': {'address': {'type': 'http://schema.org/PostalAddress',
      'properties': {'addressRegion': 'Hervey Bay',
       'postalCode': '',
       'addressCountry': 'AU'}}}},
   'industry': 'sports jobs',
   'description': 'Tennis Coach wanted for part time work Hervey Bay. Great opportunity. Possibility of a full time position for the right person. Please email fcpt123@gmail.com.'}}]
# Same JobPosting filter applied to the second capture.
# `.get` is used instead of indexing because microdata items that carry no
# itemtype have no 'type' key, which would otherwise raise KeyError.
[item for item in extruct.extract(objs[1].content)['microdata']
 if item.get('type') == 'http://schema.org/JobPosting']
None
[{'type': 'http://schema.org/JobPosting',
  'properties': {'datePosted': '2014-05-30 02:25:38',
   'employmentType': 'FULL_TIME',
   'title': 'Chef/2 Year Visa/ Outback Experience',
   'hiringOrganization': {'type': 'http://schema.org/Organization',
    'properties': {'name': 'Rachel Weir'}},
   'jobLocation': {'type': 'http://schema.org/Place',
    'properties': {'address': {'type': 'http://schema.org/PostalAddress',
      'properties': {'addressLocality': 'Julia Creek Gulf of Australia',
       'addressRegion': 'Queensland',
       'postalCode': '',
       'addressCountry': 'AU'}}}},
   'industry': 'farm work',
   'description': 'If you are looking for the REAL outback experience then this job may be for you!\nNick Weir Contract Mustering is a small family owned business located at Julia Creek Qld. We employ 8 young, energetic people at a time. Work is seasonal and takes us to various locations in the Gulf of Qld. We are happy, hardworking people and many of our previous employees return for a few years at a time.\nA position for a chef/cook is available. This position enables the camp cook to get out and experience a real outback situation and work in a different environment. Yard work (with cattle) and horseriding may be possible for the right person and if they wish.\nCamp cooking is done in a 40ft. kitchen trailer with a gas stove and running hot water. An extremely high level of cleanliness in the kitchen is expected. Clean habits and a high level of personal hygiene are a necessity. A high level of work ethic and a happy personality is always a bonus. The Chef/Cook will prepare meals for the men 2-4 times/day depending on what they are doing. The kitchen is their responsbility and will include ordering stores and keeping the kitchen clean and tidy. A full list of responsibilties can be emailed to potential candidates.\nThe cook also does the washing for the men as it is easier for one person to do it on a daily basis. This is usually three loads of washing.\nQualified Chefs will be given preference over Cooks, the ability to cook quality home style Qualified Chefs will be given preference over Cooks, the ability to cook quality home style meals for the entire camp is a must.\nPay is $140/day.\nWork is seven days a week until we have scheduled time off. Time off is usually a block of four days where we may go to a campdraft.\nStockcamp conditions apply in most cases. Please be aware that this position involves camping out in a swag. We are on generator power. (not run 24 hours a day). It is a remote situation which may suit only some applicants. 
Phone and Internet is available only when in town. Transport is advantageous but not essential.\nMany of our previous overseas Chef/Cooks have found this to be a highly rewarding once in a lifetime experience\nWork is usually until November.\nGenuine enquires from candidates who are willing to work hard only please. Please email a resume with references and a photo. Second year Visa applicants welcome. Start end March.\nThanking You!'}}]

Sirius People

Recruiter

# Status-200 captures of Sirius People job pages (from_ts='202004', to='202005');
# display only the URLs, transposed into a single wide row.
captures = cdx.iter('www.siriuspeople.com.au/job/*',
                    from_ts='202004', to='202005',
                    filter=['status:200'])
objs = list(captures)
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 550 551 552 553 554 555 556 557 558 559
url https://www.siriuspeople.com.au/job/100857432045396/specialist-recruitment-consultant/ https://www.siriuspeople.com.au/job/100857432045483/javascript-and-node-slash-react-developer-melbourne-permanent/ https://www.siriuspeople.com.au/job/100857432045869/mid-level-business-project-manager-1/ https://www.siriuspeople.com.au/job/100857432045926/assistant-brand-manager/ https://www.siriuspeople.com.au/job/100857432046085/java-developers-aus-citizen-adelaide-permanent-$100k-incl-super/ https://www.siriuspeople.com.au/job/100857432046500/angular-and-net-developer/ https://www.siriuspeople.com.au/job/100857432046565/full-stack-developer-angular-java-slash-nodejs-banking-sydney/ https://www.siriuspeople.com.au/job/100857432046571/devops-engineer-5/ https://www.siriuspeople.com.au/job/100857432046838/senior-net-core-developer-aws-slash-azure-perth-wa/ https://www.siriuspeople.com.au/job/100857432046843/executive-assistant-9/ ... https://www.siriuspeople.com.au/job/senior-magento-slash-bigcommerce-developer/ https://www.siriuspeople.com.au/job/senior-payroll-officer/ https://www.siriuspeople.com.au/job/senior-product-manager-1/ https://www.siriuspeople.com.au/job/senior-slash-lead-front-end-developer-melbourne-permanent-2/ https://www.siriuspeople.com.au/job/senior-slash-lead-front-end-developer-melbourne-permanent-3/ https://www.siriuspeople.com.au/job/senior-web-developer/ https://www.siriuspeople.com.au/job/servicenow-developer-slash-technical-consultant-melb-permanent-4/ https://www.siriuspeople.com.au/job/team-leader-gis-development-and-application-development-perm-1/ https://www.siriuspeople.com.au/job/tech-lead-scrum-master/ https://www.siriuspeople.com.au/job/warehouse-coordinator-slash-loan-set-officer/

1 rows × 560 columns

# Save the first Sirius People capture's raw bytes for manual inspection.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None

All in the JSON LD

Note that there is no country, and the location is ‘Melbourne C B D’.

But the currency is AUD

# Show only the JSON-LD part of what extruct finds in the first capture.
structured = extruct.extract(objs[0].content)
structured['json-ld']
[{'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'Sirius People',
  'url': 'https://www.siriuspeople.com.au',
  'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMDkvMTQvMDIvNTcvNTMvMTM3L3Npcml1cy1sb2dvLnBuZyJdXQ'},
 {'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': 'Competitive Base + Comms'}},
  'datePosted': '2019-10-01T12:02:04.000+10:00',
  'employmentType': 'Permanent',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Sirius People',
   'sameAs': 'https://www.siriuspeople.com.au',
   'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMDkvMTQvMDIvNTcvNTMvMTM3L3Npcml1cy1sb2dvLnBuZyJdXQ'},
  'industry': 'Sales & Marketing',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Melbourne C B D',
    'addressRegion': 'Melbourne C B D',
    'addressCountry': None,
    'postalCode': None}},
  'salaryCurrency': 'AUD',
  'title': 'Specialist Recruitment Consultant',
  'validThrough': '2019-10-29',
  'description': "<strong>The Opportunity</strong><br>Specialist Recruitment Consultant<br><br><strong>The Sirius-ly Quick Brief</strong><br>Sirius People is a boutique recruitment company that was launched in 2003 and has steadily grown, generating a strong reputation as a trusted supplier and securing a top-tier client base internationally.<br>Our Vision is to become the number one recruitment specialist on the Eastern Seaboard by 2022 and we are looking for superstar sales people whose personal passion and success collectively contribute to reaching this goal!<br><br><strong>The Plan</strong><br>As an\xa0Specialist you will become an industry expert, providing customers with up-to-date market knowledge, offering a personal recruitment process to match their individual needs.<br><br><strong>The Expectations</strong><br><ul><li>Identify and develop business relationships, with a key focus on growing your\xa0existing client base</li><li>Understand client needs to offer a tailored service</li><li>Source suitable candidates for client opportunities</li><li>Work with Delivery team to network and build a candidate database</li><li>Manage and coordinate the interview process between client and candidate</li><li>Showcase market awareness and keep up to date with industry knowledge</li></ul><strong>The Pre-Requisites</strong><br><ul><li>Bachelor's degree in a relevant field - desired but not essential</li><li>2-3 years proven track record of success in a recruitment/sales environment</li><li>A desire for continuous personal and professional development</li><li>A passion to work as a team to achieve something great!</li></ul><strong>The Perks & Benefits</strong><br><ul><li>On-going training and transparent progression structure</li><li>Flexible working arrangements</li><li>Employee Assistance Program</li><li>A competitive salary and bonus structure AND many, many more!</li></ul><strong>The Deal</strong><br>For more information on joining our Tribe then send through your 
resume to Gemma at\xa0gemma@siriuspeople.com.au. Don't have a resume? Don't worry! Send an email including an interesting fact about yourself so that we can start conversations about you joining THE BEST COMPANY EVER! Chat soon."},
 {'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'Sirius People',
  'url': 'https://www.siriuspeople.com.au/',
  'sameAs': ['https://www.facebook.com/SiriusPeopleRecruitment/',
   'https://twitter.com/sirius_people',
   'https://www.instagram.com/sirius_people/',
   'https://www.linkedin.com/company/sirius-people/']}]

Sirius People (JobPosting only)

Same recruiter and query as the previous section; this time only the JobPosting record is kept from the JSON-LD.

# Re-run the Sirius People job-page query (status-200 captures only) and
# display the URLs as one wide row.
captures = cdx.iter('www.siriuspeople.com.au/job/*',
                    from_ts='202004', to='202005',
                    filter=['status:200'])
objs = list(captures)
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 550 551 552 553 554 555 556 557 558 559
url https://www.siriuspeople.com.au/job/100857432045396/specialist-recruitment-consultant/ https://www.siriuspeople.com.au/job/100857432045483/javascript-and-node-slash-react-developer-melbourne-permanent/ https://www.siriuspeople.com.au/job/100857432045869/mid-level-business-project-manager-1/ https://www.siriuspeople.com.au/job/100857432045926/assistant-brand-manager/ https://www.siriuspeople.com.au/job/100857432046085/java-developers-aus-citizen-adelaide-permanent-$100k-incl-super/ https://www.siriuspeople.com.au/job/100857432046500/angular-and-net-developer/ https://www.siriuspeople.com.au/job/100857432046565/full-stack-developer-angular-java-slash-nodejs-banking-sydney/ https://www.siriuspeople.com.au/job/100857432046571/devops-engineer-5/ https://www.siriuspeople.com.au/job/100857432046838/senior-net-core-developer-aws-slash-azure-perth-wa/ https://www.siriuspeople.com.au/job/100857432046843/executive-assistant-9/ ... https://www.siriuspeople.com.au/job/senior-magento-slash-bigcommerce-developer/ https://www.siriuspeople.com.au/job/senior-payroll-officer/ https://www.siriuspeople.com.au/job/senior-product-manager-1/ https://www.siriuspeople.com.au/job/senior-slash-lead-front-end-developer-melbourne-permanent-2/ https://www.siriuspeople.com.au/job/senior-slash-lead-front-end-developer-melbourne-permanent-3/ https://www.siriuspeople.com.au/job/senior-web-developer/ https://www.siriuspeople.com.au/job/servicenow-developer-slash-technical-consultant-melb-permanent-4/ https://www.siriuspeople.com.au/job/team-leader-gis-development-and-application-development-perm-1/ https://www.siriuspeople.com.au/job/tech-lead-scrum-master/ https://www.siriuspeople.com.au/job/warehouse-coordinator-slash-loan-set-officer/

1 rows × 560 columns

# Save the first capture's raw bytes for manual inspection.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None

All in the JSON LD

Note that there is no country, and the location is ‘Melbourne C B D’.

But the currency is AUD

# Keep only the JobPosting records from the JSON-LD of the first capture.
# `.get` is used instead of indexing so that JSON-LD nodes without an '@type'
# key are skipped rather than raising KeyError.
[node for node in extruct.extract(objs[0].content)['json-ld']
 if node.get('@type') == 'JobPosting']
[{'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'unitText': None,
    'value': 'Competitive Base + Comms'}},
  'datePosted': '2019-10-01T12:02:04.000+10:00',
  'employmentType': 'Permanent',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Sirius People',
   'sameAs': 'https://www.siriuspeople.com.au',
   'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMDkvMTQvMDIvNTcvNTMvMTM3L3Npcml1cy1sb2dvLnBuZyJdXQ'},
  'industry': 'Sales & Marketing',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Melbourne C B D',
    'addressRegion': 'Melbourne C B D',
    'addressCountry': None,
    'postalCode': None}},
  'salaryCurrency': 'AUD',
  'title': 'Specialist Recruitment Consultant',
  'validThrough': '2019-10-29',
  'description': "<strong>The Opportunity</strong><br>Specialist Recruitment Consultant<br><br><strong>The Sirius-ly Quick Brief</strong><br>Sirius People is a boutique recruitment company that was launched in 2003 and has steadily grown, generating a strong reputation as a trusted supplier and securing a top-tier client base internationally.<br>Our Vision is to become the number one recruitment specialist on the Eastern Seaboard by 2022 and we are looking for superstar sales people whose personal passion and success collectively contribute to reaching this goal!<br><br><strong>The Plan</strong><br>As an\xa0Specialist you will become an industry expert, providing customers with up-to-date market knowledge, offering a personal recruitment process to match their individual needs.<br><br><strong>The Expectations</strong><br><ul><li>Identify and develop business relationships, with a key focus on growing your\xa0existing client base</li><li>Understand client needs to offer a tailored service</li><li>Source suitable candidates for client opportunities</li><li>Work with Delivery team to network and build a candidate database</li><li>Manage and coordinate the interview process between client and candidate</li><li>Showcase market awareness and keep up to date with industry knowledge</li></ul><strong>The Pre-Requisites</strong><br><ul><li>Bachelor's degree in a relevant field - desired but not essential</li><li>2-3 years proven track record of success in a recruitment/sales environment</li><li>A desire for continuous personal and professional development</li><li>A passion to work as a team to achieve something great!</li></ul><strong>The Perks & Benefits</strong><br><ul><li>On-going training and transparent progression structure</li><li>Flexible working arrangements</li><li>Employee Assistance Program</li><li>A competitive salary and bonus structure AND many, many more!</li></ul><strong>The Deal</strong><br>For more information on joining our Tribe then send through your 
resume to Gemma at\xa0gemma@siriuspeople.com.au. Don't have a resume? Don't worry! Send an email including an interesting fact about yourself so that we can start conversations about you joining THE BEST COMPANY EVER! Chat soon."}]

Six Degrees Executive

Recruiter

# Status-200 captures of Six Degrees Executive job pages
# (from_ts='202004', to='202005'); display the URLs as one wide row.
captures = cdx.iter('www.sixdegreesexecutive.com.au/job/*',
                    from_ts='202004', to='202005',
                    filter=['status:200'])
objs = list(captures)
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 548 549 550 551 552 553 554 555 556 557
url https://www.sixdegreesexecutive.com.au/job/account-coordinator-health-and-beauty/ https://www.sixdegreesexecutive.com.au/job/account-director/ https://www.sixdegreesexecutive.com.au/job/account-director-1/ https://www.sixdegreesexecutive.com.au/job/account-executive-11/ https://www.sixdegreesexecutive.com.au/job/account-executive-health-and-wellness-1/ https://www.sixdegreesexecutive.com.au/job/account-executive-toys-jb-hifi/ https://www.sixdegreesexecutive.com.au/job/account-manager-21/ https://www.sixdegreesexecutive.com.au/job/account-manager-26/ https://www.sixdegreesexecutive.com.au/job/account-manager-27/ https://www.sixdegreesexecutive.com.au/job/account-manager-28/ ... https://www.sixdegreesexecutive.com.au/job/warehouse-and-logistics-manager-1/ https://www.sixdegreesexecutive.com.au/job/warehouse-inventory-controller/ https://www.sixdegreesexecutive.com.au/job/warehouse-inventory-controller-1/ https://www.sixdegreesexecutive.com.au/job/warehouse-logistics-manager/ https://www.sixdegreesexecutive.com.au/job/warehouse-manager-4/ https://www.sixdegreesexecutive.com.au/job/warehouse-operations-manager-1/ https://www.sixdegreesexecutive.com.au/job/warehouse-supervisor-1/ https://www.sixdegreesexecutive.com.au/job/warehouse-team-leader-3/ https://www.sixdegreesexecutive.com.au/job/whs-coordinator/ https://www.sixdegreesexecutive.com.au/job/whs-coordinator-1/

1 rows × 558 columns

# Save the first Six Degrees Executive capture's raw bytes for manual inspection.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[0].content)
None

All in the JSON LD

# Show only the JSON-LD part of what extruct finds in the first capture.
structured = extruct.extract(objs[0].content)
structured['json-ld']
[{'@context': 'http://schema.org',
  '@type': 'Organization',
  'name': 'Six Degrees Executive',
  'url': 'https://www.sixdegreesexecutive.com.au',
  'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMDIvMDkvMTcvMDMvMTUvODg4L2xvZ28ucG5nIl1d'},
 {'@context': 'http://schema.org',
  '@type': 'JobPosting',
  'baseSalary': {'@type': 'MonetaryAmount',
   'currency': 'AUD',
   'value': {'@type': 'QuantitativeValue',
    'unitText': 'YEAR',
    'value': 'Negotiable'}},
  'datePosted': '2020-02-27T22:39:16.000+00:00',
  'employmentType': 'Permanent / Full Time',
  'hiringOrganization': {'@type': 'Organization',
   'name': 'Six Degrees Executive',
   'sameAs': 'https://www.sixdegreesexecutive.com.au',
   'logo': 'https://d418bv7mr3wfv.cloudfront.net/s3/W1siZiIsIjIwMTgvMDIvMDkvMTcvMDMvMTUvODg4L2xvZ28ucG5nIl1d'},
  'industry': 'Sales',
  'jobLocation': {'@type': 'Place',
   'address': {'@type': 'PostalAddress',
    'streetAddress': None,
    'addressLocality': 'Melbourne',
    'addressRegion': ' Victoria',
    'addressCountry': 'AU',
    'postalCode': None}},
  'salaryCurrency': 'AUD',
  'title': 'Account Coordinator - Health & Beauty',
  'validThrough': '2020-03-28',
  'description': '<p><strong>About the company:</strong><br /><br />Our client is an Australian FMCG business made up of a portfolio of \'better for you\' brands across plant based health food, petcare, beauty and baby categories. An exciting opportunity exists to join the team as an Account Coordinator for Health and Beauty, assisting the Business Management team with various key customer accounts and strategy planning and execution.<br /><br /><strong>About the role:</strong></p><p>Reporting to the Business Management team, your responsibilities will include:</p><ul><li>Development of category and/or product strategies to drive sales</li><li>Product costing schedules and inventory database management</li><li>Internal engagement with supply chain and finance teams</li><li>Prepare and communicate new products to internal teams </li><li>New business development through network and relationship management</li><li>Proactive analysis of category and competitor trends</li><li>Assist with promotional planning and customer communication</li><li>Provide support with quarterly business reviews, product advertising/marketing, tradeshow and promotional events</li><li>Administrative tasks as required to support the team</li></ul><p><strong>Skills &amp; experience:</strong></p><ul><li>Understanding of FMCG environment</li><li>Account Coordinator, Sales Administration and/or client management experience</li><li>Exposure to CRM data management and reporting platforms</li><li>Skilled in excel, project management and problem solving</li><li>Excellent written and verbal communication skills</li></ul><p>Click APPLY or contact Catherine Bartholomew on 03 8613 3523 for a confidential chat about your career today! 
If this role doesn\'t sound quite right for you but you are open to hearing about new opportunities, please get in touch or jump on to the website and sign up for our job alerts.</p><img src="https://counter.adcourier.com/Y2F0aGVyaW5lLmJhcnRob2xvbWV3Ljk3MTAwLjcwNjdAc2l4ZGVncmVlc2V4ZWN1dGl2ZWF1LmFwbGl0cmFrLmNvbQ.gif" />'}]

Nestle

The Athena query mainly found search-result pages; those link through to the job detail pages on jobdetails.nestle.com, which are queried below.

# Status-200 captures under jobdetails.nestle.com/job*
# (from_ts='202004', to='202005'); display the URLs as one wide row.
captures = cdx.iter('jobdetails.nestle.com/job*',
                    from_ts='202004', to='202005',
                    filter=['status:200'])
objs = list(captures)
pd.DataFrame(objs)[['url']].transpose()
0 1 2 3 4 5 6 7 8 9 ... 584 585 586 587 588 589 590 591 592 593
url https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%8D%E3%82%B9%E3%83%AC%E6... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%8D%E3%82%B9%E3%83%AC%E6... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%8D%E3%82%B9%E3%83%AC%E6... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%8D%E3%82%B9%E3%83%AC%E6... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%94%E3%83%A5%E3%83%AA%E3... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%94%E3%83%A5%E3%83%AA%E3... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%94%E3%83%A5%E3%83%AA%E3... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-%E3%83%94%E3%83%A5%E3%83%AA%E3... https://jobdetails.nestle.com/job/%E5%85%B5%E5%BA%AB%E7%9C%8C%E7%A5%9E%E6%88%B8%E5%B8%82%E4%B8%AD%E5%A4%AE%E5%8C%BA-EC-Security-Specialist/5909352... https://jobdetails.nestle.com/job/%E9%9D%99%E5%B2%A1%E7%9C%8C%E5%B3%B6%E7%94%B0%E5%B8%82-%E8%A3%BD%E9%80%A0%E3%82%AA%E3%83%9A%E3%83%AC%E3%83%BC%E3... ... 
https://jobdetails.nestle.com/job/Wien-Digital-Marketing-Specialist-B2B-%28wmd%29/588266001/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/Wirral-Electrical-Automation-Engineer-CH62-4TH/587830801/?feedId=256801&utm_source=NestleCareers%20 https://jobdetails.nestle.com/job/Wirral-Maintenance-Planner-CH62-4TH/590320501/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/Wirral-Mechanical-Project-Engineer-CH62-4TH/587844301/?feedId=256801&utm_source=NestleCareers%20 https://jobdetails.nestle.com/job/York-Engineering-Technician-YN-YO91-1XY/596638401/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/York-Senior-Load-Compliance-Specialist-YN-YO91-1XY/590945801/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/Zula-T%C3%A9cnico-Mantenimiento-El%C3%A9ctrico/590763401/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/Zula-T%C3%A9cnico-Mantenimiento-El%C3%A9ctrico/590763401/?feedId=256801&utm_source=NestleCareers%20 https://jobdetails.nestle.com/job/Zula-T%C3%A9cnico-Mantenimiento-El%C3%A9ctrico/590763501/?feedId=256801&utm_source=NestleCareers https://jobdetails.nestle.com/job/Zula-T%C3%A9cnico-Mantenimiento-El%C3%A9ctrico/590763501/?feedId=256801&utm_source=NestleCareers%20

1 rows × 594 columns

# Save capture number 584 (not the first one this time) for manual inspection.
with open('test.html', 'wb') as html_file:
    html_file.write(objs[584].content)
None

Many of these are dead links; for the live pages the fields need to be extracted manually from the HTML.

# Parse the same Nestle capture (index 584) that was written to test.html.
# The parser is named explicitly so bs4 does not guess one and emit a warning;
# lxml is what bs4 prefers when it is installed, and extruct already needs it.
soup = BeautifulSoup(objs[584].content, 'lxml')

Location

# Text of the first element with class "jobLocation" (whitespace left as-is).
soup.select_one('.jobLocation').get_text()
'\nWien, AT\n\t\t\t\t\t\n'

Title

# Text of the first <span> whose itemprop attribute is "title".
soup.find('span', itemprop='title').get_text()
'Digital Marketing Specialist B2B (w/m/d)\n    '

Job Text (auf Deutsch)

# Render the first element with class "jobdescription" as HTML in the notebook.
description_node = soup.select_one('.jobdescription')
HTML(str(description_node))

Nespresso – ein Team, eine Leidenschaft.


Das Nespresso Erfolgsgeheimnis offenbart sich im perfekten Kaffeemoment. Als Mitarbeiter (w/m/d) leisten Sie einen maßgeblichen Beitrag zum Zauber unserer Marke. Wenn Sie unsere Leidenschaft teilen und jenes unvergleichbare Kaffee-Erlebnis mitgestalten wollen, freuen wir uns auf Ihre Bewerbung.
 
Aktuell suchen wir einen Digital Marketing Specialist B2B (w/m/d)

 

Ihre Aufgaben

 

  • Verantwortung für B2B Kampagnen in den Bereichen Search, Social Media und Digital Advertising
  • Strategieentwicklung und Umsetzung zur Lead Generierung und Stärkung des B2B E-Commerce Channels
  • Kontinuierliche Optimierung von Kampagnen-Performance sowie aller relevanten KPIs
  • Verantwortung von eigenen Budgets, Projekten und Kampagnen
  • Adaptierung von internationalen Werbemitteln für lokale Kampagnen
  • Enge Zusammenarbeit mit Agenturen, internen Stakeholdern und dem HQ in der Schweiz
Unsere Anforderungen

 

  • Digital Marketing Native mit abgeschlossener Ausbildung und Berufserfahrung
  • Erfahrung im Bereich SEA, Facebook, LinkedIn, Google Analytics und Media Tools wünschenswert
  • Projekt- und Stakeholder-Managementskills
  • Hohe analytische Fähigkeiten, Ergebnisorientierung und strukturierte Arbeitsweise
  • Eigeninitiative, Kreativität und Kommunikationsstärke
  • Sehr gute MS Office, sowie Deutsch- und Englischkenntnisse in Wort und Schrift
Ihre Perspektiven

 

  • Eigenverantwortung und Gestaltungsspielraum mit starkem Rückhalt eines dynamischen Teams
  • Umfangreiche Einschulung und laufende Fortbildungen
  • Flexibles Arbeiten in einem modernen Arbeitsumfeld
  • Nationale und internationale Karrieremöglichkeiten
  • Zahlreiche attraktive Benefits
  • Ab € 2.800 brutto/Monat, sowie ein leistungsorientierter Bonus
     

 

Ihre Ansprechpartnerin, Frau Angela Hönisch, freut sich auf Ihre Bewerbung!