2026-03-19 09:55:26 +00:00
|
|
|
|
SELECT
|
|
|
|
|
|
sd.skills,
|
|
|
|
|
|
ROUND(MEDIAN(jpf.salary_year_avg)) as median_salary,
|
|
|
|
|
|
COUNT(jpf.*) as demand_skills,
|
|
|
|
|
|
COUNT(jpf.salary_year_avg) as corrected_demand_count
|
|
|
|
|
|
FROM job_postings_fact as jpf
|
|
|
|
|
|
INNER JOIN skills_job_dim as sjd
|
|
|
|
|
|
ON jpf.job_id = sjd.job_id
|
|
|
|
|
|
INNER JOIN skills_dim as sd
|
|
|
|
|
|
ON sjd.skill_id = sd.skill_id
|
|
|
|
|
|
WHERE
|
|
|
|
|
|
jpf.job_title_short LIKE 'Data Engineer'
|
|
|
|
|
|
AND
|
2026-03-19 10:14:50 +00:00
|
|
|
|
jpf.job_work_from_home = False
|
2026-03-19 09:55:26 +00:00
|
|
|
|
|
|
|
|
|
|
GROUP BY sd.skills
|
|
|
|
|
|
HAVING COUNT(jpf.*) > 100
|
|
|
|
|
|
ORDER BY
|
|
|
|
|
|
median_salary DESC
|
|
|
|
|
|
LIMIT 25
|
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
|
|
|
|
┌────────────┬───────────────┬───────────────┬────────────────────────┐
|
|
|
|
|
|
│ skills │ median_salary │ demand_skills │ corrected_demand_count │
|
|
|
|
|
|
│ varchar │ double │ int64 │ int64 │
|
|
|
|
|
|
├────────────┼───────────────┼───────────────┼────────────────────────┤
|
|
|
|
|
|
│ rust │ 210000.0 │ 232 │ 23 │
|
|
|
|
|
|
│ golang │ 184000.0 │ 912 │ 39 │
|
|
|
|
|
|
│ terraform │ 184000.0 │ 3248 │ 193 │
|
|
|
|
|
|
│ spring │ 175500.0 │ 364 │ 33 │
|
|
|
|
|
|
│ neo4j │ 170000.0 │ 277 │ 11 │
|
|
|
|
|
|
│ gdpr │ 169616.0 │ 582 │ 22 │
|
|
|
|
|
|
│ zoom │ 168438.0 │ 127 │ 12 │
|
|
|
|
|
|
│ graphql │ 167500.0 │ 445 │ 28 │
|
|
|
|
|
|
│ mongo │ 162250.0 │ 265 │ 14 │
|
|
|
|
|
|
│ fastapi │ 157500.0 │ 204 │ 3 │
|
|
|
|
|
|
│ bitbucket │ 155000.0 │ 478 │ 9 │
|
|
|
|
|
|
│ django │ 155000.0 │ 265 │ 5 │
|
|
|
|
|
|
│ crystal │ 154224.0 │ 129 │ 3 │
|
|
|
|
|
|
│ c │ 151500.0 │ 444 │ 23 │
|
|
|
|
|
|
│ atlassian │ 151500.0 │ 249 │ 9 │
|
|
|
|
|
|
│ typescript │ 151000.0 │ 388 │ 39 │
|
|
|
|
|
|
│ kubernetes │ 150500.0 │ 4202 │ 147 │
|
|
|
|
|
|
│ node │ 150000.0 │ 179 │ 22 │
|
|
|
|
|
|
│ ruby │ 150000.0 │ 736 │ 48 │
|
|
|
|
|
|
│ css │ 150000.0 │ 262 │ 13 │
|
|
|
|
|
|
│ airflow │ 150000.0 │ 9996 │ 386 │
|
|
|
|
|
|
│ redis │ 149000.0 │ 605 │ 17 │
|
|
|
|
|
|
│ vmware │ 148798.0 │ 136 │ 2 │
|
|
|
|
|
|
│ ansible │ 148798.0 │ 475 │ 14 │
|
|
|
|
|
|
│ jupyter │ 147500.0 │ 400 │ 15 │
|
|
|
|
|
|
├────────────┴───────────────┴───────────────┴────────────────────────┤
|
|
|
|
|
|
│ 25 rows 4 columns │
|
|
|
|
|
|
└─────────────────────────────────────────────────────────────────────┘
|
|
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SELECT
|
|
|
|
|
|
sd.skills,
|
|
|
|
|
|
ROUND(MEDIAN(jpf.salary_year_avg)) as median_salary,
|
|
|
|
|
|
COUNT(jpf.*) as demand_count,
|
|
|
|
|
|
ROUND(LN(COUNT(jpf.*)),2) as demand_count,
|
|
|
|
|
|
ROUND((LN(COUNT(jpf.*)) * MEDIAN(jpf.salary_year_avg))/1_000_000,2) as optimal_score
|
|
|
|
|
|
FROM job_postings_fact as jpf
|
|
|
|
|
|
INNER JOIN skills_job_dim as sjd
|
|
|
|
|
|
ON jpf.job_id = sjd.job_id
|
|
|
|
|
|
INNER JOIN skills_dim as sd
|
|
|
|
|
|
ON sjd.skill_id = sd.skill_id
|
|
|
|
|
|
WHERE
|
|
|
|
|
|
jpf.job_title_short LIKE 'Data Engineer'
|
|
|
|
|
|
AND
|
|
|
|
|
|
jpf.job_work_from_home = True
|
|
|
|
|
|
AND
|
|
|
|
|
|
jpf.salary_year_avg IS NOT NULL
|
|
|
|
|
|
GROUP BY sd.skills
|
|
|
|
|
|
HAVING COUNT(jpf.*) > 100
|
|
|
|
|
|
ORDER BY
|
|
|
|
|
|
optimal_score DESC
|
|
|
|
|
|
LIMIT 25
|
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
┌────────────┬───────────────┬──────────────┬──────────────┬───────────────┐
|
|
|
|
|
|
│ skills │ median_salary │ demand_count │ demand_count │ optimal_score │
|
|
|
|
|
|
│ varchar │ double │ int64 │ double │ double │
|
|
|
|
|
|
├────────────┼───────────────┼──────────────┼──────────────┼───────────────┤
|
|
|
|
|
|
│ terraform │ 184000.0 │ 193 │ 5.26 │ 0.97 │
|
|
|
|
|
|
│ python │ 135000.0 │ 1133 │ 7.03 │ 0.95 │
|
|
|
|
|
|
│ aws │ 137320.0 │ 783 │ 6.66 │ 0.91 │
|
|
|
|
|
|
│ sql │ 130000.0 │ 1128 │ 7.03 │ 0.91 │
|
|
|
|
|
|
│ airflow │ 150000.0 │ 386 │ 5.96 │ 0.89 │
|
|
|
|
|
|
│ spark │ 140000.0 │ 503 │ 6.22 │ 0.87 │
|
|
|
|
|
|
│ kafka │ 145000.0 │ 292 │ 5.68 │ 0.82 │
|
|
|
|
|
|
│ snowflake │ 135500.0 │ 438 │ 6.08 │ 0.82 │
|
|
|
|
|
|
│ azure │ 128000.0 │ 475 │ 6.16 │ 0.79 │
|
|
|
|
|
|
│ java │ 135000.0 │ 303 │ 5.71 │ 0.77 │
|
|
|
|
|
|
│ scala │ 137290.0 │ 247 │ 5.51 │ 0.76 │
|
|
|
|
|
|
│ git │ 140000.0 │ 208 │ 5.34 │ 0.75 │
|
|
|
|
|
|
│ kubernetes │ 150500.0 │ 147 │ 4.99 │ 0.75 │
|
|
|
|
|
|
│ databricks │ 132750.0 │ 266 │ 5.58 │ 0.74 │
|
|
|
|
|
|
│ redshift │ 130000.0 │ 274 │ 5.61 │ 0.73 │
|
|
|
|
|
|
│ gcp │ 136000.0 │ 196 │ 5.28 │ 0.72 │
|
|
|
|
|
|
│ nosql │ 134415.0 │ 193 │ 5.26 │ 0.71 │
|
|
|
|
|
|
│ hadoop │ 135000.0 │ 198 │ 5.29 │ 0.71 │
|
|
|
|
|
|
│ pyspark │ 140000.0 │ 152 │ 5.02 │ 0.7 │
|
|
|
|
|
|
│ mongodb │ 135750.0 │ 136 │ 4.91 │ 0.67 │
|
|
|
|
|
|
│ docker │ 135000.0 │ 144 │ 4.97 │ 0.67 │
|
|
|
|
|
|
│ r │ 134775.0 │ 133 │ 4.89 │ 0.66 │
|
|
|
|
|
|
│ go │ 140000.0 │ 113 │ 4.73 │ 0.66 │
|
|
|
|
|
|
│ github │ 135000.0 │ 127 │ 4.84 │ 0.65 │
|
|
|
|
|
|
│ bigquery │ 135000.0 │ 123 │ 4.81 │ 0.65 │
|
|
|
|
|
|
├────────────┴───────────────┴──────────────┴──────────────┴───────────────┤
|
|
|
|
|
|
│ 25 rows 5 columns │
|
|
|
|
|
|
└──────────────────────────────────────────────────────────────────────────┘
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Summary
|
|
|
|
|
|
This analysis examines the optimal skills for remote Data Engineer roles by combining salary
|
|
|
|
|
|
and demand into a single composite score — calculated as the log of job postings multiplied by
|
|
|
|
|
|
median salary. This approach rewards skills that are both well-compensated and widely requested,
|
|
|
|
|
|
avoiding the trap of chasing either high pay in niche roles or high volume in lower-paying ones.
|
|
|
|
|
|
The dataset covers 25 skills, each appearing in at least 100 remote job postings with a reported
|
|
|
|
|
|
salary, giving the findings strong statistical grounding.
|
|
|
|
|
|
The results reveal a clear tiering: Terraform, Python, AWS, and SQL occupy the top cluster with
|
|
|
|
|
|
optimal scores between 0.91–0.97, driven by strong salary floors ($130K–$184K) and massive demand.
|
|
|
|
|
|
Below them sits a rich mid-tier — Airflow, Spark, Kafka, Snowflake — where slightly lower demand
|
|
|
|
|
|
is offset by above-average salaries, particularly in streaming and orchestration. The bottom cluster
|
|
|
|
|
|
(Docker, Go, GitHub, BigQuery) still commands solid $135K+ medians but trails on demand volume,
|
|
|
|
|
|
making them valuable secondary skills rather than primary targets for career positioning.
|
|
|
|
|
|
|
|
|
|
|
|
Key Findings
|
|
|
|
|
|
|
|
|
|
|
|
Terraform is the highest-value single skill with the top optimal score (0.97)
|
|
|
|
|
|
and the highest median salary of any skill in the dataset at $184,000 — nearly
|
|
|
|
|
|
$50K above the group average. Despite relatively modest demand (193 postings),
|
|
|
|
|
|
its salary premium is so pronounced that it outscores even Python and SQL.
|
|
|
|
|
|
Infrastructure-as-code expertise is rare, commands a significant wage premium,
|
|
|
|
|
|
and is directly suited to remote work since all provisioning is done via CLI and
|
|
|
|
|
|
APIs.
|
|
|
|
|
|
Python and SQL are the volume anchors of the market, each appearing in over 1,100
|
|
|
|
|
|
remote postings — more than double any other skill — and both scoring 0.91–0.95.
|
|
|
|
|
|
Their median salaries ($135K and $130K respectively) are solid but not exceptional;
|
2026-03-19 10:31:51 +00:00
|
|
|
|
their dominance comes from ubiquity. For anyone enteri_ng the field, these two skills
|
2026-03-19 09:55:26 +00:00
|
|
|
|
represent the lowest-risk, highest-return investment — nearly every role expects them.
|
|
|
|
|
|
|
|
|
|
|
|
AWS leads the cloud platforms, outscoring Azure and GCP by a notable margin (0.91 vs.
|
|
|
|
|
|
0.79 and 0.72). All three cloud providers sit in the top half of the table, but AWS
|
|
|
|
|
|
uniquely combines strong demand (783 postings) with the highest cloud median salary
|
|
|
|
|
|
($137,320). This reflects AWS's continued dominance in enterprise data infrastructure
|
|
|
|
|
|
and its tight integration with modern data stacks. Azure and GCP remain important but
|
|
|
|
|
|
are stronger as complementary skills.
|
|
|
|
|
|
|
|
|
|
|
|
Streaming and orchestration tools (Kafka, Airflow, Spark) offer a high salary-to-demand
|
|
|
|
|
|
ratio, clustering between $140K–$150K median salaries with moderate but healthy demand
|
|
|
|
|
|
(292–503 postings each). These are the skills most likely to differentiate a mid-career
|
|
|
|
|
|
engineer, signalling the ability to manage real-time pipelines and complex DAG-based
|
|
|
|
|
|
workflows autonomously — exactly the profile remote-first teams are hiring for.
|
|
|
|
|
|
|
|
|
|
|
|
Infrastructure and containerisation skills (Kubernetes, Git, Docker) punch above
|
|
|
|
|
|
their demand weight on salary — Kubernetes in particular has the second-highest median
|
|
|
|
|
|
salary in the dataset at $150,500, despite appearing in only 147 postings. This
|
|
|
|
|
|
niche-but-lucrative pattern suggests that DevOps-adjacent data engineers who can
|
|
|
|
|
|
manage containerised workloads command a meaningful premium, even if the absolute
|
|
|
|
|
|
number of such roles is smaller. These are strong specialisation targets for engineers
|
|
|
|
|
|
already solid in Python/SQL/cloud.
|
|
|
|
|
|
|
|
|
|
|
|
*/
|
2026-03-19 10:31:51 +00:00
|
|
|
|
™
|