Tutorial 7 Answers

Problem Set 7: Questions and Answers

Why is the homicide choropleth misleading? Fix it! (There are many ways to fix it – do anything you think is appropriate.)

Answer:

First, here is my code making the problematic homicide choropleth.

require(tidyverse)

Loading required package: tidyverse

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

require(sf)

Loading required package: sf
Linking to GEOS 3.9.1, GDAL 3.3.2, PROJ 7.2.1; sf_use_s2() is TRUE

library(knitr)

# load block group map for dc only
bg2010 <- st_read("H:/pppa_data_viz/2019/tutorial_data/lecture05/Census_Block_Groups__2010/Census_Block_Groups__2010.shp")

Reading layer `Census_Block_Groups__2010' from data source 
  `H:\pppa_data_viz\2019\tutorial_data\lecture05\Census_Block_Groups__2010\Census_Block_Groups__2010.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 450 features and 54 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99581
Geodetic CRS:  WGS 84

names(bg2010)

 [1] "OBJECTID"   "TRACT"      "BLKGRP"     "GEOID"      "P0010001"  
 [6] "P0010002"   "P0010003"   "P0010004"   "P0010005"   "P0010006"  
[11] "P0010007"   "P0010008"   "OP000001"   "OP000002"   "OP000003"  
[16] "OP000004"   "P0020002"   "P0020005"   "P0020006"   "P0020007"  
[21] "P0020008"   "P0020009"   "P0020010"   "OP00005"    "OP00006"   
[26] "OP00007"    "OP00008"    "P0030001"   "P0030003"   "P0030004"  
[31] "P0030005"   "P0030006"   "P0030007"   "P0030008"   "OP00009"   
[36] "OP00010"    "OP00011"    "OP00012"    "P0040002"   "P0040005"  
[41] "P0040006"   "P0040007"   "P0040008"   "P0040009"   "P0040010"  
[46] "OP000013"   "OP000014"   "OP000015"   "OP000016"   "H0010001"  
[51] "H0010002"   "H0010003"   "SHAPE_Leng" "SHAPE_Area" "geometry"

# we'll use the crime data again
c2018 <- st_read("H:/pppa_data_viz/2019/tutorial_data/lecture05/Crime_Incidents_in_2018/Crime_Incidents_in_2018.shp")

Reading layer `Crime_Incidents_in_2018' from data source 
  `H:\pppa_data_viz\2019\tutorial_data\lecture05\Crime_Incidents_in_2018\Crime_Incidents_in_2018.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 33645 features and 23 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -77.11232 ymin: 38.81467 xmax: -76.91002 ymax: 38.9937
Geodetic CRS:  WGS 84

# make smaller dataframe
vc2018 <- c2018[which(c2018$OFFENSE == "HOMICIDE" | c2018$OFFENSE == "BURGLARY"),]
names(vc2018)

 [1] "CCN"        "REPORT_DAT" "SHIFT"      "METHOD"     "OFFENSE"   
 [6] "BLOCK"      "XBLOCK"     "YBLOCK"     "WARD"       "ANC"       
[11] "DISTRICT"   "PSA"        "NEIGHBORHO" "BLOCK_GROU" "CENSUS_TRA"
[16] "VOTING_PRE" "LATITUDE"   "LONGITUDE"  "BID"        "START_DATE"
[21] "END_DATE"   "OBJECTID"   "OCTO_RECOR" "geometry"

# find which crimes are in which block groups
# vc2010 actually already has the block group, but i want to be sure you can do this yourself
bg2010.small <- bg2010[,c("TRACT","BLKGRP","P0010001")]
cbg <- st_intersection(vc2018,bg2010.small)

Warning: attribute variables are assumed to be spatially constant throughout all
geometries

head(cbg)

Simple feature collection with 6 features and 26 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -77.0734 ymin: 38.90259 xmax: -77.05759 ymax: 38.91258
Geodetic CRS:  WGS 84
           CCN               REPORT_DAT    SHIFT METHOD  OFFENSE
1826  18064713 2018-04-23T13:04:28.000Z      DAY OTHERS BURGLARY
11452 18134729 2018-08-14T03:10:42.000Z MIDNIGHT OTHERS BURGLARY
26674 18042061 2018-03-15T16:09:21.000Z  EVENING OTHERS BURGLARY
30745 18026424 2018-02-16T14:31:32.000Z      DAY OTHERS BURGLARY
30750 18026435 2018-02-16T15:42:03.000Z  EVENING OTHERS BURGLARY
23472 18136654 2018-08-17T04:38:28.000Z MIDNIGHT OTHERS BURGLARY
                                            BLOCK XBLOCK YBLOCK WARD ANC
1826             3100 - 3199 BLOCK OF K STREET NW 394626 137194    2  2E
11452            3036 - 3099 BLOCK OF M STREET NW 394737 137483    2  2E
26674            3000 - 3099 BLOCK OF N STREET NW 394778 137665    2  2E
30745 2800 - 2899 BLOCK OF PENNSYLVANIA AVENUE NW 395005 137471    2  2E
30750 2800 - 2899 BLOCK OF PENNSYLVANIA AVENUE NW 395005 137471    2  2E
23472      3700 - 3799 BLOCK OF RESERVOIR ROAD NW 393634 138304    2  2E
      DISTRICT PSA NEIGHBORHO BLOCK_GROU CENSUS_TRA VOTING_PRE LATITUDE
1826         2 206  Cluster 4   000100 4     000100 Precinct 5 38.90258
11452        2 206  Cluster 4   000100 4     000100 Precinct 5 38.90519
26674        2 206  Cluster 4   000100 4     000100 Precinct 5 38.90683
30745        2 206  Cluster 4   000100 4     000100 Precinct 5 38.90508
30750        2 206  Cluster 4   000100 4     000100 Precinct 5 38.90508
23472        2 206  Cluster 4   000201 1     000201 Precinct 6 38.91258
      LONGITUDE        BID               START_DATE                 END_DATE
1826  -77.06195 GEORGETOWN 2018-04-23T10:56:35.000Z                     <NA>
11452 -77.06068 GEORGETOWN 2018-08-13T22:52:11.000Z 2018-08-14T00:19:42.000Z
26674 -77.06021       <NA> 2018-03-14T03:48:45.000Z 2018-03-14T03:49:12.000Z
30745 -77.05759 GEORGETOWN 2018-02-16T01:25:28.000Z 2018-02-16T02:30:15.000Z
30750 -77.05759 GEORGETOWN 2018-02-15T17:30:53.000Z 2018-02-16T08:15:53.000Z
23472 -77.07340       <NA> 2018-08-17T03:36:29.000Z 2018-08-17T03:46:04.000Z
       OBJECTID  OCTO_RECOR  TRACT BLKGRP P0010001                   geometry
1826  259183436 18064713-01 000100      4     1000 POINT (-77.06196 38.90259)
11452 259193062 18134729-01 000100      4     1000  POINT (-77.06068 38.9052)
26674 259240847 18042061-01 000100      4     1000 POINT (-77.06021 38.90684)
30745 259245997 18026424-01 000100      4     1000 POINT (-77.05759 38.90509)
30750 259246002 18026435-01 000100      4     1000 POINT (-77.05759 38.90509)
23472 259224604 18136654-01 000201      1     3916  POINT (-77.0734 38.91258)

# check the size
dim(cbg)

[1] 1567   27

# get rid of geometry from points file
st_geometry(cbg) <- NULL

# add up to the block group level
cbg2 <- group_by(cbg, TRACT, BLKGRP, OFFENSE)
cbg3 <- summarize(.data = cbg2, tot_crimes = n())

`summarise()` has grouped output by 'TRACT', 'BLKGRP'. You can override using
the `.groups` argument.

dim(cbg3)

[1] 478   4

# make crime data wide
cbg4 <- pivot_wider(data = cbg3,
                    id_cols = c("TRACT","BLKGRP"),
                    names_from = OFFENSE,
                    values_from = tot_crimes)

# check stuff 
head(cbg4)

# A tibble: 6 × 4
# Groups:   TRACT, BLKGRP [6]
  TRACT  BLKGRP BURGLARY HOMICIDE
  <chr>  <chr>     <int>    <int>
1 000100 2             1       NA
2 000100 3             1       NA
3 000100 4             5       NA
4 000201 1             3       NA
5 000202 1             2       NA
6 000202 2             2       NA

dim(cbg4)

[1] 387   4

# merge each back into the block groups file so we have all block groups with shapes and population
bg2010.c2 <- merge(x = bg2010.small, 
                   y = cbg4, 
                   by = c("TRACT","BLKGRP"), 
                   all = TRUE)
dim(bg2010.c2)

[1] 450   6

# --- make quartiles for homicides
# make NA values zero
bg2010.c2$HOMICIDE <- ifelse(test = is.na(bg2010.c2$HOMICIDE) == TRUE,
                             yes = 0,
                             no = bg2010.c2$HOMICIDE)
# make quartiles
bg2010.c2$hom.quartile2 <- ntile(bg2010.c2$HOMICIDE, 4)
# check
table(bg2010.c2$hom.quartile2)


  1   2   3   4 
113 113 112 112

# make color ramp
# see 
four.colors <- c("#edf8e9","#bae4b3","#74c476","#238b45")

# homicide choropleth
homc <- ggplot() +
  geom_sf(data = bg2010.c2, 
          mapping = aes(fill = as.factor(hom.quartile2)), 
          color = "white") + 
  scale_fill_manual(values = four.colors) + 
  theme(panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA))
homc

There are many ways to improve this chart, but one thing that bothered me quite a bit was that the distribution of homicides overlapped by quartile. Fundamentally, using quartiles here was a poor choice.

Here’s how I show that quartiles are a poor choice:

# make a facet wrapped histogram to show troubles with distribution
barso <- ggplot() + 
  geom_histogram(data = bg2010.c2,
            mapping = aes(x = HOMICIDE)) +
  facet_wrap(~hom.quartile2)
barso

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The first, second and third quartiles are all zeros!

Here’s a slight improvement. I separate out the zero block groups from the non-zero block groups, and then plot the map again. I then follow with a histogram legend.. which shows that maybe I still need to make some fixes, since the first, second, and part of the third quartiles are all block groups with 1 homicide. However, this second one is clearly better than the first.

# ---- now lets make a better version
# first a marker for whether homicides are zero
bg2010.c2$not.zero.hom <- ifelse(test = bg2010.c2$HOMICIDE == 0,
                                 yes = 0, 
                                 no = 1)

# now make a quartile for each of these two groups -- zeros and non-zeros
# just keep in mind that the quartiles for the zeros are garbage, b/c all obs are zero!
bg2010.c2 <- group_by(.data = bg2010.c2, not.zero.hom)
bg2010.c2 <- mutate(.data = bg2010.c2,
                    not.zero.hom.q = ntile(HOMICIDE, n = 4))
table(bg2010.c2$not.zero.hom.q)


  1   2   3   4 
114 112 112 112

table(bg2010.c2$not.zero.hom.q,bg2010.c2$HOMICIDE)

   
     0  1  2  3  4  5  6  7
  1 89 25  0  0  0  0  0  0
  2 88 24  0  0  0  0  0  0
  3 88 13 11  0  0  0  0  0
  4 88  0 10  8  3  1  1  1

# now make a final variable that is zero if there are zero homicides and the quartile number if
# there are non-zero homicides
# this is the not.zero.hom (0/1) * quartile by group
bg2010.c2$hom.zero.quart <- bg2010.c2$not.zero.hom*bg2010.c2$not.zero.hom.q

# now re-do the map with this new variable and a new light blue color for the zeros
c0 <- "#d4ebf2"
homc <- ggplot() +
    geom_sf(data = bg2010.c2, 
            mapping = aes(fill = as.factor(hom.zero.quart)), 
            color = "white") + 
    scale_fill_manual(values = c(c0,four.colors)) + 
    theme(panel.background = element_rect(fill = "transparent",colour = NA),
          plot.background = element_rect(fill = "transparent",colour = NA))
homc

# and a little histogram legend
# find no. of block groups with and without homicides
# no homicides
bg.no.hom <- nrow(filter(.data = bg2010.c2, HOMICIDE == 0))
bg.hom <- nrow(filter(.data = bg2010.c2, HOMICIDE > 0))
barso <- ggplot() + 
  geom_histogram(data = filter(.data = bg2010.c2, HOMICIDE > 0),
                 mapping = aes(x = HOMICIDE, fill = as.factor(hom.zero.quart)),
                 bins = 8) +
  scale_fill_manual(values = c(four.colors)) + 
  scale_x_continuous(breaks = c(seq(1,8,1)), labels = c(seq(1,8,1))) +
  #scale_y_continuous(labels = c(seq(1,8,1), breaks = c(seq(1,8,1))) +
  labs(subtitle = paste0(bg.no.hom," block groups have zero homicides; ", bg.hom, " block groups have > 0"),
       y = "number of block groups",
       x = "number of homicides")
  
barso

Make choropleths for both homicides and burglaries in rates (per residential population). Recall that population is in bg2010 when you load it.

Answer:

# make NA values zero
bg2010.c2$BURGLARY <- ifelse(test = is.na(bg2010.c2$BURGLARY) == TRUE,
                             yes = 0,
                             no = bg2010.c2$BURGLARY)
  
# make choropleths for burglaries and homicides in rates
choro.maker <- function(varin, varname)
{
  # calculate rate of variable
  print("beginning of function")
  print(names(bg2010.c2))
  bg2010.c2$rate <- bg2010.c2[[varin]] / bg2010.c2$P0010001
  print(summary(bg2010.c2$rate))
  
  # first a marker for whether the rate is zero
  print("before marker")
  bg2010.c2$not.zero.hom <- ifelse(test = bg2010.c2$rate == 0,
                                 yes = 0, 
                                 no = 1)

  # now make a quartile for each of these two groups -- zeros and non-zeros
  # just keep in mind that the quartiles for the zeros are garbage, b/c all obs are zero!
  print("before group_by")
  bg2010.c2 <- group_by(.data = bg2010.c2, not.zero.hom)
  bg2010.c2 <- mutate(.data = bg2010.c2,
                    not.zero.hom.q = ntile(rate, n = 4))
  
  # now make a final variable that is zero if there are zero homicides and the quartile number if
  # there are non-zero homicides
  # this is the not.zero.hom (0/1) * quartile by group
  bg2010.c2$hom.zero.quart <- bg2010.c2$not.zero.hom*bg2010.c2$not.zero.hom.q
  
  # now make a choropleth of these guys
  homc <- ggplot() +
    geom_sf(data = bg2010.c2, 
            mapping = aes(fill = as.factor(hom.zero.quart)), 
            color = "white") + 
    scale_fill_manual(values = c(c0,four.colors)) + 
    theme(panel.background = element_rect(fill = "transparent",colour = NA),
          plot.background = element_rect(fill = "transparent",colour = NA)) +
    labs(subtitle = paste0(varname, " rate by block group"))
  print(homc)
  return(homc)  
} # end of function to make choropleth for rates

# run this function
g0 <- choro.maker(varin = "HOMICIDE", varname = "Homicide")

[1] "beginning of function"
 [1] "TRACT"          "BLKGRP"         "P0010001"       "BURGLARY"      
 [5] "HOMICIDE"       "geometry"       "hom.quartile2"  "not.zero.hom"  
 [9] "not.zero.hom.q" "hom.zero.quart"
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000000 0.0000000 0.0000000 0.0003333 0.0000000 0.0303030 
[1] "before marker"
[1] "before group_by"

g0 <- choro.maker(varin = "BURGLARY", varname = "Buglary")

[1] "beginning of function"
 [1] "TRACT"          "BLKGRP"         "P0010001"       "BURGLARY"      
 [5] "HOMICIDE"       "geometry"       "hom.quartile2"  "not.zero.hom"  
 [9] "not.zero.hom.q" "hom.zero.quart"
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000000 0.0008722 0.0018753 0.0025883 0.0033515 0.0303030 
[1] "before marker"
[1] "before group_by"

Pull in other geographic data (the data we used earlier this semester is fine, as is any other of your choice) and make a choropleth map with a histogram legend.

Answer:

Here I am using some data I created for a project I’m working on about condominiums. I first look at the distribution and then decide how to create the categories.

# read the block group data I created elsewhere
fn <- "H:/condos/r_output/la_condo_exploration/condos_by_blockgroup_20230331.rds"
bg.data <- readRDS(fn)

# read the block group map
fn <- "H:/maps/united_states/census2020/block_groups/nhgis0001_shape/US_blck_grp_2020.shp"
us.bg.map <- st_read(fn)

Reading layer `US_blck_grp_2020' from data source 
  `H:\maps\united_states\census2020\block_groups\nhgis0001_shape\US_blck_grp_2020.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 241764 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -7115208 ymin: -1685018 xmax: 3321632 ymax: 4591848
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic

# cut to just la county 
bg.map.la <- us.bg.map[which(us.bg.map$STATE == "06" & us.bg.map$COUNTY == "037"),]

# merge the two of them
bg.both <- merge(x = bg.map.la,
                 y = bg.data,
                 by = c("TRACTCE","BLKGRPCE"),
                 all = TRUE)

# check on the merge
dim(bg.map.la)

[1] 6587   16

dim(bg.data)

[1] 6588   10

dim(bg.both)

[1] 6588   24

# lets get rid of all ratios that are > 1
bg.both[which(bg.both$shr.condo > 1),]

Simple feature collection with 13 features and 23 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -2048787 ymin: -181877.8 xmax: -2018635 ymax: -98299.32
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
First 10 features:
     TRACTCE BLKGRPCE         GISJOIN STATEFP COUNTYFP        GEOID
156   108102        1 G06003701081021      06      037 060371081021
1399  206051        2 G06003702060512      06      037 060372060512
1406  206202        1 G06003702062021      06      037 060372062021
1419  207304        2 G06003702073042      06      037 060372073042
1420  207305        1 G06003702073051      06      037 060372073051
1815  226002        1 G06003702260021      06      037 060372260021
2312  267901        2 G06003702679012      06      037 060372679012
2453  274100        3 G06003702741003      06      037 060372741003
2484  275604        2 G06003702756042      06      037 060372756042
5347  576001        1 G06003705760011      06      037 060375760011
          NAMELSAD MTFCC FUNCSTAT   ALAND AWATER    INTPTLAT     INTPTLON
156  Block Group 1 G5030        S  497623      0 +34.2754493 -118.5593440
1399 Block Group 2 G5030        S  576107   9675 +34.0391786 -118.2354901
1406 Block Group 1 G5030        S  377893      0 +34.0467318 -118.2403201
1419 Block Group 2 G5030        S   25157      0 +34.0470179 -118.2508304
1420 Block Group 1 G5030        S  202772      0 +34.0494280 -118.2496616
1815 Block Group 1 G5030        S 1142387      0 +34.0353003 -118.2520395
2312 Block Group 2 G5030        S  153123      0 +34.0546533 -118.4088266
2453 Block Group 3 G5030        S   44489      0 +33.9844170 -118.4437236
2484 Block Group 2 G5030        S   84955      0 +33.9696618 -118.4263120
5347 Block Group 1 G5030        S  607091 672486 +33.7622888 -118.1859177
     Shape_Leng Shape_Area num.lots num.condos num.res tot.lot.size.res
156   5171.6671  497620.01      488        476     475       2379570.96
1399  4050.6530  585782.78      675        454     433        261298.98
1406  2956.3654  377891.94      523        340     316        243459.58
1419   652.7255   25157.73      418        207     206         95367.79
1420  2521.3854  202773.10     1031        909     362        126115.68
1815  4949.0666 1142388.58     1837        280     155         80961.73
2312  2376.7435  153122.79      977        976     967       1043096.35
2453  1072.9688   44489.06      813        811     805        278760.47
2484  1321.7396   84955.22      429        420     342        415811.15
5347  6201.7905  607089.81     1007        984     758        193186.07
     tot.log.size.condo strc.size.res strc.size.condo shr.condo
156          2379570.96     1480.6824       1480.6824  1.002105
1399          243339.07     1006.5365        937.9434  1.048499
1406          194812.69      737.1648        563.5632  1.075949
1419           60736.85     2750.1927       1142.3394  1.004854
1420          116057.66      435.5010        395.4426  2.511050
1815           43228.74      240.6275        133.2462  1.806452
2312          865286.50     1377.4084       1376.0379  1.009307
2453          278760.47     1515.1685       1515.1685  1.007453
2484          236658.81      903.0350        903.0350  1.228070
5347          193186.07      925.1817        925.1817  1.298153
                           geometry
156  MULTIPOLYGON (((-2041986 -1...
1399 MULTIPOLYGON (((-2018635 -1...
1406 MULTIPOLYGON (((-2019210 -1...
1419 MULTIPOLYGON (((-2020338 -1...
1420 MULTIPOLYGON (((-2019868 -1...
1815 MULTIPOLYGON (((-2020300 -1...
2312 MULTIPOLYGON (((-2034143 -1...
2453 MULTIPOLYGON (((-2039178 -1...
2484 MULTIPOLYGON (((-2038265 -1...
5347 MULTIPOLYGON (((-2021598 -1...

# now drop if > 1
bg.both <- bg.both[which(bg.both$shr.condo <= 1),]

# lets get rid of the islands b/c they mess up the map
dim(bg.both)

[1] 6531   24

bg.both <- bg.both[which(as.numeric(bg.both$INTPTLAT) > 33.5),]
dim(bg.both)

[1] 6525   24

# make a choropleth
choro <- ggplot() +
  geom_sf(data = bg.both,
          mapping = aes(fill = shr.condo),
          color = "white")
choro

But this graph is really awful! Mostly white lines. So here is a fixed version. I focus just on the city of Los Angeles and I make some modifications to the color scheme.

# read in shape file with all incorporated municipalities in LA county
city.map.file <- "H:/maps/los_angeles/city_boundaries/20101220_inc_cities_only.shp"
city.map <- st_read(city.map.file)

Reading layer `20101220_inc_cities_only' from data source 
  `H:\maps\los_angeles\city_boundaries\20101220_inc_cities_only.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 104 features and 20 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 6281837 ymin: 1574188 xmax: 6659162 ymax: 2102434
Projected CRS: NAD83 / California zone 5 (ftUS)

head(city.map)

Simple feature collection with 6 features and 20 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 6390856 ymin: 1922165 xmax: 6586892 ymax: 2102434
Projected CRS: NAD83 / California zone 5 (ftUS)
  OBJECTID AREA    PERIMETER GPLAN_INDE GPLAN_IN_1 CITY    CITYNAME_A
1        1    0 678822.57063          2          2 1418     LANCASTER
2        8    0 679381.41469          9         10 1971      PALMDALE
3       17    0   9495.81762         18         22 2356 SANTA CLARITA
4       18    0     16.42616         19         22 2356 SANTA CLARITA
5       19    0   2673.60151         20         22 2356 SANTA CLARITA
6       23    0  36599.03312         24         25 2316  SAN FERNANDO
  SYMBOL_COL COMP_NO TYPE ADOPTED SYMBOL NAME   ACRES    CITY_COMM_ ZCIP_PHASE
1         13       0 <NA>    <NA>     13 <NA> 60489.5     LANCASTER       <NA>
2         67       0 <NA>    <NA>     67 <NA> 67951.7      PALMDALE       <NA>
3         25       0 <NA>    <NA>     25 <NA>    54.0 SANTA CLARITA       <NA>
4         25       0 <NA>    <NA>     25 <NA>     0.0 SANTA CLARITA       <NA>
5         25       0 <NA>    <NA>     25 <NA>     6.1 SANTA CLARITA       <NA>
6         25       0 <NA>    <NA>     25 <NA>  1518.2  SAN FERNANDO       <NA>
  SQ_MILES        JURISDICTI   Shape_area    Shape_len
1   94.515 INCORPORATED CITY 2.634923e+09 678822.57355
2  106.174 INCORPORATED CITY 2.959975e+09 679381.41837
3    0.084 INCORPORATED CITY 2.352046e+06   9495.81836
4    0.000 INCORPORATED CITY 1.187475e+01     16.42601
5    0.009 INCORPORATED CITY 2.636935e+05   2673.60099
6    2.372 INCORPORATED CITY 6.613310e+07  36599.03420
                        geometry
1 POLYGON ((6500775 2066684, ...
2 POLYGON ((6577419 2062852, ...
3 POLYGON ((6393158 1953813, ...
4 POLYGON ((6394042 1953301, ...
5 POLYGON ((6393931 1952652, ...
6 POLYGON ((6430051 1932497, ...

# make just la city map
la <- city.map[which(city.map$CITYNAME_A == "LOS ANGELES"),]

# chop the block group maps to the bounding box of the city of los angeles
# but first give block group map same projection as la city map
bg.both <- st_transform(bg.both, crs = st_crs(la))
la.bgs.only <- st_crop(x = bg.both, y = st_bbox(la))

Warning: attribute variables are assumed to be spatially constant throughout all
geometries

# now re-do choropleth with just city of la
choro <- ggplot() +
  geom_sf(data = la.bgs.only,
          mapping = aes(fill = shr.condo),
          color = "white",
          size = 0.2) +
  geom_sf(data = la,
          fill = "transparent",
          color = "purple") +
  scale_fill_continuous(high = "#08519c", low = "#eff3ff")
choro

Make a dot density map with the data of your choice and at least two groups.

Answer:

Here I am showing a dot density map with one group – condos in LA. I don’t have an obvious second group in these data, but this should be enough to figure out how to do two groups if you wanted to.

# we'll use the la cit block group map from above
# lets plot condos in red and plot one dot for each condo
la.bgs.only$ten.condos <- la.bgs.only$num.condos / 10
# if less than 0.1, assign zero
la.bgs.only$ten.condos <- ifelse(test = (la.bgs.only$ten.condos < 1 & la.bgs.only$ten.condos > 0),
                                   yes = 1,
                                   no = la.bgs.only$ten.condos)
la.bgs.only$ten.condos.int <- as.integer(la.bgs.only$ten.condos)
print("this is the thing to make dots from")

[1] "this is the thing to make dots from"

print(head(la.bgs.only[,c("ten.condos.int")]))

Simple feature collection with 6 features and 1 field
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 6470024 ymin: 1914053 xmax: 6477987 ymax: 1924022
Projected CRS: NAD83 / California zone 5 (ftUS)
  ten.condos.int                       geometry
1              3 POLYGON ((6474760 1918148, ...
2              1 POLYGON ((6473643 1915931, ...
3              0 POLYGON ((6471941 1918304, ...
4              0 POLYGON ((6473844 1921026, ...
5              0 POLYGON ((6477829 1917032, ...
6              2 POLYGON ((6475701 1915626, ...

# get the points to plot
hdots <- st_sample(x = la.bgs.only, size = la.bgs.only$ten.condos.int, 
                   type = "random", progress = TRUE)
hdots <- st_cast(x = hdots, to = "POINT")
hdots.mat <- st_coordinates(hdots)
hdats.df <- as.data.frame(hdots.mat)
hdats.df <- setNames(object = hdats.df, nm = c("lon","lat"))

# plot these dots with an la map county map behind it
# make the plot
dotso <- ggplot() +
        geom_sf(data = la,
                fill = "transparent",
                color = "purple") +
        geom_point(data = hdats.df,
                   mapping = aes(x=lon, y = lat),
                   shape = ".")
dotso