#!/usr/pkg/bin/ruby32
# $Id: misc_tests.rb 7466 2021-01-15 16:06:39Z flaterco $
# Miscellaneous checks to do after the last import.

require "pg"

db = PG::Connection.open(dbname: "harmbase2")

# 1. There should be no duplicated names.
# Every name that occurs on more than one data_sets row is listed on failure.

print "Checking for duplicate names: "
dupes = db.exec("select name from data_sets group by name having count(*) > 1")
if dupes.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  puts dupes.values
end

# 2. There should be only these 5 stations without states.
#  station_id |             name
# ------------+-------------------------------
#  1619000    | Johnston Atoll, Pacific Ocean
#  1619910    | Sand Island, Midway Islands
#  8447436    | Texas Tower, Georges Shoal
#  TPT2739    | Palmyra Island
#  TPT2737    | Howland Island
# (5 rows)
# N.B., Wake Island was assigned to MH in 2017.

Stateless = %w[1619000 1619910 8447436 TPT2739 TPT2737]
print "Checking for stations missing states: "
no_state = db.exec("select station_id, name from data_sets where state is null")
# Passing needs exactly five rows, none of whose station_id falls outside
# the Stateless list.
if no_state.ntuples == 5 && (no_state.column_values(0) - Stateless).empty?
  puts "pass"
else
  puts "FAIL"
  puts no_state.values
end

# 3. Verify that the state mentioned in the name matches the state code.
# Allowed exception:
#  station_id | state |            name
# ------------+-------+----------------------------
#  1890000    | MH    | Wake Island, Pacific Ocean
# (1 row)

# State code => the state name that must end the station name.
state_names = {
  "AK" => "Alaska",
  "AL" => "Alabama",
  "AS" => "American Samoa",
  "CA" => "California",
  "CT" => "Connecticut",
  "DC" => "DC",
  "DE" => "Delaware",
  "FL" => "Florida",
  "FM" => "FSM",
  "GA" => "Georgia",
  "GU" => "Guam",
  "HI" => "Hawaii",
  "LA" => "Louisiana",
  "MA" => "Massachusetts",
  "MD" => "Maryland",
  "ME" => "Maine",
  "MH" => "Marshall Islands",
  "MP" => "Marianas",
  "MS" => "Mississippi",
  "NC" => "North Carolina",
  "NH" => "New Hampshire",
  "NJ" => "New Jersey",
  "NY" => "New York",
  "OR" => "Oregon",
  "PA" => "Pennsylvania",
  "PR" => "Puerto Rico",
  "PW" => "Palau",
  "RI" => "Rhode Island",
  "SC" => "South Carolina",
  "TX" => "Texas",
  "VA" => "Virginia",
  "VI" => "Virgin Islands",
  "WA" => "Washington"
}

# What may follow the state name: an optional " Current" and up to two
# parenthesized "(sub)" or "(<digit>)" tags.  Single quotes keep the
# backslashes intact so they reach PostgreSQL unchanged.
name_suffix = '( Current)?( \((sub|\d)\)){0,2}'

# One "this state, but the name does not end with it" clause per state.
mismatch_clauses = state_names.map { |code, label|
  "state = '#{code}' and not name similar to '%, #{label}#{name_suffix}'"
}

print "Checking for stations with mismatched state names: "
query = db.exec("select station_id, state, name from data_sets where " +
                mismatch_clauses.join(" or "))
# Fail on more than one mismatch, or on a single mismatch that is not the
# allowed Wake Island exception.  && / || are used with explicit parentheses
# because the keyword forms `and` / `or` share one precedence level and
# associate left, which would let several mismatches pass whenever Wake
# Island happened to be returned first.
if query.ntuples > 1 || (query.ntuples == 1 && query.getvalue(0, 0) != "1890000")
  print "FAIL\n"
  query.each_row {|row|
    sid, state, name = row
    print "#{sid} #{state} #{name}\n"
  }
  print "\n"
else
  print "pass\n"
end

# 4. No station should have switched hemispheres from one year to the next.
# data_sets is joined to data_sets_old on station_id; a station is reported
# unless the signs of its two longitudes multiply to exactly 1.

print "Checking for stations that switched hemispheres: "
hemisphere_sql = "select A.station_id, A.name from data_sets A, data_sets_old B where A.station_id = B.station_id and sign(A.lng)*sign(B.lng) != 1"
flipped = db.exec(hemisphere_sql)
if flipped.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  puts flipped.values
end

# 5. GU, MH, MP, FM, and PW are all in the eastern hemisphere (lng must not
# be negative); AS is in the western hemisphere (lng must not be positive).

print "Checking for Pacific islands in the wrong hemisphere: "
misplaced = db.exec("select station_id, name from data_sets where state in ('GU', 'MH', 'MP', 'FM', 'PW') and lng < 0 or state = 'AS' and lng > 0")
if misplaced.ntuples.positive?
  puts "FAIL"
  puts misplaced.values
else
  puts "pass"
end

# 6. St. Lawrence Island is in time zone :America/Adak.
# Names are matched with a case-insensitive regular expression (~*).
# NOTE(review): the '.' in the pattern is unescaped, so it matches any
# character, not only a literal period.

print "Checking time zones on St. Lawrence Island: "
adak_sql = "select station_id, name from data_sets where name ~* 'St. Lawrence Island' and timezone != ':America/Adak'"
wrong_zone = db.exec(adak_sql)
if wrong_zone.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  puts wrong_zone.values
end

# 7. Port Saint Joe and White City, FL are in time zone :America/New_York.
# The two stations are selected by station_id (8728912 and 8728853).

print "Checking time zones for Port Saint Joe and White City, FL: "
not_eastern = db.exec("select station_id, name, timezone from data_sets where station_id in ('8728912', '8728853') and timezone != ':America/New_York'")
if not_eastern.ntuples.positive?
  puts "FAIL"
  puts not_eastern.values
else
  puts "pass"
end

# 8. No sub station should depend on either of those two stations.
# A row is reported when its ref_index equals the index of a data_sets row
# for station 8728912 or 8728853 whose datum is not null.

print "Checking for any subs referring to Port Saint Joe and White City, FL: "
dependents_sql = "select station_id, name, timezone from data_sets where ref_index in (select index from data_sets where station_id in ('8728912', '8728853') and datum is not null)"
dependents = db.exec(dependents_sql)
if dependents.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  puts dependents.values
end

# 9. Look for subs depending on wrong LST refs.
# A row is reported when its ref_index points at a data_sets row whose notes
# contain the text "for LST".

print "Checking for any subs referring to refs with LST problems: "
lst_subs = db.exec("select station_id, name from data_sets where ref_index in (select index from data_sets where notes like '%for LST%')")
if lst_subs.ntuples.positive?
  puts "FAIL"
  puts lst_subs.values
else
  puts "pass"
end

# Setup for 10 and 11.
# Temp table geo: the rows of data_sets and data_sets_old combined with
# union, keyed by sid (the part of station_id before the first underscore)
# rather than sidplus.

geo_sql = "create temp table geo as select substring(station_id from '^[^_]+') \
as sid, name, lat, lng, state from data_sets union select                    \
substring(station_id from '^[^_]+') as sid, name, lat, lng, state from       \
data_sets_old"
db.exec(geo_sql)

# 10. Look for stations with the same sid or coordinates, but different state.
# Coordinates count as the same when lat and lng each differ by less than
# 0.0001.

print "Check for stations that disagree on state: "
state_clash = db.exec("select distinct A.sid, A.state, A.name from geo A, geo B     \
where (A.sid = B.sid or (abs(A.lat - B.lat) < 0.0001 and abs(A.lng - B.lng) < \
0.0001)) and A.state != B.state order by A.sid")
if state_clash.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  state_clash.each_row do |sid, state, name|
    print "#{sid} #{state} #{name}\n"
  end
  puts
end

# 11. Look for stations with the same sid but different coordinates.
# Coordinates count as different when lat or lng differs by 0.0001 or more.
# Uses the geo temp table.

print "Check for stations that disagree on coordinates: "
coord_sql = "select distinct A.sid, A.name, A.lat, A.lng from geo A, geo B where A.sid \
= B.sid and (abs(A.lat - B.lat) >= 0.0001 or abs(A.lng - B.lng) >= 0.0001)   \
order by A.sid"
moved = db.exec(coord_sql)
if moved.ntuples.zero?
  puts "pass"
else
  puts "FAIL"
  moved.each_row do |sid, name, lat, lng|
    # Fixed-width coordinates keep the listing aligned.
    lat_text = format("%8.4f", lat)
    lng_text = format("%9.4f", lng)
    print "#{sid} #{lat_text} #{lng_text} #{name}\n"
  end
end
