Merge pull request #1 from rodluger/dev

Merged dev branch
rodluger · Jul 16, 2016 · 3718aef · 3718aef
2 parents bdfb72e + e24bece
commit 3718aef
Show file tree

Hide file tree

Showing 47 changed files with 177,286 additions and 325 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,5 @@
 include README.md
 include everest/tables/*.csv
 include everest/tables/*.tsv
+include everest/tables/*.info
 include everest/pbs/*.pbs
diff --git a/bin/everest b/bin/everest
@@ -13,9 +13,10 @@ import everest
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser(prog = 'everest', add_help = True)
-  parser.add_argument("epic", type = int, help = 'The EPIC target number (required)')
+  parser.add_argument("epic", type = int, help = 'The EPIC target number or the campaign number (required)')
   parser.add_argument("-a", "--aperture", action = 'store_true', help = 'Plot the aperture used for the photometry')
   parser.add_argument("-c", "--ccd", action = 'store_true', help = 'Plot the location of the target on the CCD')
+  parser.add_argument("-k", "--sky", action = 'store_true', help = 'Plot the location of the target on the sky')
   parser.add_argument("-i", "--interactive", action = 'store_true', help = 'Plot in interactive mode')
   parser.add_argument("-n", "--contamination", action = 'store_true', help = 'Plot the contamination analysis')
   parser.add_argument("-p", "--plot", action = 'store_true', help = 'Plot the everest de-trended light curve (default)')
@@ -28,10 +29,15 @@ if __name__ == '__main__':
   args = parser.parse_args()
 
   # Plot is default
-  if not any([getattr(args, a) for a in ['aperture', 'ccd', 'contamination', 'autocorrelation', 
+  if not any([getattr(args, a) for a in ['aperture', 'ccd', 'sky', 'contamination', 'autocorrelation', 
                                          'postagestamp', 'crossvalidation', 'k2sff', 'k2sc', 'k2varcat']]):
     args.plot = True
 
+  # Is this a campaign number?
+  if args.epic < 20:
+    everest.usertools.Detector(args.epic)
+    quit()
+
   # Get the data
   star = everest.Everest(args.epic)
 
@@ -40,6 +46,8 @@ if __name__ == '__main__':
     star.aperture()
   if args.ccd:
     star.ccd()
+  if args.sky:
+    star.sky()
   if args.contamination:
     star.contamination()
   if args.crossvalidation:

diff --git a/docs/211069540_everest.jpeg b/docs/211069540_everest.jpeg
diff --git a/docs/detector.rst b/docs/detector.rst
@@ -0,0 +1,14 @@
+.. automodule:: everest.usertools.detector
+   :members:
+
+.. raw:: html
+
+  <script>
+    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+    })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
+
+    ga('create', 'UA-47070068-2', 'auto');
+    ga('send', 'pageview');
+  </script>
diff --git a/docs/everest_example.jpeg b/docs/everest_example.jpeg
diff --git a/docs/everest_example.jpg b/docs/everest_example.jpg
diff --git a/docs/issues.rst b/docs/issues.rst
@@ -2,8 +2,8 @@ Known Issues
 ============
 
 As we discuss in the paper, **EVEREST** has certain limitations, particularly when
-it comes to saturated stars and stars in crowded apertures. Below we outline these
-limitations with some examples.
+it comes to saturated stars, stars in crowded apertures, and very variable stars. 
+Below we outline these limitations with some examples.
 
 .. contents::
    :local:
@@ -66,6 +66,24 @@ greater than 2 or 3. Other pipelines are likely to perform better for these targ
           Note that saturated and crowded stars were **not** included \
           when computing the overall performance of **EVEREST** relative to \
           other pipelines (such as in `Figures 10-15 <precision.html>`_ in our paper).
+
+RR Lyrae and very variable stars
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. figure:: 211069540_everest.jpeg
+ :width: 600px
+ :align: center
+ :height: 100px
+ :figclass: align-center
+
+The :py:mod:`everest` pipeline is also likely to fail for very short-period
+variable stars, such as RR Lyrae stars. When the stellar variability signal is
+stronger and at a higher frequency than the instrumental signal,
+nearly all the de-trending power comes from the GP, and the resulting CDPP is rather 
+insensitive to the value of the PLD coefficients, leading to poor de-trending. Imperfect
+optimization of the GP can also lead to damping of the stellar variability signal,
+which is evident in the light curve shown above. Consider using the 
+`K2VARCAT catalog <https://archive.stsci.edu/prepds/k2varcat/>`_ for these stars.
 
 Ultrashort-period EBs
 ~~~~~~~~~~~~~~~~~~~~~
@@ -81,7 +99,7 @@ eclipsing binaries. If the eclipses take up a significant fraction of the orbit,
 not much continuum flux to train the model on. It's also likely that the eclipses 
 (particularly the secondaries) may not be properly identified as outliers, in which case
 the GP optimization step will favor a kernel that captures the short timescale, high amplitude
-variability introduced by these eclipses. When this happens, all the de-trending power
+variability introduced by these eclipses. As in the variable star case, all the de-trending power
 comes from the GP, and the resulting CDPP is insensitive to the value of the PLD coefficients,
 which as a result end up taking on effectively random values. This results in light curves
 like the one above, where the eclipses get washed out and the white noise gets inflated

diff --git a/docs/modules.rst b/docs/modules.rst
@@ -30,6 +30,7 @@ code can be found on `github <https://github.com/rodluger/everest>`_.
    :caption: everest.usertools
 
    ccd
+   detector
    selector
    ui
 

diff --git a/docs/paper.rst b/docs/paper.rst
@@ -2,7 +2,7 @@ The Paper
 =========
 
 Below we provide links to Python scripts to generate all the figures in the
-`paper <https://github.com/rodluger/everest/blob/master/paper/tex/k2.pdf>`_.
+`paper <http://arxiv.org/abs/1607.00524>`_.
 Most allow the user to generate the figures from scratch, but some load in `.npz`
 files with pre-downloaded and pre-de-trended data. It shouldn't be *too* hard to
 generate that data yourself, though...

diff --git a/docs/quick_access.rst b/docs/quick_access.rst
@@ -26,10 +26,11 @@ for EPIC 205071984:
 The :py:mod:`everest` command accepts several options, which we list below.
 
 ====================  =================================================================================
-:py:obj:`epic`        The **EPIC** target number (`required`)
+:py:obj:`id`          The **EPIC** target number *or* the *K2* campaign number (`required`)
 :py:obj:`-a`          Plot the aperture used for the photometry
 :py:obj:`-c`          Plot the location of the target on the CCD
 :py:obj:`-i`          Plot in interactive mode
+:py:obj:`-k`          Plot all stars on the *K2* field of view (`interactive`)
 :py:obj:`-n`          Plot the contamination analysis
 :py:obj:`-p`          Plot the **EVEREST** de-trended light curve (`default`)
 :py:obj:`-r`          Plot the autocorrelation/GP fitting result

diff --git a/docs/running_everest.rst b/docs/running_everest.rst
@@ -151,6 +151,28 @@ specify the mask. The transit duration can be adjusted with the slider
 above the ``OK`` and ``Cancel`` buttons. Click ``OK`` and then ``Detrend``
 to see the results.
 
+The K2 Field of View
+~~~~~~~~~~~~~~~~~~~~
+
+The `K2` field of view for a given campaign can be plotted by calling
+
+.. code-block:: python
+
+  from everest.usertools import Detector
+  Detector(2)
+
+.. figure:: running_everest8.jpeg
+ :width: 400px
+ :align: center
+ :height: 100px
+ :figclass: align-center
+
+Stars are colored according to their `Kepler` band magnitudes. A slider at the
+left allows the user to make cuts based on the contamination metric; note that
+stars near the edges of the detector tend to have high contamination metrics
+because of how the PRF becomes elongated far away from the center of the CCD.
+Double-clicking a star will download and plot its :py:mod:`everest` light curve.
+
 Additional Stuff
 ~~~~~~~~~~~~~~~~
 

diff --git a/docs/running_everest8.jpeg b/docs/running_everest8.jpeg
diff --git a/everest/compute.py b/everest/compute.py
@@ -30,7 +30,7 @@ def Compute(EPIC, run_name = 'default', clobber = False, apnum = 15,
             ps_iter = 30, ps_masks = 10, npc_arr = np.arange(25, 260, 10),
             inject = {}, log_level = logging.DEBUG, scatter_alpha = 0.,
             screen_level = logging.CRITICAL, gp_iter = 2, 
-            jpeg_quality = 30, fig_ext = 'jpg', **kwargs):
+            jpeg_quality = 30, fig_ext = 'jpg', use_k2sff_aperture = True, **kwargs):
   '''
   This is the main :py:mod:`everest` routine. Here we download and de-trend
   a given `EPIC` target.
@@ -70,6 +70,8 @@ def Compute(EPIC, run_name = 'default', clobber = False, apnum = 15,
   :param int jpeg_quality: If plotting JPEGs, this number (1-95) sets the image quality. Default `30`
   :param str fig_ext: The figure extension. Default `jpg`. I found that this doesn't play nice with the \
                       `MacOSX` :py:mod:`matplotlib` backend, so consider switching this to `png` or switching backends
+  :param use_k2sff_aperture: Use apertures determined by the K2SFF team? Default `True`
+  :type use_k2sff_aperture: bool
   
   :returns: A very large dictionary:
   
@@ -97,7 +99,7 @@ def Compute(EPIC, run_name = 'default', clobber = False, apnum = 15,
   '''
 
   # Grab the data
-  k2star = GetK2Data(EPIC, apnum = apnum)
+  k2star = GetK2Data(EPIC, apnum = apnum, use_k2sff_aperture = use_k2sff_aperture)
 
   # Get the campaign
   campaign = Campaign(EPIC)

diff --git a/everest/data.py b/everest/data.py
@@ -19,6 +19,7 @@
 import numpy as np
 import re
 import os
+import sys
 import six
 from six.moves import urllib
 from tempfile import NamedTemporaryFile
@@ -194,7 +195,7 @@ class k2data(object):
   pass
 
 def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
-              calculate_contamination = True):
+              calculate_contamination = True, use_k2sff_aperture = True):
   '''
   Download and save a single quarter of `K2` data.
   
@@ -211,6 +212,9 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
   :param clobber: Overwrite existing `.npz` file? Default `False`
   :type clobber: bool
   
+  :param use_k2sff_aperture: Use apertures determined by the K2SFF team? Default `True`
+  :type use_k2sff_aperture: bool
+  
   :returns: 
     A :class:`k2data` object containing the following attributes:
   
@@ -274,24 +278,51 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
   if clobber:
     if not os.path.exists(os.path.join(KPLR_ROOT, 'data', 'everest', str(EPIC))):
       os.makedirs(os.path.join(KPLR_ROOT, 'data', 'everest', str(EPIC)))
-
-    # Grab the K2SFF info, mainly to get the apertures
+
+    # Get the TPF
+    client = kplr.API()
     try:
-      k2sff = kplr.K2SFF(EPIC)
-      apertures = k2sff.apertures
+      star = client.k2_star(EPIC)
     except:
-      # If we can't get the K2SFF files, we can't run Everest (for now)
+      log.error("Oops... The target doesn't seem to be available on MAST!")
       return None
-
-    # Get the TPF
-    client = kplr.API()
-    star = client.k2_star(EPIC)
+
     tpf = star.get_target_pixel_files()[0]
     campaign = tpf.sci_campaign
     with tpf.open() as f:
       aperture = f[2].data
       qdata = f[1].data
 
+    try:
+      # Grab the K2SFF info, mainly to get the apertures
+      if not use_k2sff_aperture:
+        raise Exception('')
+      k2sff = kplr.K2SFF(EPIC)
+      apertures = k2sff.apertures
+    except:
+      # We will use the TPF optimal aperture (not ideal, since
+      # it's smaller). We hack it into the ``apertures`` list.
+      k2sff = None
+      apertures = [[] for i in range(20)]
+      apnew = (aperture & 2) // 2 
+
+      # HACK: Make the aperture bigger by including nearest neighbors,
+      # but only if there are fewer than 75 pixels.
+      apnew_copy = np.array(apnew)
+      for i in range(apnew.shape[0]):
+        for j in range(apnew.shape[1]):
+          if aperture[i][j] == 1:
+            for n in [(i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1)]:
+              if n[0] >= 0 and n[0] < apnew.shape[0]:
+                if n[1] >= 0 and n[1] < apnew.shape[1]:
+                  if apnew[n[0]][n[1]] == 1:
+                    apnew[i][j] = 1
+      # Revert to original if it's too big!
+      if np.sum(apnew) > 75:
+        apnew = apnew_copy
+      for i in range(20):
+        apertures[i] = apnew  
+
     # Get the arrays
     time = np.array(qdata.field('TIME'), dtype='float64')
     raw_time = np.array(time)
@@ -314,6 +345,11 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
     flux = np.sum(np.array([f[apidx] for f in fpix], dtype='float64'), axis = 1)
     f_nan_inds = list(np.where(np.isnan(flux))[0])
 
+    # Make sure we have an aperture!
+    if len(apidx[0]) == 0:
+      log.error('Oops... The chosen aperture has zero size!')
+      return None
+
     # Get flagged data points. Note that like K2SFF, we do not throw out all data
     # points flagged with bit #15, but treat them separately. See "LDE Flags" in 
     # http://keplerscience.arc.nasa.gov/k2-data-release-notes.html#k2-campaign-2
@@ -339,13 +375,13 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
     # by a few cadences -- they happen later than they should. No idea why
     # this is happening, but I get *much* better folded eclipses when I remove
     # the first part of C0. Here I simply remove whatever SFF removes.
-    if time[0] < 1940.:
+    if time[0] < 1940. and k2sff is not None:
       bad_inds = np.append(bad_inds, np.where(time < k2sff.time[0]))
 
     # Campaign 2 hack. The first 1-2 days in C2 have very different noise
     # properties than the rest of the campaign, so we'll again trust the SFF
     # cuts.
-    if time[0] < 2061 and time[0] > 2059:
+    if time[0] < 2061 and time[0] > 2059 and k2sff is not None:
       bad_inds = np.append(bad_inds, np.where(time < k2sff.time[0]))
 
     # Remove them
@@ -374,13 +410,6 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
     fitsheader = [pyfits.getheader(ftpf, 0).cards,
                   pyfits.getheader(ftpf, 1).cards,
                   pyfits.getheader(ftpf, 2).cards]
-
-    # Save
-    np.savez_compressed(filename, time = time, fpix = fpix, perr = perr, cadn = cadn,
-                        aperture = aperture, nearby = _nearby, campaign = campaign,
-                        apertures = apertures, fitsheader = fitsheader,
-                        contamination = contamination, raw_time = raw_time,
-                        raw_cadn = raw_cadn)
 
     # Atomically write to disk.
     # http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
@@ -398,7 +427,8 @@ def GetK2Data(EPIC, apnum = 15, delete_kplr_data = True, clobber = False,
     # Delete the kplr tpf
     if delete_kplr_data:
       os.remove(ftpf)
-      os.remove(k2sff._file)
+      if k2sff is not None:
+        os.remove(k2sff._file)
 
   # Get any K2 planets associated with this EPIC
   planets = []
@@ -844,4 +874,29 @@ def GetK2EBs(clobber = False):
       i = np.argmax(np.array([eb.epic for eb in EBs], dtype = int) == int(epic))
       EBs[i] = EB
 
-  return EBs
+  return EBs
+
+def ClearErrors(campaign, run_name = 'default', delete_data = False):
+  '''
+  Delete all output directories that contain ``.err`` files. If `delete_data`
+  is `True`, also deletes the input data. This will force :py:mod:`everest` to
+  re-run them.
+  
+  '''
+
+  folder = os.path.join(EVEREST_DAT, 'output', 'C%02d' % campaign)
+  count = 0
+  if os.path.exists(folder):
+    stars = os.listdir(folder)
+    for i, EPIC in enumerate(stars):
+      sys.stdout.write('\rProcessing star %d/%d...' % (i + 1, len(stars)))
+      sys.stdout.flush()
+      if os.path.exists(os.path.join(folder, EPIC, run_name, '%s.err' % EPIC)):
+        count += 1
+        shutil.rmtree(os.path.join(folder, EPIC, run_name))
+        if delete_data:
+          if os.path.exists(os.path.join(KPLR_ROOT, 'data', 'everest', str(EPIC))):
+            shutil.rmtree(os.path.join(KPLR_ROOT, 'data', 'everest', str(EPIC)))
+    print("")
+  print("Deleted %d targets." % count)
+
diff --git a/everest/detrend.py b/everest/detrend.py
@@ -282,7 +282,7 @@ def Outliers(time, flux, fpix, ferr, mask = [], sigma = 5):
   ferr = np.delete(ferr, mask)
 
   # Set up a generic GP
-  amp = np.median([np.std(y) for y in Chunks(flux, int(2. / np.median(time[1:] - time [:-1])))])
+  amp = np.nanmedian([np.nanstd(y) for y in Chunks(flux, int(2. / np.nanmedian(time[1:] - time [:-1])))])
   gp = george.GP(amp ** 2 * george.kernels.Matern32Kernel(2. ** 2))
 
   # Compute the basis vectors for 1st order PLD w/ 5 chunks
@@ -291,8 +291,8 @@ def Outliers(time, flux, fpix, ferr, mask = [], sigma = 5):
   X, _ = PLDBasis(fpix, time = time, pld_order = 1, max_components = 50, breakpoints = brkpts)
 
   # First we (tentatively) clip outliers from the raw flux.
-  med = np.median(flux)
-  MAD = 1.4826 * np.median(np.abs(flux - med))
+  med = np.nanmedian(flux)
+  MAD = 1.4826 * np.nanmedian(np.abs(flux - med))
   i = np.where((flux > med + sigma * MAD) | (flux < med - sigma * MAD))[0]
   log.info('Iteration #00: %d outliers.' % len(i))
 
@@ -316,8 +316,8 @@ def Outliers(time, flux, fpix, ferr, mask = [], sigma = 5):
     fdet = flux - M - mu
 
     # Clip!
-    med = np.median(fdet)
-    MAD = 1.4826 * np.median(np.abs(fdet - med))
+    med = np.nanmedian(fdet)
+    MAD = 1.4826 * np.nanmedian(np.abs(fdet - med))
     i = np.where((fdet > med + sigma * MAD) | (fdet < med - sigma * MAD))[0]
 
     # Log
-Original file line number
+Diff line change
@@ Expand Up @@
        :caption: everest.usertools
        ccd
+       detector
        selector
        ui
@@ Expand Down @@