Clean up some header files.
diff --git a/asm_avr.inc b/asm_avr.inc
index 2729a3c..c99bf82 100644
--- a/asm_avr.inc
+++ b/asm_avr.inc
@@ -1,3 +1,8 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_ASM_AVR_H_
+#define _UECC_ASM_AVR_H_
+
 #if __AVR_HAVE_EIJMP_EICALL__
     #define IJMP "eijmp \n\t"
 #else
@@ -951,3 +956,5 @@
 }
 #define asm_square 1
 #endif /* uECC_SQUARE_FUNC && !asm_square */
+
+#endif /* _UECC_ASM_AVR_H_ */
diff --git a/asm_avr_mult_square.inc b/asm_avr_mult_square.inc
index 5c416d6..5581bb4 100644
--- a/asm_avr_mult_square.inc
+++ b/asm_avr_mult_square.inc
@@ -1,3 +1,8 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_ASM_AVR_MULT_SQUARE_H_
+#define _UECC_ASM_AVR_MULT_SQUARE_H_
+
 #define FAST_MULT_ASM_5    \
     "adiw r30, 10 \n\t"    \
     "adiw r28, 10 \n\t"    \
@@ -21177,3 +21182,5 @@
     "st z+, r23 \n\t"                  \
     "st z+, r28 \n\t"                  \
     "eor r1, r1 \n\t"
+
+#endif /* _UECC_ASM_AVR_MULT_SQUARE_H_ */
diff --git a/uECC.h b/uECC.h
index 41c98e4..5775bdb 100644
--- a/uECC.h
+++ b/uECC.h
@@ -28,6 +28,9 @@
     #define uECC_OPTIMIZATION_LEVEL 2
 #endif
 
+/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
+used for (scalar) squaring instead of the generic multiplication function. This can make things
+somewhat faster, but increases the code size. */
 #ifndef uECC_SQUARE_FUNC
     #define uECC_SQUARE_FUNC 0
 #endif